Branch data Line data Source code
1 : : /* RTL dead store elimination.
2 : : Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 : :
4 : : Contributed by Richard Sandiford <rsandifor@codesourcery.com>
5 : : and Kenneth Zadeck <zadeck@naturalbridge.com>
6 : :
7 : : This file is part of GCC.
8 : :
9 : : GCC is free software; you can redistribute it and/or modify it under
10 : : the terms of the GNU General Public License as published by the Free
11 : : Software Foundation; either version 3, or (at your option) any later
12 : : version.
13 : :
14 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 : : for more details.
18 : :
19 : : You should have received a copy of the GNU General Public License
20 : : along with GCC; see the file COPYING3. If not see
21 : : <http://www.gnu.org/licenses/>. */
22 : :
23 : : #undef BASELINE
24 : :
25 : : #include "config.h"
26 : : #include "system.h"
27 : : #include "coretypes.h"
28 : : #include "backend.h"
29 : : #include "target.h"
30 : : #include "rtl.h"
31 : : #include "tree.h"
32 : : #include "gimple.h"
33 : : #include "predict.h"
34 : : #include "df.h"
35 : : #include "memmodel.h"
36 : : #include "tm_p.h"
37 : : #include "gimple-ssa.h"
38 : : #include "expmed.h"
39 : : #include "optabs.h"
40 : : #include "emit-rtl.h"
41 : : #include "recog.h"
42 : : #include "alias.h"
43 : : #include "stor-layout.h"
44 : : #include "cfgrtl.h"
45 : : #include "cselib.h"
46 : : #include "tree-pass.h"
47 : : #include "explow.h"
48 : : #include "expr.h"
49 : : #include "dbgcnt.h"
50 : : #include "rtl-iter.h"
51 : : #include "cfgcleanup.h"
52 : : #include "calls.h"
53 : :
54 : : /* This file contains three techniques for performing Dead Store
55 : : Elimination (dse).
56 : :
57 : : * The first technique performs dse locally on any base address. It
58 : : is based on the cselib which is a local value numbering technique.
59 : : This technique is local to a basic block but deals with a fairly
60 : : general addresses.
61 : :
62 : : * The second technique performs dse globally but is restricted to
63 : : base addresses that are either constant or are relative to the
64 : : frame_pointer.
65 : :
66 : : * The third technique, (which is only done after register allocation)
67 : : processes the spill slots. This differs from the second
68 : : technique because it takes advantage of the fact that spilling is
69 : : completely free from the effects of aliasing.
70 : :
71 : : Logically, dse is a backwards dataflow problem. A store can be
72 : : deleted if it if cannot be reached in the backward direction by any
73 : : use of the value being stored. However, the local technique uses a
74 : : forwards scan of the basic block because cselib requires that the
75 : : block be processed in that order.
76 : :
77 : : The pass is logically broken into 7 steps:
78 : :
79 : : 0) Initialization.
80 : :
81 : : 1) The local algorithm, as well as scanning the insns for the two
82 : : global algorithms.
83 : :
84 : : 2) Analysis to see if the global algs are necessary. In the case
85 : : of stores base on a constant address, there must be at least two
86 : : stores to that address, to make it possible to delete some of the
87 : : stores. In the case of stores off of the frame or spill related
88 : : stores, only one store to an address is necessary because those
89 : : stores die at the end of the function.
90 : :
91 : : 3) Set up the global dataflow equations based on processing the
92 : : info parsed in the first step.
93 : :
94 : : 4) Solve the dataflow equations.
95 : :
96 : : 5) Delete the insns that the global analysis has indicated are
97 : : unnecessary.
98 : :
99 : : 6) Delete insns that store the same value as preceding store
100 : : where the earlier store couldn't be eliminated.
101 : :
102 : : 7) Cleanup.
103 : :
104 : : This step uses cselib and canon_rtx to build the largest expression
105 : : possible for each address. This pass is a forwards pass through
106 : : each basic block. From the point of view of the global technique,
107 : : the first pass could examine a block in either direction. The
108 : : forwards ordering is to accommodate cselib.
109 : :
110 : : We make a simplifying assumption: addresses fall into four broad
111 : : categories:
112 : :
113 : : 1) base has rtx_varies_p == false, offset is constant.
114 : : 2) base has rtx_varies_p == false, offset variable.
115 : : 3) base has rtx_varies_p == true, offset constant.
116 : : 4) base has rtx_varies_p == true, offset variable.
117 : :
118 : : The local passes are able to process all 4 kinds of addresses. The
119 : : global pass only handles 1).
120 : :
121 : : The global problem is formulated as follows:
122 : :
123 : : A store, S1, to address A, where A is not relative to the stack
124 : : frame, can be eliminated if all paths from S1 to the end of the
125 : : function contain another store to A before a read to A.
126 : :
127 : : If the address A is relative to the stack frame, a store S2 to A
128 : : can be eliminated if there are no paths from S2 that reach the
129 : : end of the function that read A before another store to A. In
130 : : this case S2 can be deleted if there are paths from S2 to the
131 : : end of the function that have no reads or writes to A. This
132 : : second case allows stores to the stack frame to be deleted that
133 : : would otherwise die when the function returns. This cannot be
134 : : done if stores_off_frame_dead_at_return is not true. See the doc
135 : : for that variable for when this variable is false.
136 : :
137 : : The global problem is formulated as a backwards set union
138 : : dataflow problem where the stores are the gens and reads are the
139 : : kills. Set union problems are rare and require some special
140 : : handling given our representation of bitmaps. A straightforward
141 : : implementation requires a lot of bitmaps filled with 1s.
142 : : These are expensive and cumbersome in our bitmap formulation so
143 : : care has been taken to avoid large vectors filled with 1s. See
144 : : the comments in bb_info and in the dataflow confluence functions
145 : : for details.
146 : :
147 : : There are two places for further enhancements to this algorithm:
148 : :
149 : : 1) The original dse which was embedded in a pass called flow also
150 : : did local address forwarding. For example in
151 : :
152 : : A <- r100
153 : : ... <- A
154 : :
155 : : flow would replace the right hand side of the second insn with a
156 : : reference to r100. Most of the information is available to add this
157 : : to this pass. It has not done it because it is a lot of work in
158 : : the case that either r100 is assigned to between the first and
159 : : second insn and/or the second insn is a load of part of the value
160 : : stored by the first insn.
161 : :
162 : : insn 5 in gcc.c-torture/compile/990203-1.c simple case.
163 : : insn 15 in gcc.c-torture/execute/20001017-2.c simple case.
164 : : insn 25 in gcc.c-torture/execute/20001026-1.c simple case.
165 : : insn 44 in gcc.c-torture/execute/20010910-1.c simple case.
166 : :
167 : : 2) The cleaning up of spill code is quite profitable. It currently
168 : : depends on reading tea leaves and chicken entrails left by reload.
169 : : This pass depends on reload creating a singleton alias set for each
170 : : spill slot and telling the next dse pass which of these alias sets
171 : : are the singletons. Rather than analyze the addresses of the
172 : : spills, dse's spill processing just does analysis of the loads and
173 : : stores that use those alias sets. There are three cases where this
174 : : falls short:
175 : :
176 : : a) Reload sometimes creates the slot for one mode of access, and
177 : : then inserts loads and/or stores for a smaller mode. In this
178 : : case, the current code just punts on the slot. The proper thing
179 : : to do is to back out and use one bit vector position for each
180 : : byte of the entity associated with the slot. This depends on
181 : : KNOWING that reload always generates the accesses for each of the
182 : : bytes in some canonical (read that easy to understand several
183 : : passes after reload happens) way.
184 : :
185 : : b) Reload sometimes decides that spill slot it allocated was not
186 : : large enough for the mode and goes back and allocates more slots
187 : : with the same mode and alias set. The backout in this case is a
188 : : little more graceful than (a). In this case the slot is unmarked
189 : : as being a spill slot and if final address comes out to be based
190 : : off the frame pointer, the global algorithm handles this slot.
191 : :
192 : : c) For any pass that may prespill, there is currently no
193 : : mechanism to tell the dse pass that the slot being used has the
194 : : special properties that reload uses. It may be that all that is
195 : : required is to have those passes make the same calls that reload
196 : : does, assuming that the alias sets can be manipulated in the same
197 : : way. */
198 : :
199 : : /* There are limits to the size of constant offsets we model for the
200 : : global problem. There are certainly test cases, that exceed this
201 : : limit, however, it is unlikely that there are important programs
202 : : that really have constant offsets this size. */
203 : : #define MAX_OFFSET (64 * 1024)
204 : :
205 : : /* Obstack for the DSE dataflow bitmaps. We don't want to put these
206 : : on the default obstack because these bitmaps can grow quite large
207 : : (~2GB for the small (!) test case of PR54146) and we'll hold on to
208 : : all that memory until the end of the compiler run.
209 : : As a bonus, delete_tree_live_info can destroy all the bitmaps by just
210 : : releasing the whole obstack. */
211 : : static bitmap_obstack dse_bitmap_obstack;
212 : :
213 : : /* Obstack for other data. As for above: Kinda nice to be able to
214 : : throw it all away at the end in one big sweep. */
215 : : static struct obstack dse_obstack;
216 : :
217 : : /* Scratch bitmap for cselib's cselib_expand_value_rtx. */
218 : : static bitmap scratch = NULL;
219 : :
220 : : struct insn_info_type;
221 : :
222 : : /* This structure holds information about a candidate store. */
223 : : class store_info
224 : : {
225 : : public:
226 : :
227 : : /* False means this is a clobber. */
228 : : bool is_set;
229 : :
230 : : /* False if a single HOST_WIDE_INT bitmap is used for positions_needed. */
231 : : bool is_large;
232 : :
233 : : /* The id of the mem group of the base address. If rtx_varies_p is
234 : : true, this is -1. Otherwise, it is the index into the group
235 : : table. */
236 : : int group_id;
237 : :
238 : : /* This is the cselib value. */
239 : : cselib_val *cse_base;
240 : :
241 : : /* This canonized mem. */
242 : : rtx mem;
243 : :
244 : : /* Canonized MEM address for use by canon_true_dependence. */
245 : : rtx mem_addr;
246 : :
247 : : /* The offset of the first byte associated with the operation. */
248 : : poly_int64 offset;
249 : :
250 : : /* The number of bytes covered by the operation. This is always exact
251 : : and known (rather than -1). */
252 : : poly_int64 width;
253 : :
254 : : /* The address space that the memory reference uses. */
255 : : unsigned char addrspace;
256 : :
257 : : union
258 : : {
259 : : /* A bitmask as wide as the number of bytes in the word that
260 : : contains a 1 if the byte may be needed. The store is unused if
261 : : all of the bits are 0. This is used if IS_LARGE is false. */
262 : : unsigned HOST_WIDE_INT small_bitmask;
263 : :
264 : : struct
265 : : {
266 : : /* A bitmap with one bit per byte, or null if the number of
267 : : bytes isn't known at compile time. A cleared bit means
268 : : the position is needed. Used if IS_LARGE is true. */
269 : : bitmap bmap;
270 : :
271 : : /* When BITMAP is nonnull, this counts the number of set bits
272 : : (i.e. unneeded bytes) in the bitmap. If it is equal to
273 : : WIDTH, the whole store is unused.
274 : :
275 : : When BITMAP is null:
276 : : - the store is definitely not needed when COUNT == 1
277 : : - all the store is needed when COUNT == 0 and RHS is nonnull
278 : : - otherwise we don't know which parts of the store are needed. */
279 : : int count;
280 : : } large;
281 : : } positions_needed;
282 : :
283 : : /* The next store info for this insn. */
284 : : class store_info *next;
285 : :
286 : : /* The right hand side of the store. This is used if there is a
287 : : subsequent reload of the mems address somewhere later in the
288 : : basic block. */
289 : : rtx rhs;
290 : :
291 : : /* If rhs is or holds a constant, this contains that constant,
292 : : otherwise NULL. */
293 : : rtx const_rhs;
294 : :
295 : : /* Set if this store stores the same constant value as REDUNDANT_REASON
296 : : insn stored. These aren't eliminated early, because doing that
297 : : might prevent the earlier larger store to be eliminated. */
298 : : struct insn_info_type *redundant_reason;
299 : : };
300 : :
301 : : /* Return a bitmask with the first N low bits set. */
302 : :
303 : : static unsigned HOST_WIDE_INT
304 : 18887519 : lowpart_bitmask (int n)
305 : : {
306 : 18887519 : unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
307 : 18887519 : return mask >> (HOST_BITS_PER_WIDE_INT - n);
308 : : }
309 : :
310 : : static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");
311 : :
312 : : static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");
313 : :
314 : : /* This structure holds information about a load. These are only
315 : : built for rtx bases. */
316 : : class read_info_type
317 : : {
318 : : public:
319 : : /* The id of the mem group of the base address. */
320 : : int group_id;
321 : :
322 : : /* The offset of the first byte associated with the operation. */
323 : : poly_int64 offset;
324 : :
325 : : /* The number of bytes covered by the operation, or -1 if not known. */
326 : : poly_int64 width;
327 : :
328 : : /* The mem being read. */
329 : : rtx mem;
330 : :
331 : : /* The next read_info for this insn. */
332 : : class read_info_type *next;
333 : : };
334 : : typedef class read_info_type *read_info_t;
335 : :
336 : : static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");
337 : :
338 : : /* One of these records is created for each insn. */
339 : :
340 : : struct insn_info_type
341 : : {
342 : : /* Set true if the insn contains a store but the insn itself cannot
343 : : be deleted. This is set if the insn is a parallel and there is
344 : : more than one non dead output or if the insn is in some way
345 : : volatile. */
346 : : bool cannot_delete;
347 : :
348 : : /* This field is only used by the global algorithm. It is set true
349 : : if the insn contains any read of mem except for a (1). This is
350 : : also set if the insn is a call or has a clobber mem. If the insn
351 : : contains a wild read, the use_rec will be null. */
352 : : bool wild_read;
353 : :
354 : : /* This is true only for CALL instructions which could potentially read
355 : : any non-frame memory location. This field is used by the global
356 : : algorithm. */
357 : : bool non_frame_wild_read;
358 : :
359 : : /* This field is only used for the processing of const functions.
360 : : These functions cannot read memory, but they can read the stack
361 : : because that is where they may get their parms. We need to be
362 : : this conservative because, like the store motion pass, we don't
363 : : consider CALL_INSN_FUNCTION_USAGE when processing call insns.
364 : : Moreover, we need to distinguish two cases:
365 : : 1. Before reload (register elimination), the stores related to
366 : : outgoing arguments are stack pointer based and thus deemed
367 : : of non-constant base in this pass. This requires special
368 : : handling but also means that the frame pointer based stores
369 : : need not be killed upon encountering a const function call.
370 : : 2. After reload, the stores related to outgoing arguments can be
371 : : either stack pointer or hard frame pointer based. This means
372 : : that we have no other choice than also killing all the frame
373 : : pointer based stores upon encountering a const function call.
374 : : This field is set after reload for const function calls and before
375 : : reload for const tail function calls on targets where arg pointer
376 : : is the frame pointer. Having this set is less severe than a wild
377 : : read, it just means that all the frame related stores are killed
378 : : rather than all the stores. */
379 : : bool frame_read;
380 : :
381 : : /* This field is only used for the processing of const functions.
382 : : It is set if the insn may contain a stack pointer based store. */
383 : : bool stack_pointer_based;
384 : :
385 : : /* This is true if any of the sets within the store contains a
386 : : cselib base. Such stores can only be deleted by the local
387 : : algorithm. */
388 : : bool contains_cselib_groups;
389 : :
390 : : /* The insn. */
391 : : rtx_insn *insn;
392 : :
393 : : /* The list of mem sets or mem clobbers that are contained in this
394 : : insn. If the insn is deletable, it contains only one mem set.
395 : : But it could also contain clobbers. Insns that contain more than
396 : : one mem set are not deletable, but each of those mems are here in
397 : : order to provide info to delete other insns. */
398 : : store_info *store_rec;
399 : :
400 : : /* The linked list of mem uses in this insn. Only the reads from
401 : : rtx bases are listed here. The reads to cselib bases are
402 : : completely processed during the first scan and so are never
403 : : created. */
404 : : read_info_t read_rec;
405 : :
406 : : /* The live fixed registers. We assume only fixed registers can
407 : : cause trouble by being clobbered from an expanded pattern;
408 : : storing only the live fixed registers (rather than all registers)
409 : : means less memory needs to be allocated / copied for the individual
410 : : stores. */
411 : : regset fixed_regs_live;
412 : :
413 : : /* The prev insn in the basic block. */
414 : : struct insn_info_type * prev_insn;
415 : :
416 : : /* The linked list of insns that are in consideration for removal in
417 : : the forwards pass through the basic block. This pointer may be
418 : : trash as it is not cleared when a wild read occurs. The only
419 : : time it is guaranteed to be correct is when the traversal starts
420 : : at active_local_stores. */
421 : : struct insn_info_type * next_local_store;
422 : : };
423 : : typedef struct insn_info_type *insn_info_t;
424 : :
425 : : static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");
426 : :
427 : : /* The linked list of stores that are under consideration in this
428 : : basic block. */
429 : : static insn_info_t active_local_stores;
430 : : static int active_local_stores_len;
431 : :
432 : : struct dse_bb_info_type
433 : : {
434 : : /* Pointer to the insn info for the last insn in the block. These
435 : : are linked so this is how all of the insns are reached. During
436 : : scanning this is the current insn being scanned. */
437 : : insn_info_t last_insn;
438 : :
439 : : /* The info for the global dataflow problem. */
440 : :
441 : :
442 : : /* This is set if the transfer function should and in the wild_read
443 : : bitmap before applying the kill and gen sets. That vector knocks
444 : : out most of the bits in the bitmap and thus speeds up the
445 : : operations. */
446 : : bool apply_wild_read;
447 : :
448 : : /* The following 4 bitvectors hold information about which positions
449 : : of which stores are live or dead. They are indexed by
450 : : get_bitmap_index. */
451 : :
452 : : /* The set of store positions that exist in this block before a wild read. */
453 : : bitmap gen;
454 : :
455 : : /* The set of load positions that exist in this block above the
456 : : same position of a store. */
457 : : bitmap kill;
458 : :
459 : : /* The set of stores that reach the top of the block without being
460 : : killed by a read.
461 : :
462 : : Do not represent the in if it is all ones. Note that this is
463 : : what the bitvector should logically be initialized to for a set
464 : : intersection problem. However, like the kill set, this is too
465 : : expensive. So initially, the in set will only be created for the
466 : : exit block and any block that contains a wild read. */
467 : : bitmap in;
468 : :
469 : : /* The set of stores that reach the bottom of the block from it's
470 : : successors.
471 : :
472 : : Do not represent the in if it is all ones. Note that this is
473 : : what the bitvector should logically be initialized to for a set
474 : : intersection problem. However, like the kill and in set, this is
475 : : too expensive. So what is done is that the confluence operator
476 : : just initializes the vector from one of the out sets of the
477 : : successors of the block. */
478 : : bitmap out;
479 : :
480 : : /* The following bitvector is indexed by the reg number. It
481 : : contains the set of regs that are live at the current instruction
482 : : being processed. While it contains info for all of the
483 : : registers, only the hard registers are actually examined. It is used
484 : : to assure that shift and/or add sequences that are inserted do not
485 : : accidentally clobber live hard regs. */
486 : : bitmap regs_live;
487 : : };
488 : :
489 : : typedef struct dse_bb_info_type *bb_info_t;
490 : :
491 : : static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
492 : : ("bb_info_pool");
493 : :
494 : : /* Table to hold all bb_infos. */
495 : : static bb_info_t *bb_table;
496 : :
497 : : /* There is a group_info for each rtx base that is used to reference
498 : : memory. There are also not many of the rtx bases because they are
499 : : very limited in scope. */
500 : :
501 : : struct group_info
502 : : {
503 : : /* The actual base of the address. */
504 : : rtx rtx_base;
505 : :
506 : : /* The sequential id of the base. This allows us to have a
507 : : canonical ordering of these that is not based on addresses. */
508 : : int id;
509 : :
510 : : /* True if there are any positions that are to be processed
511 : : globally. */
512 : : bool process_globally;
513 : :
514 : : /* True if the base of this group is either the frame_pointer or
515 : : hard_frame_pointer. */
516 : : bool frame_related;
517 : :
518 : : /* A mem wrapped around the base pointer for the group in order to do
519 : : read dependency. It must be given BLKmode in order to encompass all
520 : : the possible offsets from the base. */
521 : : rtx base_mem;
522 : :
523 : : /* Canonized version of base_mem's address. */
524 : : rtx canon_base_addr;
525 : :
526 : : /* These two sets of two bitmaps are used to keep track of how many
527 : : stores are actually referencing that position from this base. We
528 : : only do this for rtx bases as this will be used to assign
529 : : positions in the bitmaps for the global problem. Bit N is set in
530 : : store1 on the first store for offset N. Bit N is set in store2
531 : : for the second store to offset N. This is all we need since we
532 : : only care about offsets that have two or more stores for them.
533 : :
534 : : The "_n" suffix is for offsets less than 0 and the "_p" suffix is
535 : : for 0 and greater offsets.
536 : :
537 : : There is one special case here, for stores into the stack frame,
538 : : we will or store1 into store2 before deciding which stores look
539 : : at globally. This is because stores to the stack frame that have
540 : : no other reads before the end of the function can also be
541 : : deleted. */
542 : : bitmap store1_n, store1_p, store2_n, store2_p;
543 : :
544 : : /* These bitmaps keep track of offsets in this group escape this function.
545 : : An offset escapes if it corresponds to a named variable whose
546 : : addressable flag is set. */
547 : : bitmap escaped_n, escaped_p;
548 : :
549 : : /* The positions in this bitmap have the same assignments as the in,
550 : : out, gen and kill bitmaps. This bitmap is all zeros except for
551 : : the positions that are occupied by stores for this group. */
552 : : bitmap group_kill;
553 : :
554 : : /* The offset_map is used to map the offsets from this base into
555 : : positions in the global bitmaps. It is only created after all of
556 : : the all of stores have been scanned and we know which ones we
557 : : care about. */
558 : : int *offset_map_n, *offset_map_p;
559 : : int offset_map_size_n, offset_map_size_p;
560 : : };
561 : :
562 : : static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");
563 : :
564 : : /* Index into the rtx_group_vec. */
565 : : static int rtx_group_next_id;
566 : :
567 : :
568 : : static vec<group_info *> rtx_group_vec;
569 : :
570 : :
571 : : /* This structure holds the set of changes that are being deferred
572 : : when removing read operation. See replace_read. */
573 : : struct deferred_change
574 : : {
575 : :
576 : : /* The mem that is being replaced. */
577 : : rtx *loc;
578 : :
579 : : /* The reg it is being replaced with. */
580 : : rtx reg;
581 : :
582 : : struct deferred_change *next;
583 : : };
584 : :
585 : : static object_allocator<deferred_change> deferred_change_pool
586 : : ("deferred_change_pool");
587 : :
588 : : static deferred_change *deferred_change_list = NULL;
589 : :
590 : : /* This is true except if cfun->stdarg -- i.e. we cannot do
591 : : this for vararg functions because they play games with the frame. */
592 : : static bool stores_off_frame_dead_at_return;
593 : :
594 : : /* Counter for stats. */
595 : : static int globally_deleted;
596 : : static int locally_deleted;
597 : :
598 : : static bitmap all_blocks;
599 : :
600 : : /* Locations that are killed by calls in the global phase. */
601 : : static bitmap kill_on_calls;
602 : :
603 : : /* The number of bits used in the global bitmaps. */
604 : : static unsigned int current_position;
605 : :
606 : : /* Print offset range [OFFSET, OFFSET + WIDTH) to FILE. */
607 : :
608 : : static void
609 : 0 : print_range (FILE *file, poly_int64 offset, poly_int64 width)
610 : : {
611 : 0 : fprintf (file, "[");
612 : 0 : print_dec (offset, file, SIGNED);
613 : 0 : fprintf (file, "..");
614 : 0 : print_dec (offset + width, file, SIGNED);
615 : 0 : fprintf (file, ")");
616 : 0 : }
617 : :
618 : : /*----------------------------------------------------------------------------
619 : : Zeroth step.
620 : :
621 : : Initialization.
622 : : ----------------------------------------------------------------------------*/
623 : :
624 : :
625 : : /* Hashtable callbacks for maintaining the "bases" field of
626 : : store_group_info, given that the addresses are function invariants. */
627 : :
628 : : struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
629 : : {
630 : : static inline hashval_t hash (const group_info *);
631 : : static inline bool equal (const group_info *, const group_info *);
632 : : };
633 : :
634 : : inline bool
635 : 74379114 : invariant_group_base_hasher::equal (const group_info *gi1,
636 : : const group_info *gi2)
637 : : {
638 : 74379114 : return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
639 : : }
640 : :
641 : : inline hashval_t
642 : 86519904 : invariant_group_base_hasher::hash (const group_info *gi)
643 : : {
644 : 86519904 : int do_not_record;
645 : 86519904 : return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
646 : : }
647 : :
648 : : /* Tables of group_info structures, hashed by base value. */
649 : : static hash_table<invariant_group_base_hasher> *rtx_group_table;
650 : :
651 : :
652 : : /* Get the GROUP for BASE. Add a new group if it is not there. */
653 : :
654 : : static group_info *
655 : 18741979 : get_group_info (rtx base)
656 : : {
657 : 18741979 : struct group_info tmp_gi;
658 : 18741979 : group_info *gi;
659 : 18741979 : group_info **slot;
660 : :
661 : 18741979 : gcc_assert (base != NULL_RTX);
662 : :
663 : : /* Find the store_base_info structure for BASE, creating a new one
664 : : if necessary. */
665 : 18741979 : tmp_gi.rtx_base = base;
666 : 18741979 : slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
667 : 18741979 : gi = *slot;
668 : :
669 : 18741979 : if (gi == NULL)
670 : : {
671 : 5744010 : *slot = gi = group_info_pool.allocate ();
672 : 5744010 : gi->rtx_base = base;
673 : 5744010 : gi->id = rtx_group_next_id++;
674 : 5744010 : gi->base_mem = gen_rtx_MEM (BLKmode, base);
675 : 5744010 : gi->canon_base_addr = canon_rtx (base);
676 : 5744010 : gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
677 : 5744010 : gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
678 : 5744010 : gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
679 : 5744010 : gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
680 : 5744010 : gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
681 : 5744010 : gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
682 : 5744010 : gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
683 : 5744010 : gi->process_globally = false;
684 : 11488020 : gi->frame_related =
685 : 5524514 : (base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx)
686 : 11232854 : || (base == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM]);
687 : 5744010 : gi->offset_map_size_n = 0;
688 : 5744010 : gi->offset_map_size_p = 0;
689 : 5744010 : gi->offset_map_n = NULL;
690 : 5744010 : gi->offset_map_p = NULL;
691 : 5744010 : rtx_group_vec.safe_push (gi);
692 : : }
693 : :
694 : 18741979 : return gi;
695 : : }
696 : :
697 : :
698 : : /* Initialization of data structures. */
699 : :
700 : : static void
701 : 1969986 : dse_step0 (void)
702 : : {
703 : 1969986 : locally_deleted = 0;
704 : 1969986 : globally_deleted = 0;
705 : :
706 : 1969986 : bitmap_obstack_initialize (&dse_bitmap_obstack);
707 : 1969986 : gcc_obstack_init (&dse_obstack);
708 : :
709 : 1969986 : scratch = BITMAP_ALLOC (®_obstack);
710 : 1969986 : kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);
711 : :
712 : :
713 : 1969986 : rtx_group_table = new hash_table<invariant_group_base_hasher> (11);
714 : :
715 : 1969986 : bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
716 : 1969986 : rtx_group_next_id = 0;
717 : :
718 : 1969986 : stores_off_frame_dead_at_return = !cfun->stdarg;
719 : :
720 : 1969986 : init_alias_analysis ();
721 : 1969986 : }
722 : :
723 : :
724 : :
725 : : /*----------------------------------------------------------------------------
726 : : First step.
727 : :
728 : : Scan all of the insns. Any random ordering of the blocks is fine.
729 : : Each block is scanned in forward order to accommodate cselib which
730 : : is used to remove stores with non-constant bases.
731 : : ----------------------------------------------------------------------------*/
732 : :
733 : : /* Delete all of the store_info recs from INSN_INFO. */
734 : :
735 : : static void
736 : 13641246 : free_store_info (insn_info_t insn_info)
737 : : {
738 : 13641246 : store_info *cur = insn_info->store_rec;
739 : 27330520 : while (cur)
740 : : {
741 : 13689274 : store_info *next = cur->next;
742 : 13689274 : if (cur->is_large)
743 : 9336 : BITMAP_FREE (cur->positions_needed.large.bmap);
744 : 13689274 : if (cur->cse_base)
745 : 13665583 : cse_store_info_pool.remove (cur);
746 : : else
747 : 23691 : rtx_store_info_pool.remove (cur);
748 : : cur = next;
749 : : }
750 : :
751 : 13641246 : insn_info->cannot_delete = true;
752 : 13641246 : insn_info->contains_cselib_groups = false;
753 : 13641246 : insn_info->store_rec = NULL;
754 : 13641246 : }
755 : :
756 : : struct note_add_store_info
757 : : {
758 : : rtx_insn *first, *current;
759 : : regset fixed_regs_live;
760 : : bool failure;
761 : : };
762 : :
763 : : /* Callback for emit_inc_dec_insn_before via note_stores.
764 : : Check if a register is clobbered which is live afterwards. */
765 : :
766 : : static void
767 : 0 : note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
768 : : {
769 : 0 : rtx_insn *insn;
770 : 0 : note_add_store_info *info = (note_add_store_info *) data;
771 : :
772 : 0 : if (!REG_P (loc))
773 : : return;
774 : :
775 : : /* If this register is referenced by the current or an earlier insn,
776 : : that's OK. E.g. this applies to the register that is being incremented
777 : : with this addition. */
778 : 0 : for (insn = info->first;
779 : 0 : insn != NEXT_INSN (info->current);
780 : 0 : insn = NEXT_INSN (insn))
781 : 0 : if (reg_referenced_p (loc, PATTERN (insn)))
782 : : return;
783 : :
784 : : /* If we come here, we have a clobber of a register that's only OK
785 : : if that register is not live. If we don't have liveness information
786 : : available, fail now. */
787 : 0 : if (!info->fixed_regs_live)
788 : : {
789 : 0 : info->failure = true;
790 : 0 : return;
791 : : }
792 : : /* Now check if this is a live fixed register. */
793 : 0 : unsigned int end_regno = END_REGNO (loc);
794 : 0 : for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
795 : 0 : if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
796 : 0 : info->failure = true;
797 : : }
798 : :
799 : : /* Callback for for_each_inc_dec that emits an INSN that sets DEST to
800 : : SRC + SRCOFF before insn ARG. */
801 : :
802 : : static int
803 : 0 : emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
804 : : rtx op ATTRIBUTE_UNUSED,
805 : : rtx dest, rtx src, rtx srcoff, void *arg)
806 : : {
807 : 0 : insn_info_t insn_info = (insn_info_t) arg;
808 : 0 : rtx_insn *insn = insn_info->insn, *new_insn, *cur;
809 : 0 : note_add_store_info info;
810 : :
811 : : /* We can reuse all operands without copying, because we are about
812 : : to delete the insn that contained it. */
813 : 0 : if (srcoff)
814 : : {
815 : 0 : start_sequence ();
816 : 0 : emit_insn (gen_add3_insn (dest, src, srcoff));
817 : 0 : new_insn = get_insns ();
818 : 0 : end_sequence ();
819 : : }
820 : : else
821 : 0 : new_insn = gen_move_insn (dest, src);
822 : 0 : info.first = new_insn;
823 : 0 : info.fixed_regs_live = insn_info->fixed_regs_live;
824 : 0 : info.failure = false;
825 : 0 : for (cur = new_insn; cur; cur = NEXT_INSN (cur))
826 : : {
827 : 0 : info.current = cur;
828 : 0 : note_stores (cur, note_add_store, &info);
829 : : }
830 : :
831 : : /* If a failure was flagged above, return 1 so that for_each_inc_dec will
832 : : return it immediately, communicating the failure to its caller. */
833 : 0 : if (info.failure)
834 : : return 1;
835 : :
836 : 0 : emit_insn_before (new_insn, insn);
837 : :
838 : 0 : return 0;
839 : : }
840 : :
841 : : /* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
842 : : is there, is split into a separate insn.
843 : : Return true on success (or if there was nothing to do), false on failure. */
844 : :
845 : : static bool
846 : 83148 : check_for_inc_dec_1 (insn_info_t insn_info)
847 : : {
848 : 83148 : rtx_insn *insn = insn_info->insn;
849 : 83148 : rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
850 : 83148 : if (note)
851 : 0 : return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
852 : 0 : insn_info) == 0;
853 : :
854 : : /* Punt on stack pushes, those don't have REG_INC notes and we are
855 : : unprepared to deal with distribution of REG_ARGS_SIZE notes etc. */
856 : 83148 : subrtx_iterator::array_type array;
857 : 567224 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
858 : : {
859 : 484076 : const_rtx x = *iter;
860 : 484076 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
861 : 0 : return false;
862 : : }
863 : :
864 : 83148 : return true;
865 : 83148 : }
866 : :
867 : :
868 : : /* Entry point for postreload. If you work on reload_cse, or you need this
869 : : anywhere else, consider if you can provide register liveness information
870 : : and add a parameter to this function so that it can be passed down in
871 : : insn_info.fixed_regs_live. */
872 : : bool
873 : 211182 : check_for_inc_dec (rtx_insn *insn)
874 : : {
875 : 211182 : insn_info_type insn_info;
876 : 211182 : rtx note;
877 : :
878 : 211182 : insn_info.insn = insn;
879 : 211182 : insn_info.fixed_regs_live = NULL;
880 : 211182 : note = find_reg_note (insn, REG_INC, NULL_RTX);
881 : 211182 : if (note)
882 : 0 : return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
883 : 0 : &insn_info) == 0;
884 : :
885 : : /* Punt on stack pushes, those don't have REG_INC notes and we are
886 : : unprepared to deal with distribution of REG_ARGS_SIZE notes etc. */
887 : 211182 : subrtx_iterator::array_type array;
888 : 1045109 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
889 : : {
890 : 833927 : const_rtx x = *iter;
891 : 833927 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
892 : 0 : return false;
893 : : }
894 : :
895 : 211182 : return true;
896 : 211182 : }
897 : :
898 : : /* Delete the insn and free all of the fields inside INSN_INFO. */
899 : :
900 : : static void
901 : 31197 : delete_dead_store_insn (insn_info_t insn_info)
902 : : {
903 : 31197 : read_info_t read_info;
904 : :
905 : 31197 : if (!dbg_cnt (dse))
906 : : return;
907 : :
908 : 31197 : if (!check_for_inc_dec_1 (insn_info))
909 : : return;
910 : 31197 : if (dump_file && (dump_flags & TDF_DETAILS))
911 : 0 : fprintf (dump_file, "Locally deleting insn %d\n",
912 : 0 : INSN_UID (insn_info->insn));
913 : :
914 : 31197 : free_store_info (insn_info);
915 : 31197 : read_info = insn_info->read_rec;
916 : :
917 : 31259 : while (read_info)
918 : : {
919 : 62 : read_info_t next = read_info->next;
920 : 62 : read_info_type_pool.remove (read_info);
921 : 62 : read_info = next;
922 : : }
923 : 31197 : insn_info->read_rec = NULL;
924 : :
925 : 31197 : delete_insn (insn_info->insn);
926 : 31197 : locally_deleted++;
927 : 31197 : insn_info->insn = NULL;
928 : :
929 : 31197 : insn_info->wild_read = false;
930 : : }
931 : :
932 : : /* Return whether DECL, a local variable, can possibly escape the current
933 : : function scope. */
934 : :
935 : : static bool
936 : 1129596 : local_variable_can_escape (tree decl)
937 : : {
938 : 1129596 : if (TREE_ADDRESSABLE (decl))
939 : : return true;
940 : :
941 : : /* If this is a partitioned variable, we need to consider all the variables
942 : : in the partition. This is necessary because a store into one of them can
943 : : be replaced with a store into another and this may not change the outcome
944 : : of the escape analysis. */
945 : 1129596 : if (cfun->gimple_df->decls_to_pointers != NULL)
946 : : {
947 : 257705 : tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
948 : 257705 : if (namep)
949 : 0 : return TREE_ADDRESSABLE (*namep);
950 : : }
951 : :
952 : : return false;
953 : : }
954 : :
955 : : /* Return whether EXPR can possibly escape the current function scope. */
956 : :
957 : : static bool
958 : 5050428 : can_escape (tree expr)
959 : : {
960 : 5050428 : tree base;
961 : 5050428 : if (!expr)
962 : : return true;
963 : 4797397 : base = get_base_address (expr);
964 : 4797397 : if (DECL_P (base)
965 : 3790073 : && !may_be_aliased (base)
966 : 7280889 : && !(VAR_P (base)
967 : 1342627 : && !DECL_EXTERNAL (base)
968 : 1342623 : && !TREE_STATIC (base)
969 : 1129596 : && local_variable_can_escape (base)))
970 : 1353896 : return false;
971 : : return true;
972 : : }
973 : :
974 : : /* Set the store* bitmaps offset_map_size* fields in GROUP based on
975 : : OFFSET and WIDTH. */
976 : :
977 : : static void
978 : 5050428 : set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
979 : : tree expr)
980 : : {
981 : : /* Non-constant offsets and widths act as global kills, so there's no point
982 : : trying to use them to derive global DSE candidates. */
983 : 5050428 : HOST_WIDE_INT i, const_offset, const_width;
984 : 5050428 : bool expr_escapes = can_escape (expr);
985 : 5050428 : if (offset.is_constant (&const_offset)
986 : 5050428 : && width.is_constant (&const_width)
987 : 5050428 : && const_offset > -MAX_OFFSET
988 : 5037187 : && const_offset + const_width < MAX_OFFSET)
989 : 58555992 : for (i = const_offset; i < const_offset + const_width; ++i)
990 : : {
991 : 53522382 : bitmap store1;
992 : 53522382 : bitmap store2;
993 : 53522382 : bitmap escaped;
994 : 53522382 : int ai;
995 : 53522382 : if (i < 0)
996 : : {
997 : 40000465 : store1 = group->store1_n;
998 : 40000465 : store2 = group->store2_n;
999 : 40000465 : escaped = group->escaped_n;
1000 : 40000465 : ai = -i;
1001 : : }
1002 : : else
1003 : : {
1004 : 13521917 : store1 = group->store1_p;
1005 : 13521917 : store2 = group->store2_p;
1006 : 13521917 : escaped = group->escaped_p;
1007 : 13521917 : ai = i;
1008 : : }
1009 : :
1010 : 53522382 : if (!bitmap_set_bit (store1, ai))
1011 : 16795846 : bitmap_set_bit (store2, ai);
1012 : : else
1013 : : {
1014 : 36726536 : if (i < 0)
1015 : : {
1016 : 28746101 : if (group->offset_map_size_n < ai)
1017 : 387129 : group->offset_map_size_n = ai;
1018 : : }
1019 : : else
1020 : : {
1021 : 7980435 : if (group->offset_map_size_p < ai)
1022 : 7339045 : group->offset_map_size_p = ai;
1023 : : }
1024 : : }
1025 : 53522382 : if (expr_escapes)
1026 : 41677146 : bitmap_set_bit (escaped, ai);
1027 : : }
1028 : 5050428 : }
1029 : :
1030 : : static void
1031 : 11339418 : reset_active_stores (void)
1032 : : {
1033 : 11339418 : active_local_stores = NULL;
1034 : 11339418 : active_local_stores_len = 0;
1035 : 0 : }
1036 : :
1037 : : /* Free all READ_REC of the LAST_INSN of BB_INFO. */
1038 : :
1039 : : static void
1040 : 11339418 : free_read_records (bb_info_t bb_info)
1041 : : {
1042 : 11339418 : insn_info_t insn_info = bb_info->last_insn;
1043 : 11339418 : read_info_t *ptr = &insn_info->read_rec;
1044 : 20116671 : while (*ptr)
1045 : : {
1046 : 8777253 : read_info_t next = (*ptr)->next;
1047 : 8777253 : read_info_type_pool.remove (*ptr);
1048 : 8777253 : *ptr = next;
1049 : : }
1050 : 11339418 : }
1051 : :
1052 : : /* Set the BB_INFO so that the last insn is marked as a wild read. */
1053 : :
1054 : : static void
1055 : 2703931 : add_wild_read (bb_info_t bb_info)
1056 : : {
1057 : 2703931 : insn_info_t insn_info = bb_info->last_insn;
1058 : 2703931 : insn_info->wild_read = true;
1059 : 2703931 : free_read_records (bb_info);
1060 : 2703931 : reset_active_stores ();
1061 : 2703931 : }
1062 : :
1063 : : /* Set the BB_INFO so that the last insn is marked as a wild read of
1064 : : non-frame locations. */
1065 : :
1066 : : static void
1067 : 8635487 : add_non_frame_wild_read (bb_info_t bb_info)
1068 : : {
1069 : 8635487 : insn_info_t insn_info = bb_info->last_insn;
1070 : 8635487 : insn_info->non_frame_wild_read = true;
1071 : 8635487 : free_read_records (bb_info);
1072 : 8635487 : reset_active_stores ();
1073 : 8635487 : }
1074 : :
1075 : : /* Return true if X is a constant or one of the registers that behave
1076 : : as a constant over the life of a function. This is equivalent to
1077 : : !rtx_varies_p for memory addresses. */
1078 : :
1079 : : static bool
1080 : 71388792 : const_or_frame_p (rtx x)
1081 : : {
1082 : 71388792 : if (CONSTANT_P (x))
1083 : : return true;
1084 : :
1085 : 59611514 : if (GET_CODE (x) == REG)
1086 : : {
1087 : : /* Note that we have to test for the actual rtx used for the frame
1088 : : and arg pointers and not just the register number in case we have
1089 : : eliminated the frame and/or arg pointer and are using it
1090 : : for pseudos. */
1091 : 43600829 : if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
1092 : : /* The arg pointer varies if it is not a fixed register. */
1093 : 37071500 : || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
1094 : 36636128 : || x == pic_offset_table_rtx)
1095 : : return true;
1096 : 36636128 : return false;
1097 : : }
1098 : :
1099 : : return false;
1100 : : }
1101 : :
1102 : : /* Take all reasonable action to put the address of MEM into the form
1103 : : that we can do analysis on.
1104 : :
1105 : : The gold standard is to get the address into the form: address +
1106 : : OFFSET where address is something that rtx_varies_p considers a
1107 : : constant. When we can get the address in this form, we can do
1108 : : global analysis on it. Note that for constant bases, address is
1109 : : not actually returned, only the group_id. The address can be
1110 : : obtained from that.
1111 : :
1112 : : If that fails, we try cselib to get a value we can at least use
1113 : : locally. If that fails we return false.
1114 : :
1115 : : The GROUP_ID is set to -1 for cselib bases and the index of the
1116 : : group for non_varying bases.
1117 : :
1118 : : FOR_READ is true if this is a mem read and false if not. */
1119 : :
1120 : : static bool
1121 : 45055513 : canon_address (rtx mem,
1122 : : int *group_id,
1123 : : poly_int64 *offset,
1124 : : cselib_val **base)
1125 : : {
1126 : 45055513 : machine_mode address_mode = get_address_mode (mem);
1127 : 45055513 : rtx mem_address = XEXP (mem, 0);
1128 : 45055513 : rtx expanded_address, address;
1129 : 45055513 : int expanded;
1130 : :
1131 : 45055513 : cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));
1132 : :
1133 : 45055513 : if (dump_file && (dump_flags & TDF_DETAILS))
1134 : : {
1135 : 0 : fprintf (dump_file, " mem: ");
1136 : 0 : print_inline_rtx (dump_file, mem_address, 0);
1137 : 0 : fprintf (dump_file, "\n");
1138 : : }
1139 : :
1140 : : /* First see if just canon_rtx (mem_address) is const or frame,
1141 : : if not, try cselib_expand_value_rtx and call canon_rtx on that. */
1142 : : address = NULL_RTX;
1143 : 97727460 : for (expanded = 0; expanded < 2; expanded++)
1144 : : {
1145 : 71413938 : if (expanded)
1146 : : {
1147 : : /* Use cselib to replace all of the reg references with the full
1148 : : expression. This will take care of the case where we have
1149 : :
1150 : : r_x = base + offset;
1151 : : val = *r_x;
1152 : :
1153 : : by making it into
1154 : :
1155 : : val = *(base + offset); */
1156 : :
1157 : 26358425 : expanded_address = cselib_expand_value_rtx (mem_address,
1158 : : scratch, 5);
1159 : :
1160 : : /* If this fails, just go with the address from first
1161 : : iteration. */
1162 : 26358425 : if (!expanded_address)
1163 : : break;
1164 : : }
1165 : : else
1166 : : expanded_address = mem_address;
1167 : :
1168 : : /* Split the address into canonical BASE + OFFSET terms. */
1169 : 71413926 : address = canon_rtx (expanded_address);
1170 : :
1171 : 71413926 : *offset = 0;
1172 : :
1173 : 71413926 : if (dump_file && (dump_flags & TDF_DETAILS))
1174 : : {
1175 : 0 : if (expanded)
1176 : : {
1177 : 0 : fprintf (dump_file, "\n after cselib_expand address: ");
1178 : 0 : print_inline_rtx (dump_file, expanded_address, 0);
1179 : 0 : fprintf (dump_file, "\n");
1180 : : }
1181 : :
1182 : 0 : fprintf (dump_file, "\n after canon_rtx address: ");
1183 : 0 : print_inline_rtx (dump_file, address, 0);
1184 : 0 : fprintf (dump_file, "\n");
1185 : : }
1186 : :
1187 : 71413926 : if (GET_CODE (address) == CONST)
1188 : 1013137 : address = XEXP (address, 0);
1189 : :
1190 : 71413926 : address = strip_offset_and_add (address, offset);
1191 : :
1192 : 71413926 : if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
1193 : 71413926 : && const_or_frame_p (address))
1194 : : {
1195 : 18741979 : group_info *group = get_group_info (address);
1196 : :
1197 : 18741979 : if (dump_file && (dump_flags & TDF_DETAILS))
1198 : : {
1199 : 0 : fprintf (dump_file, " gid=%d offset=", group->id);
1200 : 0 : print_dec (*offset, dump_file);
1201 : 0 : fprintf (dump_file, "\n");
1202 : : }
1203 : 18741979 : *base = NULL;
1204 : 18741979 : *group_id = group->id;
1205 : 18741979 : return true;
1206 : : }
1207 : : }
1208 : :
1209 : 26313534 : *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
1210 : 26313534 : *group_id = -1;
1211 : :
1212 : 26313534 : if (*base == NULL)
1213 : : {
1214 : 174630 : if (dump_file && (dump_flags & TDF_DETAILS))
1215 : 0 : fprintf (dump_file, " no cselib val - should be a wild read.\n");
1216 : 174630 : return false;
1217 : : }
1218 : 26138904 : if (dump_file && (dump_flags & TDF_DETAILS))
1219 : : {
1220 : 0 : fprintf (dump_file, " varying cselib base=%u:%u offset = ",
1221 : : (*base)->uid, (*base)->hash);
1222 : 0 : print_dec (*offset, dump_file);
1223 : 0 : fprintf (dump_file, "\n");
1224 : : }
1225 : : return true;
1226 : : }
1227 : :
1228 : :
1229 : : /* Clear the rhs field from the active_local_stores array. */
1230 : :
1231 : : static void
1232 : 83698 : clear_rhs_from_active_local_stores (void)
1233 : : {
1234 : 83698 : insn_info_t ptr = active_local_stores;
1235 : :
1236 : 118682 : while (ptr)
1237 : : {
1238 : 34984 : store_info *store_info = ptr->store_rec;
1239 : : /* Skip the clobbers. */
1240 : 34984 : while (!store_info->is_set)
1241 : 0 : store_info = store_info->next;
1242 : :
1243 : 34984 : store_info->rhs = NULL;
1244 : 34984 : store_info->const_rhs = NULL;
1245 : :
1246 : 34984 : ptr = ptr->next_local_store;
1247 : : }
1248 : 83698 : }
1249 : :
1250 : :
1251 : : /* Mark byte POS bytes from the beginning of store S_INFO as unneeded. */
1252 : :
1253 : : static inline void
1254 : 746733 : set_position_unneeded (store_info *s_info, int pos)
1255 : : {
1256 : 746733 : if (UNLIKELY (s_info->is_large))
1257 : : {
1258 : 413698 : if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
1259 : 413622 : s_info->positions_needed.large.count++;
1260 : : }
1261 : : else
1262 : 333035 : s_info->positions_needed.small_bitmask
1263 : 333035 : &= ~(HOST_WIDE_INT_1U << pos);
1264 : 746733 : }
1265 : :
1266 : : /* Mark the whole store S_INFO as unneeded. */
1267 : :
1268 : : static inline void
1269 : 245178 : set_all_positions_unneeded (store_info *s_info)
1270 : : {
1271 : 245178 : if (UNLIKELY (s_info->is_large))
1272 : : {
1273 : 0 : HOST_WIDE_INT width;
1274 : 0 : if (s_info->width.is_constant (&width))
1275 : : {
1276 : 0 : bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
1277 : 0 : s_info->positions_needed.large.count = width;
1278 : : }
1279 : : else
1280 : : {
1281 : : gcc_checking_assert (!s_info->positions_needed.large.bmap);
1282 : : s_info->positions_needed.large.count = 1;
1283 : : }
1284 : : }
1285 : : else
1286 : 245178 : s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
1287 : 245178 : }
1288 : :
1289 : : /* Return TRUE if any bytes from S_INFO store are needed. */
1290 : :
1291 : : static inline bool
1292 : 97113999 : any_positions_needed_p (store_info *s_info)
1293 : : {
1294 : 97113999 : if (UNLIKELY (s_info->is_large))
1295 : : {
1296 : 107201 : HOST_WIDE_INT width;
1297 : 107201 : if (s_info->width.is_constant (&width))
1298 : : {
1299 : 107201 : gcc_checking_assert (s_info->positions_needed.large.bmap);
1300 : 107201 : return s_info->positions_needed.large.count < width;
1301 : : }
1302 : : else
1303 : : {
1304 : : gcc_checking_assert (!s_info->positions_needed.large.bmap);
1305 : : return s_info->positions_needed.large.count == 0;
1306 : : }
1307 : : }
1308 : : else
1309 : 97006798 : return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
1310 : : }
1311 : :
1312 : : /* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
1313 : : store are known to be needed. */
1314 : :
1315 : : static inline bool
1316 : 209165 : all_positions_needed_p (store_info *s_info, poly_int64 start,
1317 : : poly_int64 width)
1318 : : {
1319 : 209165 : gcc_assert (s_info->rhs);
1320 : 209165 : if (!s_info->width.is_constant ())
1321 : : {
1322 : : gcc_assert (s_info->is_large
1323 : : && !s_info->positions_needed.large.bmap);
1324 : : return s_info->positions_needed.large.count == 0;
1325 : : }
1326 : :
1327 : : /* Otherwise, if START and WIDTH are non-constant, we're asking about
1328 : : a non-constant region of a constant-sized store. We can't say for
1329 : : sure that all positions are needed. */
1330 : 209165 : HOST_WIDE_INT const_start, const_width;
1331 : 209165 : if (!start.is_constant (&const_start)
1332 : 209165 : || !width.is_constant (&const_width))
1333 : : return false;
1334 : :
1335 : 209165 : if (UNLIKELY (s_info->is_large))
1336 : : {
1337 : 140853 : for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
1338 : 127012 : if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
1339 : : return false;
1340 : : return true;
1341 : : }
1342 : : else
1343 : : {
1344 : 195271 : unsigned HOST_WIDE_INT mask
1345 : 195271 : = lowpart_bitmask (const_width) << const_start;
1346 : 195271 : return (s_info->positions_needed.small_bitmask & mask) == mask;
1347 : : }
1348 : : }
1349 : :
1350 : :
1351 : : static rtx get_stored_val (store_info *, machine_mode, poly_int64,
1352 : : poly_int64, basic_block, bool);
1353 : :
1354 : :
1355 : : /* BODY is an instruction pattern that belongs to INSN. Return 1 if
1356 : : there is a candidate store, after adding it to the appropriate
1357 : : local store group if so. */
1358 : :
1359 : : static int
1360 : 120914394 : record_store (rtx body, bb_info_t bb_info)
1361 : : {
1362 : 120914394 : rtx mem, rhs, const_rhs, mem_addr;
1363 : 120914394 : poly_int64 offset = 0;
1364 : 120914394 : poly_int64 width = 0;
1365 : 120914394 : insn_info_t insn_info = bb_info->last_insn;
1366 : 120914394 : store_info *store_info = NULL;
1367 : 120914394 : int group_id;
1368 : 120914394 : cselib_val *base = NULL;
1369 : 120914394 : insn_info_t ptr, last, redundant_reason;
1370 : 120914394 : bool store_is_unused;
1371 : :
1372 : 120914394 : if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
1373 : : return 0;
1374 : :
1375 : 117627896 : mem = SET_DEST (body);
1376 : :
1377 : : /* If this is not used, then this cannot be used to keep the insn
1378 : : from being deleted. On the other hand, it does provide something
1379 : : that can be used to prove that another store is dead. */
1380 : 117627896 : store_is_unused
1381 : 117627896 : = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
1382 : :
1383 : : /* Check whether that value is a suitable memory location. */
1384 : 117627896 : if (!MEM_P (mem))
1385 : : {
1386 : : /* If the set or clobber is unused, then it does not effect our
1387 : : ability to get rid of the entire insn. */
1388 : 97514954 : if (!store_is_unused)
1389 : 82594752 : insn_info->cannot_delete = true;
1390 : 97514954 : return 0;
1391 : : }
1392 : :
1393 : : /* At this point we know mem is a mem. */
1394 : 20112942 : if (GET_MODE (mem) == BLKmode)
1395 : : {
1396 : 1414859 : HOST_WIDE_INT const_size;
1397 : 1414859 : if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
1398 : : {
1399 : 1335122 : if (dump_file && (dump_flags & TDF_DETAILS))
1400 : 0 : fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
1401 : 1335122 : add_wild_read (bb_info);
1402 : 1335122 : insn_info->cannot_delete = true;
1403 : 1335122 : return 0;
1404 : : }
1405 : : /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
1406 : : as memset (addr, 0, 36); */
1407 : 79737 : else if (!MEM_SIZE_KNOWN_P (mem)
1408 : 49810 : || maybe_le (MEM_SIZE (mem), 0)
1409 : : /* This is a limit on the bitmap size, which is only relevant
1410 : : for constant-sized MEMs. */
1411 : 49810 : || (MEM_SIZE (mem).is_constant (&const_size)
1412 : 49810 : && const_size > MAX_OFFSET)
1413 : 49311 : || GET_CODE (body) != SET
1414 : 129048 : || !CONST_INT_P (SET_SRC (body)))
1415 : : {
1416 : 54416 : if (!store_is_unused)
1417 : : {
1418 : : /* If the set or clobber is unused, then it does not effect our
1419 : : ability to get rid of the entire insn. */
1420 : 54416 : insn_info->cannot_delete = true;
1421 : 54416 : clear_rhs_from_active_local_stores ();
1422 : : }
1423 : 54416 : return 0;
1424 : : }
1425 : : }
1426 : :
1427 : : /* We can still process a volatile mem, we just cannot delete it. */
1428 : 18723404 : if (MEM_VOLATILE_P (mem))
1429 : 628185 : insn_info->cannot_delete = true;
1430 : :
1431 : 18723404 : if (!canon_address (mem, &group_id, &offset, &base))
1432 : : {
1433 : 7341 : clear_rhs_from_active_local_stores ();
1434 : 7341 : return 0;
1435 : : }
1436 : :
1437 : 18716063 : if (GET_MODE (mem) == BLKmode)
1438 : 25312 : width = MEM_SIZE (mem);
1439 : : else
1440 : 37381502 : width = GET_MODE_SIZE (GET_MODE (mem));
1441 : :
1442 : 37432126 : if (!endpoint_representable_p (offset, width))
1443 : : {
1444 : 22 : clear_rhs_from_active_local_stores ();
1445 : 22 : return 0;
1446 : : }
1447 : :
1448 : 18716041 : if (known_eq (width, 0))
1449 : : return 0;
1450 : :
1451 : 18716011 : if (group_id >= 0)
1452 : : {
1453 : : /* In the restrictive case where the base is a constant or the
1454 : : frame pointer we can do global analysis. */
1455 : :
1456 : 5050428 : group_info *group
1457 : 5050428 : = rtx_group_vec[group_id];
1458 : 5050428 : tree expr = MEM_EXPR (mem);
1459 : :
1460 : 5050428 : store_info = rtx_store_info_pool.allocate ();
1461 : 5050428 : set_usage_bits (group, offset, width, expr);
1462 : :
1463 : 5050428 : if (dump_file && (dump_flags & TDF_DETAILS))
1464 : : {
1465 : 0 : fprintf (dump_file, " processing const base store gid=%d",
1466 : : group_id);
1467 : 0 : print_range (dump_file, offset, width);
1468 : 0 : fprintf (dump_file, "\n");
1469 : : }
1470 : : }
1471 : : else
1472 : : {
1473 : 13665583 : if (may_be_sp_based_p (XEXP (mem, 0)))
1474 : 12746513 : insn_info->stack_pointer_based = true;
1475 : 13665583 : insn_info->contains_cselib_groups = true;
1476 : :
1477 : 13665583 : store_info = cse_store_info_pool.allocate ();
1478 : 13665583 : group_id = -1;
1479 : :
1480 : 13665583 : if (dump_file && (dump_flags & TDF_DETAILS))
1481 : : {
1482 : 0 : fprintf (dump_file, " processing cselib store ");
1483 : 0 : print_range (dump_file, offset, width);
1484 : 0 : fprintf (dump_file, "\n");
1485 : : }
1486 : : }
1487 : :
1488 : 18716011 : const_rhs = rhs = NULL_RTX;
1489 : 18716011 : if (GET_CODE (body) == SET
1490 : : /* No place to keep the value after ra. */
1491 : 18714113 : && !reload_completed
1492 : 7730867 : && (REG_P (SET_SRC (body))
1493 : 2410692 : || GET_CODE (SET_SRC (body)) == SUBREG
1494 : 2319784 : || CONSTANT_P (SET_SRC (body)))
1495 : 7558902 : && !MEM_VOLATILE_P (mem)
1496 : : /* Sometimes the store and reload is used for truncation and
1497 : : rounding. */
1498 : 26014397 : && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
1499 : : {
1500 : 7297568 : rhs = SET_SRC (body);
1501 : 7297568 : if (CONSTANT_P (rhs))
1502 : : const_rhs = rhs;
1503 : 5282227 : else if (body == PATTERN (insn_info->insn))
1504 : : {
1505 : 5282223 : rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
1506 : 5282223 : if (tem && CONSTANT_P (XEXP (tem, 0)))
1507 : : const_rhs = XEXP (tem, 0);
1508 : : }
1509 : 5282227 : if (const_rhs == NULL_RTX && REG_P (rhs))
1510 : : {
1511 : 5201801 : rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);
1512 : :
1513 : 5201801 : if (tem && CONSTANT_P (tem))
1514 : : const_rhs = tem;
1515 : : else
1516 : : {
1517 : : /* If RHS is set only once to a constant, set CONST_RHS
1518 : : to the constant. */
1519 : 4766941 : rtx def_src = df_find_single_def_src (rhs);
1520 : 4766941 : if (def_src != nullptr && CONSTANT_P (def_src))
1521 : 184523 : const_rhs = def_src;
1522 : : }
1523 : : }
1524 : : }
1525 : :
1526 : : /* Check to see if this stores causes some other stores to be
1527 : : dead. */
1528 : 18716011 : ptr = active_local_stores;
1529 : 18716011 : last = NULL;
1530 : 18716011 : redundant_reason = NULL;
1531 : 18716011 : unsigned char addrspace = MEM_ADDR_SPACE (mem);
1532 : 18716011 : mem = canon_rtx (mem);
1533 : :
1534 : 18716011 : if (group_id < 0)
1535 : 13665583 : mem_addr = base->val_rtx;
1536 : : else
1537 : : {
1538 : 5050428 : group_info *group = rtx_group_vec[group_id];
1539 : 5050428 : mem_addr = group->canon_base_addr;
1540 : : }
1541 : 18716011 : if (maybe_ne (offset, 0))
1542 : 10952072 : mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
1543 : :
1544 : 115830010 : while (ptr)
1545 : : {
1546 : 97113999 : insn_info_t next = ptr->next_local_store;
1547 : 97113999 : class store_info *s_info = ptr->store_rec;
1548 : 97113999 : bool del = true;
1549 : :
1550 : : /* Skip the clobbers. We delete the active insn if this insn
1551 : : shadows the set. To have been put on the active list, it
1552 : : has exactly on set. */
1553 : 97113999 : while (!s_info->is_set)
1554 : 0 : s_info = s_info->next;
1555 : :
1556 : 97113999 : if (s_info->group_id == group_id
1557 : 92902151 : && s_info->cse_base == base
1558 : 77709512 : && s_info->addrspace == addrspace)
1559 : : {
1560 : 77709510 : HOST_WIDE_INT i;
1561 : 77709510 : if (dump_file && (dump_flags & TDF_DETAILS))
1562 : : {
1563 : 0 : fprintf (dump_file, " trying store in insn=%d gid=%d",
1564 : 0 : INSN_UID (ptr->insn), s_info->group_id);
1565 : 0 : print_range (dump_file, s_info->offset, s_info->width);
1566 : 0 : fprintf (dump_file, "\n");
1567 : : }
1568 : :
1569 : : /* Even if PTR won't be eliminated as unneeded, if both
1570 : : PTR and this insn store the same constant value, we might
1571 : : eliminate this insn instead. */
1572 : 77709510 : if (s_info->const_rhs
1573 : 20426006 : && const_rhs
1574 : 17615908 : && known_subrange_p (offset, width,
1575 : 17615908 : s_info->offset, s_info->width)
1576 : 39223 : && all_positions_needed_p (s_info, offset - s_info->offset,
1577 : : width)
1578 : : /* We can only remove the later store if the earlier aliases
1579 : : at least all accesses the later one. */
1580 : 77748570 : && mems_same_for_tbaa_p (s_info->mem, mem))
1581 : : {
1582 : 39002 : if (GET_MODE (mem) == BLKmode)
1583 : : {
1584 : 4 : if (GET_MODE (s_info->mem) == BLKmode
1585 : 4 : && s_info->const_rhs == const_rhs)
1586 : 315 : redundant_reason = ptr;
1587 : : }
1588 : 38998 : else if (s_info->const_rhs == const0_rtx
1589 : 33502 : && const_rhs == const0_rtx)
1590 : : redundant_reason = ptr;
1591 : : else
1592 : : {
1593 : 38940 : rtx val;
1594 : 38940 : start_sequence ();
1595 : 77880 : val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1596 : 38940 : BLOCK_FOR_INSN (insn_info->insn),
1597 : : true);
1598 : 38940 : if (get_insns () != NULL)
1599 : 0 : val = NULL_RTX;
1600 : 38940 : end_sequence ();
1601 : 38940 : if (val && rtx_equal_p (val, const_rhs))
1602 : : redundant_reason = ptr;
1603 : : }
1604 : : }
1605 : :
1606 : 77709510 : HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1607 : 77709510 : if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1608 : : /* The new store touches every byte that S_INFO does. */
1609 : 245178 : set_all_positions_unneeded (s_info);
1610 : 77464332 : else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1611 : 77464332 : && s_info->width.is_constant (&const_s_width)
1612 : 77464332 : && width.is_constant (&const_width))
1613 : : {
1614 : 77464332 : HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1615 : 77464332 : begin_unneeded = MAX (begin_unneeded, 0);
1616 : 77464332 : end_unneeded = MIN (end_unneeded, const_s_width);
1617 : 78211065 : for (i = begin_unneeded; i < end_unneeded; ++i)
1618 : 746733 : set_position_unneeded (s_info, i);
1619 : : }
1620 : : else
1621 : : {
1622 : : /* We don't know which parts of S_INFO are needed and
1623 : : which aren't, so invalidate the RHS. */
1624 : : s_info->rhs = NULL;
1625 : : s_info->const_rhs = NULL;
1626 : : }
1627 : : }
1628 : 19404489 : else if (s_info->rhs)
1629 : : /* Need to see if it is possible for this store to overwrite
1630 : : the value of store_info. If it is, set the rhs to NULL to
1631 : : keep it from being used to remove a load. */
1632 : : {
1633 : 3185647 : if (canon_output_dependence (s_info->mem, true,
1634 : 3185647 : mem, GET_MODE (mem),
1635 : : mem_addr))
1636 : : {
1637 : 1619529 : s_info->rhs = NULL;
1638 : 1619529 : s_info->const_rhs = NULL;
1639 : : }
1640 : : }
1641 : :
1642 : : /* An insn can be deleted if every position of every one of
1643 : : its s_infos is zero. */
1644 : 97113999 : if (any_positions_needed_p (s_info))
1645 : : del = false;
1646 : :
1647 : 246761 : if (del)
1648 : : {
1649 : 246761 : insn_info_t insn_to_delete = ptr;
1650 : :
1651 : 246761 : active_local_stores_len--;
1652 : 246761 : if (last)
1653 : 19312 : last->next_local_store = ptr->next_local_store;
1654 : : else
1655 : 227449 : active_local_stores = ptr->next_local_store;
1656 : :
1657 : 246761 : if (!insn_to_delete->cannot_delete)
1658 : 19948 : delete_dead_store_insn (insn_to_delete);
1659 : : }
1660 : : else
1661 : : last = ptr;
1662 : :
1663 : : ptr = next;
1664 : : }
1665 : :
1666 : : /* Finish filling in the store_info. */
1667 : 18716011 : store_info->next = insn_info->store_rec;
1668 : 18716011 : insn_info->store_rec = store_info;
1669 : 18716011 : store_info->mem = mem;
1670 : 18716011 : store_info->mem_addr = mem_addr;
1671 : 18716011 : store_info->cse_base = base;
1672 : 18716011 : HOST_WIDE_INT const_width;
1673 : 18716011 : if (!width.is_constant (&const_width))
1674 : : {
1675 : : store_info->is_large = true;
1676 : : store_info->positions_needed.large.count = 0;
1677 : : store_info->positions_needed.large.bmap = NULL;
1678 : : }
1679 : 18716011 : else if (const_width > HOST_BITS_PER_WIDE_INT)
1680 : : {
1681 : 23763 : store_info->is_large = true;
1682 : 23763 : store_info->positions_needed.large.count = 0;
1683 : 23763 : store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1684 : : }
1685 : : else
1686 : : {
1687 : 18692248 : store_info->is_large = false;
1688 : 18692248 : store_info->positions_needed.small_bitmask
1689 : 18692248 : = lowpart_bitmask (const_width);
1690 : : }
1691 : 18716011 : store_info->group_id = group_id;
1692 : 18716011 : store_info->offset = offset;
1693 : 18716011 : store_info->width = width;
1694 : 18716011 : store_info->is_set = GET_CODE (body) == SET;
1695 : 18716011 : store_info->rhs = rhs;
1696 : 18716011 : store_info->const_rhs = const_rhs;
1697 : 18716011 : store_info->redundant_reason = redundant_reason;
1698 : 18716011 : store_info->addrspace = addrspace;
1699 : :
1700 : : /* If this is a clobber, we return 0. We will only be able to
1701 : : delete this insn if there is only one store USED store, but we
1702 : : can use the clobber to delete other stores earlier. */
1703 : 18716011 : return store_info->is_set ? 1 : 0;
1704 : : }
1705 : :
1706 : :
1707 : : static void
1708 : 0 : dump_insn_info (const char * start, insn_info_t insn_info)
1709 : : {
1710 : 0 : fprintf (dump_file, "%s insn=%d %s\n", start,
1711 : 0 : INSN_UID (insn_info->insn),
1712 : 0 : insn_info->store_rec ? "has store" : "naked");
1713 : 0 : }
1714 : :
1715 : :
1716 : : /* If the modes are different and the value's source and target do not
1717 : : line up, we need to extract the value from lower part of the rhs of
1718 : : the store, shift it, and then put it into a form that can be shoved
1719 : : into the read_insn. This function generates a right SHIFT of a
1720 : : value that is at least ACCESS_SIZE bytes wide of READ_MODE. The
1721 : : shift sequence is returned or NULL if we failed to find a
1722 : : shift. */
1723 : :
1724 : : static rtx
1725 : 83998 : find_shift_sequence (poly_int64 access_size,
1726 : : store_info *store_info,
1727 : : machine_mode read_mode,
1728 : : poly_int64 shift, bool speed, bool require_cst)
1729 : : {
1730 : 83998 : machine_mode store_mode = GET_MODE (store_info->mem);
1731 : 83998 : scalar_int_mode new_mode;
1732 : 83998 : rtx read_reg = NULL;
1733 : :
1734 : : /* If a constant was stored into memory, try to simplify it here,
1735 : : otherwise the cost of the shift might preclude this optimization
1736 : : e.g. at -Os, even when no actual shift will be needed. */
1737 : 83998 : if (store_info->const_rhs
1738 : 107993 : && known_le (access_size, GET_MODE_SIZE (MAX_MODE_INT)))
1739 : : {
1740 : 23995 : auto new_mode = smallest_int_mode_for_size (access_size * BITS_PER_UNIT);
1741 : 23995 : auto byte = subreg_lowpart_offset (new_mode, store_mode);
1742 : 23995 : rtx ret
1743 : 23995 : = simplify_subreg (new_mode, store_info->const_rhs, store_mode, byte);
1744 : 23995 : if (ret && CONSTANT_P (ret))
1745 : : {
1746 : 23995 : rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1747 : 23995 : ret = simplify_const_binary_operation (LSHIFTRT, new_mode, ret,
1748 : : shift_rtx);
1749 : 23995 : if (ret && CONSTANT_P (ret))
1750 : : {
1751 : 23995 : byte = subreg_lowpart_offset (read_mode, new_mode);
1752 : 23995 : ret = simplify_subreg (read_mode, ret, new_mode, byte);
1753 : 23995 : if (ret && CONSTANT_P (ret)
1754 : 47990 : && (set_src_cost (ret, read_mode, speed)
1755 : : <= COSTS_N_INSNS (1)))
1756 : 23926 : return ret;
1757 : : }
1758 : : }
1759 : : }
1760 : :
1761 : 60072 : if (require_cst)
1762 : : return NULL_RTX;
1763 : :
1764 : : /* Some machines like the x86 have shift insns for each size of
1765 : : operand. Other machines like the ppc or the ia-64 may only have
1766 : : shift insns that shift values within 32 or 64 bit registers.
1767 : : This loop tries to find the smallest shift insn that will right
1768 : : justify the value we want to read but is available in one insn on
1769 : : the machine. */
1770 : :
1771 : 60056 : opt_scalar_int_mode new_mode_iter;
1772 : 268841 : FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
1773 : : {
1774 : 268841 : rtx target, new_reg, new_lhs;
1775 : 268841 : rtx_insn *shift_seq, *insn;
1776 : 268841 : int cost;
1777 : :
1778 : 268841 : new_mode = new_mode_iter.require ();
1779 : 645848 : if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1780 : : break;
1781 : 632847 : if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (read_mode)))
1782 : 137588 : continue;
1783 : :
1784 : : /* Try a wider mode if truncating the store mode to NEW_MODE
1785 : : requires a real instruction. */
1786 : 146722 : if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1787 : 73361 : && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1788 : 0 : continue;
1789 : :
1790 : : /* Also try a wider mode if the necessary punning is either not
1791 : : desirable or not possible. */
1792 : 140817 : if (!CONSTANT_P (store_info->rhs)
1793 : 73361 : && !targetm.modes_tieable_p (new_mode, store_mode))
1794 : 67456 : continue;
1795 : :
1796 : 5905 : if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
1797 : 12821 : && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
1798 : : {
1799 : : /* Try to implement the shift using a subreg. */
1800 : 3458 : poly_int64 offset
1801 : 3458 : = subreg_offset_from_lsb (new_mode, store_mode, shift);
1802 : 3458 : rtx rhs_subreg = simplify_gen_subreg (new_mode, store_info->rhs,
1803 : : store_mode, offset);
1804 : 3458 : if (rhs_subreg)
1805 : : {
1806 : 0 : read_reg
1807 : 0 : = extract_low_bits (read_mode, new_mode, copy_rtx (rhs_subreg));
1808 : 0 : break;
1809 : : }
1810 : : }
1811 : :
1812 : 11810 : if (maybe_lt (GET_MODE_SIZE (new_mode), access_size))
1813 : 3601 : continue;
1814 : :
1815 : 2304 : new_reg = gen_reg_rtx (new_mode);
1816 : :
1817 : 2304 : start_sequence ();
1818 : :
1819 : : /* In theory we could also check for an ashr. Ian Taylor knows
1820 : : of one dsp where the cost of these two was not the same. But
1821 : : this really is a rare case anyway. */
1822 : 2304 : target = expand_binop (new_mode, lshr_optab, new_reg,
1823 : : gen_int_shift_amount (new_mode, shift),
1824 : : new_reg, 1, OPTAB_DIRECT);
1825 : :
1826 : 2304 : shift_seq = get_insns ();
1827 : 2304 : end_sequence ();
1828 : :
1829 : 2304 : if (target != new_reg || shift_seq == NULL)
1830 : 0 : continue;
1831 : :
1832 : : cost = 0;
1833 : 4608 : for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1834 : 2304 : if (INSN_P (insn))
1835 : 2304 : cost += insn_cost (insn, speed);
1836 : :
1837 : : /* The computation up to here is essentially independent
1838 : : of the arguments and could be precomputed. It may
1839 : : not be worth doing so. We could precompute if
1840 : : worthwhile or at least cache the results. The result
1841 : : technically depends on both SHIFT and ACCESS_SIZE,
1842 : : but in practice the answer will depend only on ACCESS_SIZE. */
1843 : :
1844 : 2304 : if (cost > COSTS_N_INSNS (1))
1845 : 140 : continue;
1846 : :
1847 : 2164 : new_lhs = extract_low_bits (new_mode, store_mode,
1848 : : copy_rtx (store_info->rhs));
1849 : 2164 : if (new_lhs == NULL_RTX)
1850 : 0 : continue;
1851 : :
1852 : : /* We found an acceptable shift. Generate a move to
1853 : : take the value from the store and put it into the
1854 : : shift pseudo, then shift it, then generate another
1855 : : move to put in into the target of the read. */
1856 : 2164 : emit_move_insn (new_reg, new_lhs);
1857 : 2164 : emit_insn (shift_seq);
1858 : 2164 : read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1859 : 2164 : break;
1860 : : }
1861 : :
1862 : : return read_reg;
1863 : : }
1864 : :
1865 : :
1866 : : /* Call back for note_stores to find the hard regs set or clobbered by
1867 : : insn. Data is a bitmap of the hardregs set so far. */
1868 : :
1869 : : static void
1870 : 147359 : look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1871 : : {
1872 : 147359 : bitmap regs_set = (bitmap) data;
1873 : :
1874 : 147359 : if (REG_P (x)
1875 : 147359 : && HARD_REGISTER_P (x))
1876 : 2139 : bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1877 : 147359 : }
1878 : :
1879 : : /* Helper function for replace_read and record_store.
1880 : : Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1881 : : consisting of READ_WIDTH bytes starting from READ_OFFSET. Return NULL
1882 : : if not successful. If REQUIRE_CST is true, return always constant. */
1883 : :
1884 : : static rtx
1885 : 206509 : get_stored_val (store_info *store_info, machine_mode read_mode,
1886 : : poly_int64 read_offset, poly_int64 read_width,
1887 : : basic_block bb, bool require_cst)
1888 : : {
1889 : 206509 : machine_mode store_mode = GET_MODE (store_info->mem);
1890 : 206509 : poly_int64 gap;
1891 : 206509 : rtx read_reg;
1892 : :
1893 : : /* To get here the read is within the boundaries of the write so
1894 : : shift will never be negative. Start out with the shift being in
1895 : : bytes. */
1896 : 206509 : if (store_mode == BLKmode)
1897 : 13880 : gap = 0;
1898 : 192629 : else if (BYTES_BIG_ENDIAN)
1899 : : gap = ((store_info->offset + store_info->width)
1900 : : - (read_offset + read_width));
1901 : : else
1902 : 192629 : gap = read_offset - store_info->offset;
1903 : :
1904 : 206509 : if (maybe_ne (gap, 0))
1905 : : {
1906 : 83998 : if (!gap.is_constant ())
1907 : 0 : return NULL_RTX;
1908 : :
1909 : 83998 : poly_int64 shift = gap * BITS_PER_UNIT;
1910 : 83998 : poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1911 : 83998 : read_reg = find_shift_sequence (access_size, store_info, read_mode,
1912 : 83998 : shift, optimize_bb_for_speed_p (bb),
1913 : : require_cst);
1914 : : }
1915 : 122511 : else if (store_mode == BLKmode)
1916 : : {
1917 : : /* The store is a memset (addr, const_val, const_size). */
1918 : 13880 : gcc_assert (CONST_INT_P (store_info->rhs));
1919 : 13880 : scalar_int_mode int_store_mode;
1920 : 13880 : if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1921 : : read_reg = NULL_RTX;
1922 : 13880 : else if (store_info->rhs == const0_rtx)
1923 : 13879 : read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1924 : 2 : else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1925 : : || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1926 : : read_reg = NULL_RTX;
1927 : : else
1928 : : {
1929 : 1 : unsigned HOST_WIDE_INT c
1930 : 1 : = INTVAL (store_info->rhs)
1931 : 1 : & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
1932 : 1 : int shift = BITS_PER_UNIT;
1933 : 4 : while (shift < HOST_BITS_PER_WIDE_INT)
1934 : : {
1935 : 3 : c |= (c << shift);
1936 : 3 : shift <<= 1;
1937 : : }
1938 : 1 : read_reg = gen_int_mode (c, int_store_mode);
1939 : 1 : read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1940 : : }
1941 : : }
1942 : 108631 : else if (store_info->const_rhs
1943 : 9774 : && (require_cst
1944 : 6824 : || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1945 : 6106 : read_reg = extract_low_bits (read_mode, store_mode,
1946 : : copy_rtx (store_info->const_rhs));
1947 : 102525 : else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
1948 : 15422 : && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
1949 : 110236 : && targetm.modes_tieable_p (read_mode, store_mode))
1950 : 7311 : read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
1951 : : else
1952 : 95214 : read_reg = extract_low_bits (read_mode, store_mode,
1953 : : copy_rtx (store_info->rhs));
1954 : 206509 : if (require_cst && read_reg && !CONSTANT_P (read_reg))
1955 : 0 : read_reg = NULL_RTX;
1956 : : return read_reg;
1957 : : }
1958 : :
1959 : : /* Take a sequence of:
1960 : : A <- r1
1961 : : ...
1962 : : ... <- A
1963 : :
1964 : : and change it into
1965 : : r2 <- r1
1966 : : A <- r1
1967 : : ...
1968 : : ... <- r2
1969 : :
1970 : : or
1971 : :
1972 : : r3 <- extract (r1)
1973 : : r3 <- r3 >> shift
1974 : : r2 <- extract (r3)
1975 : : ... <- r2
1976 : :
1977 : : or
1978 : :
1979 : : r2 <- extract (r1)
1980 : : ... <- r2
1981 : :
1982 : : Depending on the alignment and the mode of the store and
1983 : : subsequent load.
1984 : :
1985 : :
1986 : : The STORE_INFO and STORE_INSN are for the store and READ_INFO
1987 : : and READ_INSN are for the read. Return true if the replacement
1988 : : went ok. */
1989 : :
1990 : : static bool
1991 : 167569 : replace_read (store_info *store_info, insn_info_t store_insn,
1992 : : read_info_t read_info, insn_info_t read_insn, rtx *loc)
1993 : : {
1994 : 167569 : machine_mode store_mode = GET_MODE (store_info->mem);
1995 : 167569 : machine_mode read_mode = GET_MODE (read_info->mem);
1996 : 167569 : rtx_insn *insns, *this_insn;
1997 : 167569 : rtx read_reg;
1998 : 167569 : basic_block bb;
1999 : :
2000 : 167569 : if (!dbg_cnt (dse))
2001 : : return false;
2002 : :
2003 : : /* Create a sequence of instructions to set up the read register.
2004 : : This sequence goes immediately before the store and its result
2005 : : is read by the load.
2006 : :
2007 : : We need to keep this in perspective. We are replacing a read
2008 : : with a sequence of insns, but the read will almost certainly be
2009 : : in cache, so it is not going to be an expensive one. Thus, we
2010 : : are not willing to do a multi insn shift or worse a subroutine
2011 : : call to get rid of the read. */
2012 : 167569 : if (dump_file && (dump_flags & TDF_DETAILS))
2013 : 0 : fprintf (dump_file, "trying to replace %smode load in insn %d"
2014 : : " from %smode store in insn %d\n",
2015 : 0 : GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
2016 : 0 : GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
2017 : 167569 : start_sequence ();
2018 : 167569 : bb = BLOCK_FOR_INSN (read_insn->insn);
2019 : 167569 : read_reg = get_stored_val (store_info,
2020 : : read_mode, read_info->offset, read_info->width,
2021 : : bb, false);
2022 : 167569 : if (read_reg == NULL_RTX)
2023 : : {
2024 : 61014 : end_sequence ();
2025 : 61014 : if (dump_file && (dump_flags & TDF_DETAILS))
2026 : 0 : fprintf (dump_file, " -- could not extract bits of stored value\n");
2027 : 61014 : return false;
2028 : : }
2029 : : /* Force the value into a new register so that it won't be clobbered
2030 : : between the store and the load. */
2031 : 106555 : if (WORD_REGISTER_OPERATIONS
2032 : : && GET_CODE (read_reg) == SUBREG
2033 : : && REG_P (SUBREG_REG (read_reg))
2034 : : && GET_MODE (SUBREG_REG (read_reg)) == word_mode)
2035 : : {
2036 : : /* For WORD_REGISTER_OPERATIONS with subreg of word_mode register
2037 : : force SUBREG_REG into a new register rather than the SUBREG. */
2038 : : rtx r = copy_to_mode_reg (word_mode, SUBREG_REG (read_reg));
2039 : : read_reg = shallow_copy_rtx (read_reg);
2040 : : SUBREG_REG (read_reg) = r;
2041 : : }
2042 : : else
2043 : 106555 : read_reg = copy_to_mode_reg (read_mode, read_reg);
2044 : 106555 : insns = get_insns ();
2045 : 106555 : end_sequence ();
2046 : :
2047 : 106555 : if (insns != NULL_RTX)
2048 : : {
2049 : : /* Now we have to scan the set of new instructions to see if the
2050 : : sequence contains and sets of hardregs that happened to be
2051 : : live at this point. For instance, this can happen if one of
2052 : : the insns sets the CC and the CC happened to be live at that
2053 : : point. This does occasionally happen, see PR 37922. */
2054 : 106555 : bitmap regs_set = BITMAP_ALLOC (®_obstack);
2055 : :
2056 : 251775 : for (this_insn = insns;
2057 : 251775 : this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
2058 : : {
2059 : 145223 : if (insn_invalid_p (this_insn, false))
2060 : : {
2061 : 3 : if (dump_file && (dump_flags & TDF_DETAILS))
2062 : : {
2063 : 0 : fprintf (dump_file, " -- replacing the loaded MEM with ");
2064 : 0 : print_simple_rtl (dump_file, read_reg);
2065 : 0 : fprintf (dump_file, " led to an invalid instruction\n");
2066 : : }
2067 : 3 : BITMAP_FREE (regs_set);
2068 : 3 : return false;
2069 : : }
2070 : 145220 : note_stores (this_insn, look_for_hardregs, regs_set);
2071 : : }
2072 : :
2073 : 106552 : if (store_insn->fixed_regs_live)
2074 : 106552 : bitmap_and_into (regs_set, store_insn->fixed_regs_live);
2075 : 106552 : if (!bitmap_empty_p (regs_set))
2076 : : {
2077 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
2078 : : {
2079 : 0 : fprintf (dump_file, "abandoning replacement because sequence "
2080 : : "clobbers live hardregs:");
2081 : 0 : df_print_regset (dump_file, regs_set);
2082 : : }
2083 : :
2084 : 0 : BITMAP_FREE (regs_set);
2085 : 0 : return false;
2086 : : }
2087 : 106552 : BITMAP_FREE (regs_set);
2088 : : }
2089 : :
2090 : 106552 : subrtx_iterator::array_type array;
2091 : 527199 : FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
2092 : : {
2093 : 420657 : const_rtx x = *iter;
2094 : 420657 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2095 : : {
2096 : 10 : if (dump_file && (dump_flags & TDF_DETAILS))
2097 : 0 : fprintf (dump_file, " -- replacing the MEM failed due to address "
2098 : : "side-effects\n");
2099 : 10 : return false;
2100 : : }
2101 : : }
2102 : :
2103 : 106542 : if (validate_change (read_insn->insn, loc, read_reg, 0))
2104 : : {
2105 : 85117 : deferred_change *change = deferred_change_pool.allocate ();
2106 : :
2107 : : /* Insert this right before the store insn where it will be safe
2108 : : from later insns that might change it before the read. */
2109 : 85117 : emit_insn_before (insns, store_insn->insn);
2110 : :
2111 : : /* And now for the kludge part: cselib croaks if you just
2112 : : return at this point. There are two reasons for this:
2113 : :
2114 : : 1) Cselib has an idea of how many pseudos there are and
2115 : : that does not include the new ones we just added.
2116 : :
2117 : : 2) Cselib does not know about the move insn we added
2118 : : above the store_info, and there is no way to tell it
2119 : : about it, because it has "moved on".
2120 : :
2121 : : Problem (1) is fixable with a certain amount of engineering.
2122 : : Problem (2) is requires starting the bb from scratch. This
2123 : : could be expensive.
2124 : :
2125 : : So we are just going to have to lie. The move/extraction
2126 : : insns are not really an issue, cselib did not see them. But
2127 : : the use of the new pseudo read_insn is a real problem because
2128 : : cselib has not scanned this insn. The way that we solve this
2129 : : problem is that we are just going to put the mem back for now
2130 : : and when we are finished with the block, we undo this. We
2131 : : keep a table of mems to get rid of. At the end of the basic
2132 : : block we can put them back. */
2133 : :
2134 : 85117 : *loc = read_info->mem;
2135 : 85117 : change->next = deferred_change_list;
2136 : 85117 : deferred_change_list = change;
2137 : 85117 : change->loc = loc;
2138 : 85117 : change->reg = read_reg;
2139 : :
2140 : : /* Get rid of the read_info, from the point of view of the
2141 : : rest of dse, play like this read never happened. */
2142 : 85117 : read_insn->read_rec = read_info->next;
2143 : 85117 : read_info_type_pool.remove (read_info);
2144 : 85117 : if (dump_file && (dump_flags & TDF_DETAILS))
2145 : : {
2146 : 0 : fprintf (dump_file, " -- replaced the loaded MEM with ");
2147 : 0 : print_simple_rtl (dump_file, read_reg);
2148 : 0 : fprintf (dump_file, "\n");
2149 : : }
2150 : 85117 : return true;
2151 : : }
2152 : : else
2153 : : {
2154 : 21425 : if (dump_file && (dump_flags & TDF_DETAILS))
2155 : : {
2156 : 0 : fprintf (dump_file, " -- replacing the loaded MEM with ");
2157 : 0 : print_simple_rtl (dump_file, read_reg);
2158 : 0 : fprintf (dump_file, " led to an invalid instruction\n");
2159 : : }
2160 : 21425 : return false;
2161 : : }
2162 : 106552 : }
2163 : :
2164 : : /* Check the address of MEM *LOC and kill any appropriate stores that may
2165 : : be active. */
2166 : :
2167 : : static void
2168 : 29039133 : check_mem_read_rtx (rtx *loc, bb_info_t bb_info, bool used_in_call = false)
2169 : : {
2170 : 29039133 : rtx mem = *loc, mem_addr;
2171 : 29039133 : insn_info_t insn_info;
2172 : 29039133 : poly_int64 offset = 0;
2173 : 29039133 : poly_int64 width = 0;
2174 : 29039133 : cselib_val *base = NULL;
2175 : 29039133 : int group_id;
2176 : 29039133 : read_info_t read_info;
2177 : :
2178 : 29039133 : insn_info = bb_info->last_insn;
2179 : :
2180 : 29039133 : if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2181 : 29039133 : || MEM_VOLATILE_P (mem))
2182 : : {
2183 : 1085942 : if (crtl->stack_protect_guard
2184 : 994 : && (MEM_EXPR (mem) == crtl->stack_protect_guard
2185 : 662 : || (crtl->stack_protect_guard_decl
2186 : 662 : && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
2187 : 1086936 : && MEM_VOLATILE_P (mem))
2188 : : {
2189 : : /* This is either the stack protector canary on the stack,
2190 : : which ought to be written by a MEM_VOLATILE_P store and
2191 : : thus shouldn't be deleted and is read at the very end of
2192 : : function, but shouldn't conflict with any other store.
2193 : : Or it is __stack_chk_guard variable or TLS or whatever else
2194 : : MEM holding the canary value, which really shouldn't be
2195 : : ever modified in -fstack-protector* protected functions,
2196 : : otherwise the prologue store wouldn't match the epilogue
2197 : : check. */
2198 : 994 : if (dump_file && (dump_flags & TDF_DETAILS))
2199 : 0 : fprintf (dump_file, " stack protector canary read ignored.\n");
2200 : 994 : insn_info->cannot_delete = true;
2201 : 2959503 : return;
2202 : : }
2203 : :
2204 : 1084948 : if (dump_file && (dump_flags & TDF_DETAILS))
2205 : 0 : fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2206 : 1084948 : add_wild_read (bb_info);
2207 : 1084948 : insn_info->cannot_delete = true;
2208 : 1084948 : return;
2209 : : }
2210 : :
2211 : : /* If it is reading readonly mem, then there can be no conflict with
2212 : : another write. */
2213 : 27953191 : if (MEM_READONLY_P (mem))
2214 : : return;
2215 : :
2216 : 26332109 : if (!canon_address (mem, &group_id, &offset, &base))
2217 : : {
2218 : 167289 : if (dump_file && (dump_flags & TDF_DETAILS))
2219 : 0 : fprintf (dump_file, " adding wild read, canon_address failure.\n");
2220 : 167289 : add_wild_read (bb_info);
2221 : 167289 : return;
2222 : : }
2223 : :
2224 : 26164820 : if (GET_MODE (mem) == BLKmode)
2225 : 62140 : width = -1;
2226 : : else
2227 : 52205360 : width = GET_MODE_SIZE (GET_MODE (mem));
2228 : :
2229 : 52329640 : if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2230 : : {
2231 : 73 : if (dump_file && (dump_flags & TDF_DETAILS))
2232 : 0 : fprintf (dump_file, " adding wild read, due to overflow.\n");
2233 : 73 : add_wild_read (bb_info);
2234 : 73 : return;
2235 : : }
2236 : :
2237 : 26164747 : read_info = read_info_type_pool.allocate ();
2238 : 26164747 : read_info->group_id = group_id;
2239 : 26164747 : read_info->mem = mem;
2240 : 26164747 : read_info->offset = offset;
2241 : 26164747 : read_info->width = width;
2242 : 26164747 : read_info->next = insn_info->read_rec;
2243 : 26164747 : insn_info->read_rec = read_info;
2244 : 26164747 : if (group_id < 0)
2245 : 12473262 : mem_addr = base->val_rtx;
2246 : : else
2247 : : {
2248 : 13691485 : group_info *group = rtx_group_vec[group_id];
2249 : 13691485 : mem_addr = group->canon_base_addr;
2250 : : }
2251 : 26164747 : if (maybe_ne (offset, 0))
2252 : 11171296 : mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2253 : : /* Avoid passing VALUE RTXen as mem_addr to canon_true_dependence
2254 : : which will over and over re-create proper RTL and re-apply the
2255 : : offset above. See PR80960 where we almost allocate 1.6GB of PLUS
2256 : : RTXen that way. */
2257 : 26164747 : mem_addr = get_addr (mem_addr);
2258 : :
2259 : 26164747 : if (group_id >= 0)
2260 : : {
2261 : : /* This is the restricted case where the base is a constant or
2262 : : the frame pointer and offset is a constant. */
2263 : 13691485 : insn_info_t i_ptr = active_local_stores;
2264 : 13691485 : insn_info_t last = NULL;
2265 : :
2266 : 13691485 : if (dump_file && (dump_flags & TDF_DETAILS))
2267 : : {
2268 : 0 : if (!known_size_p (width))
2269 : 0 : fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2270 : : group_id);
2271 : : else
2272 : : {
2273 : 0 : fprintf (dump_file, " processing const load gid=%d", group_id);
2274 : 0 : print_range (dump_file, offset, width);
2275 : 0 : fprintf (dump_file, "\n");
2276 : : }
2277 : : }
2278 : :
2279 : 31825831 : while (i_ptr)
2280 : : {
2281 : 18218552 : bool remove = false;
2282 : 18218552 : store_info *store_info = i_ptr->store_rec;
2283 : :
2284 : : /* Skip the clobbers. */
2285 : 18218552 : while (!store_info->is_set)
2286 : 0 : store_info = store_info->next;
2287 : :
2288 : : /* There are three cases here. */
2289 : 18218552 : if (store_info->group_id < 0)
2290 : : /* We have a cselib store followed by a read from a
2291 : : const base. */
2292 : 10122946 : remove
2293 : 10122946 : = canon_true_dependence (store_info->mem,
2294 : 10122946 : GET_MODE (store_info->mem),
2295 : : store_info->mem_addr,
2296 : : mem, mem_addr);
2297 : :
2298 : 8095606 : else if (group_id == store_info->group_id)
2299 : : {
2300 : : /* This is a block mode load. We may get lucky and
2301 : : canon_true_dependence may save the day. */
2302 : 4426505 : if (!known_size_p (width))
2303 : 19564 : remove
2304 : 19564 : = canon_true_dependence (store_info->mem,
2305 : 19564 : GET_MODE (store_info->mem),
2306 : : store_info->mem_addr,
2307 : : mem, mem_addr);
2308 : :
2309 : : /* If this read is just reading back something that we just
2310 : : stored, rewrite the read. */
2311 : : else
2312 : : {
2313 : 4406941 : if (!used_in_call
2314 : 4406728 : && store_info->rhs
2315 : 3295649 : && known_subrange_p (offset, width, store_info->offset,
2316 : 3295649 : store_info->width)
2317 : 168110 : && all_positions_needed_p (store_info,
2318 : 168110 : offset - store_info->offset,
2319 : : width)
2320 : 4572712 : && replace_read (store_info, i_ptr, read_info,
2321 : : insn_info, loc))
2322 : : return;
2323 : :
2324 : : /* The bases are the same, just see if the offsets
2325 : : could overlap. */
2326 : 4322735 : if (ranges_maybe_overlap_p (offset, width,
2327 : 4322735 : store_info->offset,
2328 : 4322735 : store_info->width))
2329 : : remove = true;
2330 : : }
2331 : : }
2332 : :
2333 : : /* else
2334 : : The else case that is missing here is that the
2335 : : bases are constant but different. There is nothing
2336 : : to do here because there is no overlap. */
2337 : :
2338 : 10142510 : if (remove)
2339 : : {
2340 : 5560777 : if (dump_file && (dump_flags & TDF_DETAILS))
2341 : 0 : dump_insn_info ("removing from active", i_ptr);
2342 : :
2343 : 5560777 : active_local_stores_len--;
2344 : 5560777 : if (last)
2345 : 442059 : last->next_local_store = i_ptr->next_local_store;
2346 : : else
2347 : 5118718 : active_local_stores = i_ptr->next_local_store;
2348 : : }
2349 : : else
2350 : : last = i_ptr;
2351 : 18134346 : i_ptr = i_ptr->next_local_store;
2352 : : }
2353 : : }
2354 : : else
2355 : : {
2356 : 12473262 : insn_info_t i_ptr = active_local_stores;
2357 : 12473262 : insn_info_t last = NULL;
2358 : 12473262 : if (dump_file && (dump_flags & TDF_DETAILS))
2359 : : {
2360 : 0 : fprintf (dump_file, " processing cselib load mem:");
2361 : 0 : print_inline_rtx (dump_file, mem, 0);
2362 : 0 : fprintf (dump_file, "\n");
2363 : : }
2364 : :
2365 : 31274991 : while (i_ptr)
2366 : : {
2367 : 18802640 : bool remove = false;
2368 : 18802640 : store_info *store_info = i_ptr->store_rec;
2369 : :
2370 : 18802640 : if (dump_file && (dump_flags & TDF_DETAILS))
2371 : 0 : fprintf (dump_file, " processing cselib load against insn %d\n",
2372 : 0 : INSN_UID (i_ptr->insn));
2373 : :
2374 : : /* Skip the clobbers. */
2375 : 18802640 : while (!store_info->is_set)
2376 : 0 : store_info = store_info->next;
2377 : :
2378 : : /* If this read is just reading back something that we just
2379 : : stored, rewrite the read. */
2380 : 18802640 : if (!used_in_call
2381 : 18802578 : && store_info->rhs
2382 : 1289510 : && store_info->group_id == -1
2383 : 541260 : && store_info->cse_base == base
2384 : 81239 : && known_subrange_p (offset, width, store_info->offset,
2385 : 81239 : store_info->width)
2386 : 1832 : && all_positions_needed_p (store_info,
2387 : 1832 : offset - store_info->offset, width)
2388 : 18804438 : && replace_read (store_info, i_ptr, read_info, insn_info, loc))
2389 : : return;
2390 : :
2391 : 37603458 : remove = canon_true_dependence (store_info->mem,
2392 : 18801729 : GET_MODE (store_info->mem),
2393 : : store_info->mem_addr,
2394 : : mem, mem_addr);
2395 : :
2396 : 18801729 : if (remove)
2397 : : {
2398 : 1405835 : if (dump_file && (dump_flags & TDF_DETAILS))
2399 : 0 : dump_insn_info ("removing from active", i_ptr);
2400 : :
2401 : 1405835 : active_local_stores_len--;
2402 : 1405835 : if (last)
2403 : 281485 : last->next_local_store = i_ptr->next_local_store;
2404 : : else
2405 : 1124350 : active_local_stores = i_ptr->next_local_store;
2406 : : }
2407 : : else
2408 : : last = i_ptr;
2409 : 18801729 : i_ptr = i_ptr->next_local_store;
2410 : : }
2411 : : }
2412 : : }
2413 : :
2414 : : /* A note_uses callback in which DATA points the INSN_INFO for
2415 : : as check_mem_read_rtx. Nullify the pointer if i_m_r_m_r returns
2416 : : true for any part of *LOC. */
2417 : :
2418 : : static void
2419 : 135194549 : check_mem_read_use (rtx *loc, void *data)
2420 : : {
2421 : 135194549 : subrtx_ptr_iterator::array_type array;
2422 : 487211865 : FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2423 : : {
2424 : 352017316 : rtx *loc = *iter;
2425 : 352017316 : if (MEM_P (*loc))
2426 : 28936190 : check_mem_read_rtx (loc, (bb_info_t) data);
2427 : : }
2428 : 135194549 : }
2429 : :
2430 : :
2431 : : /* Get arguments passed to CALL_INSN. Return TRUE if successful.
2432 : : So far it only handles arguments passed in registers. */
2433 : :
2434 : : static bool
2435 : 22776 : get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2436 : : {
2437 : 22776 : CUMULATIVE_ARGS args_so_far_v;
2438 : 22776 : cumulative_args_t args_so_far;
2439 : 22776 : tree arg;
2440 : 22776 : int idx;
2441 : :
2442 : 22776 : INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2443 : 22776 : args_so_far = pack_cumulative_args (&args_so_far_v);
2444 : :
2445 : 22776 : arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2446 : 22776 : for (idx = 0;
2447 : 83136 : arg != void_list_node && idx < nargs;
2448 : 60360 : arg = TREE_CHAIN (arg), idx++)
2449 : : {
2450 : 63016 : scalar_int_mode mode;
2451 : 63016 : rtx reg, link, tmp;
2452 : :
2453 : 63016 : if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2454 : 2656 : return false;
2455 : :
2456 : 63016 : function_arg_info arg (mode, /*named=*/true);
2457 : 63016 : reg = targetm.calls.function_arg (args_so_far, arg);
2458 : 63016 : if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2459 : : return false;
2460 : :
2461 : 60370 : for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2462 : 176575 : link;
2463 : 116205 : link = XEXP (link, 1))
2464 : 176565 : if (GET_CODE (XEXP (link, 0)) == USE)
2465 : : {
2466 : 120750 : scalar_int_mode arg_mode;
2467 : 120750 : args[idx] = XEXP (XEXP (link, 0), 0);
2468 : 120750 : if (REG_P (args[idx])
2469 : 120750 : && REGNO (args[idx]) == REGNO (reg)
2470 : 181120 : && (GET_MODE (args[idx]) == mode
2471 : 10 : || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2472 : 10 : && (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2473 : 20 : && (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2474 : : break;
2475 : : }
2476 : 60360 : if (!link)
2477 : : return false;
2478 : :
2479 : 60360 : tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2480 : 60360 : if (GET_MODE (args[idx]) != mode)
2481 : : {
2482 : 0 : if (!tmp || !CONST_INT_P (tmp))
2483 : : return false;
2484 : 0 : tmp = gen_int_mode (INTVAL (tmp), mode);
2485 : : }
2486 : 60360 : if (tmp)
2487 : 60360 : args[idx] = tmp;
2488 : :
2489 : 60360 : targetm.calls.function_arg_advance (args_so_far, arg);
2490 : : }
2491 : 20120 : if (arg != void_list_node || idx != nargs)
2492 : : return false;
2493 : : return true;
2494 : : }
2495 : :
2496 : : /* Return a bitmap of the fixed registers contained in IN. */
2497 : :
2498 : : static bitmap
2499 : 18653097 : copy_fixed_regs (const_bitmap in)
2500 : : {
2501 : 18653097 : bitmap ret;
2502 : :
2503 : 18653097 : ret = ALLOC_REG_SET (NULL);
2504 : 18653097 : bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
2505 : 18653097 : return ret;
2506 : : }
2507 : :
2508 : : /* Apply record_store to all candidate stores in INSN. Mark INSN
2509 : : if some part of it is not a candidate store and assigns to a
2510 : : non-register target. */
2511 : :
2512 : : static void
2513 : 190446988 : scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
2514 : : {
2515 : 190446988 : rtx body;
2516 : 190446988 : insn_info_type *insn_info = insn_info_type_pool.allocate ();
2517 : 190446988 : int mems_found = 0;
2518 : 190446988 : memset (insn_info, 0, sizeof (struct insn_info_type));
2519 : :
2520 : 190446988 : if (dump_file && (dump_flags & TDF_DETAILS))
2521 : 0 : fprintf (dump_file, "\n**scanning insn=%d\n",
2522 : 0 : INSN_UID (insn));
2523 : :
2524 : 190446988 : insn_info->prev_insn = bb_info->last_insn;
2525 : 190446988 : insn_info->insn = insn;
2526 : 190446988 : bb_info->last_insn = insn_info;
2527 : :
2528 : 190446988 : if (DEBUG_INSN_P (insn))
2529 : : {
2530 : 76758564 : insn_info->cannot_delete = true;
2531 : 76758564 : return;
2532 : : }
2533 : :
2534 : : /* Look at all of the uses in the insn. */
2535 : 113688424 : note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2536 : :
2537 : 113688424 : if (CALL_P (insn))
2538 : : {
2539 : 9072657 : bool const_call;
2540 : 9072657 : rtx call, sym;
2541 : 9072657 : tree memset_call = NULL_TREE;
2542 : :
2543 : 9072657 : insn_info->cannot_delete = true;
2544 : :
2545 : : /* Const functions cannot do anything bad i.e. read memory,
2546 : : however, they can read their parameters which may have
2547 : : been pushed onto the stack.
2548 : : memset and bzero don't read memory either. */
2549 : 9072657 : const_call = RTL_CONST_CALL_P (insn);
2550 : 9072657 : if (!const_call
2551 : 8774762 : && (call = get_call_rtx_from (insn))
2552 : 8774762 : && (sym = XEXP (XEXP (call, 0), 0))
2553 : 8774762 : && GET_CODE (sym) == SYMBOL_REF
2554 : 8446411 : && SYMBOL_REF_DECL (sym)
2555 : 8201971 : && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2556 : 17274593 : && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
2557 : : memset_call = SYMBOL_REF_DECL (sym);
2558 : :
2559 : 9072657 : if (const_call || memset_call)
2560 : : {
2561 : 320671 : insn_info_t i_ptr = active_local_stores;
2562 : 320671 : insn_info_t last = NULL;
2563 : :
2564 : 320671 : if (dump_file && (dump_flags & TDF_DETAILS))
2565 : 0 : fprintf (dump_file, "%s call %d\n",
2566 : 0 : const_call ? "const" : "memset", INSN_UID (insn));
2567 : :
2568 : : /* See the head comment of the frame_read field. */
2569 : 320671 : if (reload_completed
2570 : : /* Tail calls are storing their arguments using
2571 : : arg pointer. If it is a frame pointer on the target,
2572 : : even before reload we need to kill frame pointer based
2573 : : stores. */
2574 : 320671 : || (SIBLING_CALL_P (insn)
2575 : : && HARD_FRAME_POINTER_IS_ARG_POINTER))
2576 : 159442 : insn_info->frame_read = true;
2577 : :
2578 : : /* Loop over the active stores and remove those which are
2579 : : killed by the const function call. */
2580 : 359783 : while (i_ptr)
2581 : : {
2582 : 39112 : bool remove_store = false;
2583 : :
2584 : : /* The stack pointer based stores are always killed. */
2585 : 39112 : if (i_ptr->stack_pointer_based)
2586 : : remove_store = true;
2587 : :
2588 : : /* If the frame is read, the frame related stores are killed. */
2589 : 9368 : else if (insn_info->frame_read)
2590 : : {
2591 : 3486 : store_info *store_info = i_ptr->store_rec;
2592 : :
2593 : : /* Skip the clobbers. */
2594 : 3486 : while (!store_info->is_set)
2595 : 0 : store_info = store_info->next;
2596 : :
2597 : 3486 : if (store_info->group_id >= 0
2598 : 3486 : && rtx_group_vec[store_info->group_id]->frame_related)
2599 : : remove_store = true;
2600 : : }
2601 : :
2602 : : if (remove_store)
2603 : : {
2604 : 32108 : if (dump_file && (dump_flags & TDF_DETAILS))
2605 : 0 : dump_insn_info ("removing from active", i_ptr);
2606 : :
2607 : 32108 : active_local_stores_len--;
2608 : 32108 : if (last)
2609 : 40 : last->next_local_store = i_ptr->next_local_store;
2610 : : else
2611 : 32068 : active_local_stores = i_ptr->next_local_store;
2612 : : }
2613 : : else
2614 : : last = i_ptr;
2615 : :
2616 : 39112 : i_ptr = i_ptr->next_local_store;
2617 : : }
2618 : :
2619 : 320671 : if (memset_call)
2620 : : {
2621 : 22776 : rtx args[3];
2622 : 22776 : if (get_call_args (insn, memset_call, args, 3)
2623 : 20120 : && CONST_INT_P (args[1])
2624 : 19050 : && CONST_INT_P (args[2])
2625 : 23685 : && INTVAL (args[2]) > 0)
2626 : : {
2627 : 857 : rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2628 : 857 : set_mem_size (mem, INTVAL (args[2]));
2629 : 857 : body = gen_rtx_SET (mem, args[1]);
2630 : 857 : mems_found += record_store (body, bb_info);
2631 : 857 : if (dump_file && (dump_flags & TDF_DETAILS))
2632 : 0 : fprintf (dump_file, "handling memset as BLKmode store\n");
2633 : 857 : if (mems_found == 1)
2634 : : {
2635 : 528 : if (active_local_stores_len++ >= max_active_local_stores)
2636 : : {
2637 : 0 : active_local_stores_len = 1;
2638 : 0 : active_local_stores = NULL;
2639 : : }
2640 : 528 : insn_info->fixed_regs_live
2641 : 528 : = copy_fixed_regs (bb_info->regs_live);
2642 : 528 : insn_info->next_local_store = active_local_stores;
2643 : 528 : active_local_stores = insn_info;
2644 : : }
2645 : : }
2646 : : else
2647 : 21919 : clear_rhs_from_active_local_stores ();
2648 : : }
2649 : : }
2650 : 8751986 : else if (SIBLING_CALL_P (insn)
2651 : 8751986 : && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
2652 : : /* Arguments for a sibling call that are pushed to memory are passed
2653 : : using the incoming argument pointer of the current function. After
2654 : : reload that might be (and likely is) frame pointer based. And, if
2655 : : it is a frame pointer on the target, even before reload we need to
2656 : : kill frame pointer based stores. */
2657 : 116499 : add_wild_read (bb_info);
2658 : : else
2659 : : /* Every other call, including pure functions, may read any memory
2660 : : that is not relative to the frame. */
2661 : 8635487 : add_non_frame_wild_read (bb_info);
2662 : :
2663 : 9072657 : for (rtx link = CALL_INSN_FUNCTION_USAGE (insn);
2664 : 26073333 : link != NULL_RTX;
2665 : 17000676 : link = XEXP (link, 1))
2666 : 17000676 : if (GET_CODE (XEXP (link, 0)) == USE && MEM_P (XEXP (XEXP (link, 0),0)))
2667 : 102943 : check_mem_read_rtx (&XEXP (XEXP (link, 0),0), bb_info, true);
2668 : :
2669 : : return;
2670 : : }
2671 : :
2672 : : /* Assuming that there are sets in these insns, we cannot delete
2673 : : them. */
2674 : 104615767 : if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2675 : 104566107 : || volatile_refs_p (PATTERN (insn))
2676 : 101976223 : || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2677 : 99097184 : || (RTX_FRAME_RELATED_P (insn))
2678 : 200468440 : || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2679 : 8763094 : insn_info->cannot_delete = true;
2680 : :
2681 : 104615767 : body = PATTERN (insn);
2682 : 104615767 : if (GET_CODE (body) == PARALLEL)
2683 : : {
2684 : : int i;
2685 : 44703806 : for (i = 0; i < XVECLEN (body, 0); i++)
2686 : 30500788 : mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2687 : : }
2688 : : else
2689 : 90412749 : mems_found += record_store (body, bb_info);
2690 : :
2691 : 104615767 : if (dump_file && (dump_flags & TDF_DETAILS))
2692 : 0 : fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2693 : 0 : mems_found, insn_info->cannot_delete ? "true" : "false");
2694 : :
2695 : : /* If we found some sets of mems, add it into the active_local_stores so
2696 : : that it can be locally deleted if found dead or used for
2697 : : replace_read and redundant constant store elimination. Otherwise mark
2698 : : it as cannot delete. This simplifies the processing later. */
2699 : 104615767 : if (mems_found == 1)
2700 : : {
2701 : 18652569 : if (active_local_stores_len++ >= max_active_local_stores)
2702 : : {
2703 : 16 : active_local_stores_len = 1;
2704 : 16 : active_local_stores = NULL;
2705 : : }
2706 : 18652569 : insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2707 : 18652569 : insn_info->next_local_store = active_local_stores;
2708 : 18652569 : active_local_stores = insn_info;
2709 : : }
2710 : : else
2711 : 85963198 : insn_info->cannot_delete = true;
2712 : : }
2713 : :
2714 : :
2715 : : /* Remove BASE from the set of active_local_stores. This is a
2716 : : callback from cselib that is used to get rid of the stores in
2717 : : active_local_stores. */
2718 : :
2719 : : static void
2720 : 551874 : remove_useless_values (cselib_val *base)
2721 : : {
2722 : 551874 : insn_info_t insn_info = active_local_stores;
2723 : 551874 : insn_info_t last = NULL;
2724 : :
2725 : 5689240 : while (insn_info)
2726 : : {
2727 : 5137366 : store_info *store_info = insn_info->store_rec;
2728 : 5137366 : bool del = false;
2729 : :
2730 : : /* If ANY of the store_infos match the cselib group that is
2731 : : being deleted, then the insn cannot be deleted. */
2732 : 10267523 : while (store_info)
2733 : : {
2734 : 5137366 : if ((store_info->group_id == -1)
2735 : 4735320 : && (store_info->cse_base == base))
2736 : : {
2737 : : del = true;
2738 : : break;
2739 : : }
2740 : 5130157 : store_info = store_info->next;
2741 : : }
2742 : :
2743 : 5137366 : if (del)
2744 : : {
2745 : 7209 : active_local_stores_len--;
2746 : 7209 : if (last)
2747 : 6819 : last->next_local_store = insn_info->next_local_store;
2748 : : else
2749 : 390 : active_local_stores = insn_info->next_local_store;
2750 : 7209 : free_store_info (insn_info);
2751 : : }
2752 : : else
2753 : : last = insn_info;
2754 : :
2755 : 5137366 : insn_info = insn_info->next_local_store;
2756 : : }
2757 : 551874 : }
2758 : :
2759 : :
2760 : : /* Do all of step 1. */
2761 : :
2762 : : static void
2763 : 1969986 : dse_step1 (void)
2764 : : {
2765 : 1969986 : basic_block bb;
2766 : 1969986 : bitmap regs_live = BITMAP_ALLOC (®_obstack);
2767 : :
2768 : 1969986 : cselib_init (0);
2769 : 1969986 : all_blocks = BITMAP_ALLOC (NULL);
2770 : 1969986 : bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2771 : 1969986 : bitmap_set_bit (all_blocks, EXIT_BLOCK);
2772 : :
2773 : : /* For -O1 reduce the maximum number of active local stores for RTL DSE
2774 : : since this can consume huge amounts of memory (PR89115). */
2775 : 1969986 : int max_active_local_stores = param_max_dse_active_local_stores;
2776 : 1969986 : if (optimize < 2)
2777 : 141264 : max_active_local_stores /= 10;
2778 : :
2779 : 25698563 : FOR_ALL_BB_FN (bb, cfun)
2780 : : {
2781 : 23728577 : insn_info_t ptr;
2782 : 23728577 : bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2783 : :
2784 : 23728577 : memset (bb_info, 0, sizeof (dse_bb_info_type));
2785 : 23728577 : bitmap_set_bit (all_blocks, bb->index);
2786 : 23728577 : bb_info->regs_live = regs_live;
2787 : :
2788 : 47457154 : bitmap_copy (regs_live, DF_LR_IN (bb));
2789 : 23728577 : df_simulate_initialize_forwards (bb, regs_live);
2790 : :
2791 : 23728577 : bb_table[bb->index] = bb_info;
2792 : 23728577 : cselib_discard_hook = remove_useless_values;
2793 : :
2794 : 23728577 : if (bb->index >= NUM_FIXED_BLOCKS)
2795 : : {
2796 : 19788605 : rtx_insn *insn;
2797 : :
2798 : 19788605 : active_local_stores = NULL;
2799 : 19788605 : active_local_stores_len = 0;
2800 : 19788605 : cselib_clear_table ();
2801 : :
2802 : : /* Scan the insns. */
2803 : 248321633 : FOR_BB_INSNS (bb, insn)
2804 : : {
2805 : 228533028 : if (INSN_P (insn))
2806 : 190446988 : scan_insn (bb_info, insn, max_active_local_stores);
2807 : 228533028 : cselib_process_insn (insn);
2808 : 228533028 : if (INSN_P (insn))
2809 : 190446988 : df_simulate_one_insn_forwards (bb, insn, regs_live);
2810 : : }
2811 : :
2812 : : /* This is something of a hack, because the global algorithm
2813 : : is supposed to take care of the case where stores go dead
2814 : : at the end of the function. However, the global
2815 : : algorithm must take a more conservative view of block
2816 : : mode reads than the local alg does. So to get the case
2817 : : where you have a store to the frame followed by a non
2818 : : overlapping block more read, we look at the active local
2819 : : stores at the end of the function and delete all of the
2820 : : frame and spill based ones. */
2821 : 19788605 : if (stores_off_frame_dead_at_return
2822 : 19788605 : && (EDGE_COUNT (bb->succs) == 0
2823 : 18639757 : || (single_succ_p (bb)
2824 : 8755527 : && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2825 : 2056961 : && ! crtl->calls_eh_return)))
2826 : : {
2827 : 3112006 : insn_info_t i_ptr = active_local_stores;
2828 : 3480217 : while (i_ptr)
2829 : : {
2830 : 368211 : store_info *store_info = i_ptr->store_rec;
2831 : :
2832 : : /* Skip the clobbers. */
2833 : 368214 : while (!store_info->is_set)
2834 : 3 : store_info = store_info->next;
2835 : 368211 : if (store_info->group_id >= 0)
2836 : : {
2837 : 78000 : group_info *group = rtx_group_vec[store_info->group_id];
2838 : 78000 : if (group->frame_related && !i_ptr->cannot_delete)
2839 : 11116 : delete_dead_store_insn (i_ptr);
2840 : : }
2841 : :
2842 : 368211 : i_ptr = i_ptr->next_local_store;
2843 : : }
2844 : : }
2845 : :
2846 : : /* Get rid of the loads that were discovered in
2847 : : replace_read. Cselib is finished with this block. */
2848 : 19873722 : while (deferred_change_list)
2849 : : {
2850 : 85117 : deferred_change *next = deferred_change_list->next;
2851 : :
2852 : : /* There is no reason to validate this change. That was
2853 : : done earlier. */
2854 : 85117 : *deferred_change_list->loc = deferred_change_list->reg;
2855 : 85117 : deferred_change_pool.remove (deferred_change_list);
2856 : 85117 : deferred_change_list = next;
2857 : : }
2858 : :
2859 : : /* Get rid of all of the cselib based store_infos in this
2860 : : block and mark the containing insns as not being
2861 : : deletable. */
2862 : 19788605 : ptr = bb_info->last_insn;
2863 : 210235593 : while (ptr)
2864 : : {
2865 : 190446988 : if (ptr->contains_cselib_groups)
2866 : : {
2867 : 13602840 : store_info *s_info = ptr->store_rec;
2868 : 13602855 : while (s_info && !s_info->is_set)
2869 : 15 : s_info = s_info->next;
2870 : 13602840 : if (s_info
2871 : 13602840 : && s_info->redundant_reason
2872 : 54 : && s_info->redundant_reason->insn
2873 : 47 : && !ptr->cannot_delete)
2874 : : {
2875 : 41 : if (dump_file && (dump_flags & TDF_DETAILS))
2876 : 0 : fprintf (dump_file, "Locally deleting insn %d "
2877 : : "because insn %d stores the "
2878 : : "same value and couldn't be "
2879 : : "eliminated\n",
2880 : 0 : INSN_UID (ptr->insn),
2881 : 0 : INSN_UID (s_info->redundant_reason->insn));
2882 : 41 : delete_dead_store_insn (ptr);
2883 : : }
2884 : 13602840 : free_store_info (ptr);
2885 : : }
2886 : : else
2887 : : {
2888 : 176844148 : store_info *s_info;
2889 : :
2890 : : /* Free at least positions_needed bitmaps. */
2891 : 181870977 : for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2892 : 5026829 : if (s_info->is_large)
2893 : : {
2894 : 14427 : BITMAP_FREE (s_info->positions_needed.large.bmap);
2895 : 14427 : s_info->is_large = false;
2896 : : }
2897 : : }
2898 : 190446988 : ptr = ptr->prev_insn;
2899 : : }
2900 : :
2901 : 19788605 : cse_store_info_pool.release ();
2902 : : }
2903 : 23728577 : bb_info->regs_live = NULL;
2904 : : }
2905 : :
2906 : 1969986 : BITMAP_FREE (regs_live);
2907 : 1969986 : cselib_finish ();
2908 : 1969986 : rtx_group_table->empty ();
2909 : 1969986 : }
2910 : :
2911 : :
2912 : : /*----------------------------------------------------------------------------
2913 : : Second step.
2914 : :
2915 : : Assign each byte position in the stores that we are going to
2916 : : analyze globally to a position in the bitmaps. Returns true if
2917 : : there are any bit positions assigned.
2918 : : ----------------------------------------------------------------------------*/
2919 : :
2920 : : static void
2921 : 1969986 : dse_step2_init (void)
2922 : : {
2923 : 1969986 : unsigned int i;
2924 : 1969986 : group_info *group;
2925 : :
2926 : 7713996 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2927 : : {
2928 : : /* For all non stack related bases, we only consider a store to
2929 : : be deletable if there are two or more stores for that
2930 : : position. This is because it takes one store to make the
2931 : : other store redundant. However, for the stores that are
2932 : : stack related, we consider them if there is only one store
2933 : : for the position. We do this because the stack related
2934 : : stores can be deleted if their is no read between them and
2935 : : the end of the function.
2936 : :
2937 : : To make this work in the current framework, we take the stack
2938 : : related bases add all of the bits from store1 into store2.
2939 : : This has the effect of making the eligible even if there is
2940 : : only one store. */
2941 : :
2942 : 5744010 : if (stores_off_frame_dead_at_return && group->frame_related)
2943 : : {
2944 : 394454 : bitmap_ior_into (group->store2_n, group->store1_n);
2945 : 394454 : bitmap_ior_into (group->store2_p, group->store1_p);
2946 : 394454 : if (dump_file && (dump_flags & TDF_DETAILS))
2947 : 0 : fprintf (dump_file, "group %d is frame related ", i);
2948 : : }
2949 : :
2950 : 5744010 : group->offset_map_size_n++;
2951 : 5744010 : group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2952 : : group->offset_map_size_n);
2953 : 5744010 : group->offset_map_size_p++;
2954 : 5744010 : group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2955 : : group->offset_map_size_p);
2956 : 5744010 : group->process_globally = false;
2957 : 5744010 : if (dump_file && (dump_flags & TDF_DETAILS))
2958 : : {
2959 : 0 : fprintf (dump_file, "group %d(%d+%d): ", i,
2960 : 0 : (int)bitmap_count_bits (group->store2_n),
2961 : 0 : (int)bitmap_count_bits (group->store2_p));
2962 : 0 : bitmap_print (dump_file, group->store2_n, "n ", " ");
2963 : 0 : bitmap_print (dump_file, group->store2_p, "p ", "\n");
2964 : : }
2965 : : }
2966 : 1969986 : }
2967 : :
2968 : :
2969 : : /* Init the offset tables. */
2970 : :
2971 : : static bool
2972 : 1969986 : dse_step2 (void)
2973 : : {
2974 : 1969986 : unsigned int i;
2975 : 1969986 : group_info *group;
2976 : : /* Position 0 is unused because 0 is used in the maps to mean
2977 : : unused. */
2978 : 1969986 : current_position = 1;
2979 : 7713996 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2980 : : {
2981 : 5744010 : bitmap_iterator bi;
2982 : 5744010 : unsigned int j;
2983 : :
2984 : 5744010 : memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2985 : 5744010 : memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2986 : 5744010 : bitmap_clear (group->group_kill);
2987 : :
2988 : 34084439 : EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2989 : : {
2990 : 28340429 : bitmap_set_bit (group->group_kill, current_position);
2991 : 28340429 : if (bitmap_bit_p (group->escaped_n, j))
2992 : 20427146 : bitmap_set_bit (kill_on_calls, current_position);
2993 : 28340429 : group->offset_map_n[j] = current_position++;
2994 : 28340429 : group->process_globally = true;
2995 : : }
2996 : 7115319 : EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
2997 : : {
2998 : 1371309 : bitmap_set_bit (group->group_kill, current_position);
2999 : 1371309 : if (bitmap_bit_p (group->escaped_p, j))
3000 : 1282141 : bitmap_set_bit (kill_on_calls, current_position);
3001 : 1371309 : group->offset_map_p[j] = current_position++;
3002 : 1371309 : group->process_globally = true;
3003 : : }
3004 : : }
3005 : 1969986 : return current_position != 1;
3006 : : }
3007 : :
3008 : :
3009 : :
3010 : : /*----------------------------------------------------------------------------
3011 : : Third step.
3012 : :
3013 : : Build the bit vectors for the transfer functions.
3014 : : ----------------------------------------------------------------------------*/
3015 : :
3016 : :
3017 : : /* Look up the bitmap index for OFFSET in GROUP_INFO. If it is not
3018 : : there, return 0. */
3019 : :
3020 : : static int
3021 : 138464496 : get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
3022 : : {
3023 : 138464496 : if (offset < 0)
3024 : : {
3025 : 121519484 : HOST_WIDE_INT offset_p = -offset;
3026 : 121519484 : if (offset_p >= group_info->offset_map_size_n)
3027 : : return 0;
3028 : 118953997 : return group_info->offset_map_n[offset_p];
3029 : : }
3030 : : else
3031 : : {
3032 : 16945012 : if (offset >= group_info->offset_map_size_p)
3033 : : return 0;
3034 : 16516468 : return group_info->offset_map_p[offset];
3035 : : }
3036 : : }
3037 : :
3038 : :
3039 : : /* Process the STORE_INFOs into the bitmaps into GEN and KILL. KILL
3040 : : may be NULL. */
3041 : :
3042 : : static void
3043 : 152606714 : scan_stores (store_info *store_info, bitmap gen, bitmap kill)
3044 : : {
3045 : 161529865 : while (store_info)
3046 : : {
3047 : 8923151 : HOST_WIDE_INT i, offset, width;
3048 : 8923151 : group_info *group_info
3049 : 8923151 : = rtx_group_vec[store_info->group_id];
3050 : : /* We can (conservatively) ignore stores whose bounds aren't known;
3051 : : they simply don't generate new global dse opportunities. */
3052 : 8923151 : if (group_info->process_globally
3053 : 8727393 : && store_info->offset.is_constant (&offset)
3054 : 8923151 : && store_info->width.is_constant (&width))
3055 : : {
3056 : 8727393 : HOST_WIDE_INT end = offset + width;
3057 : 100703856 : for (i = offset; i < end; i++)
3058 : : {
3059 : 91976463 : int index = get_bitmap_index (group_info, i);
3060 : 91976463 : if (index != 0)
3061 : : {
3062 : 89036599 : bitmap_set_bit (gen, index);
3063 : 89036599 : if (kill)
3064 : 39888395 : bitmap_clear_bit (kill, index);
3065 : : }
3066 : : }
3067 : : }
3068 : 8923151 : store_info = store_info->next;
3069 : : }
3070 : 152606714 : }
3071 : :
3072 : :
3073 : : /* Process the READ_INFOs into the bitmaps into GEN and KILL. KILL
3074 : : may be NULL. */
3075 : :
3076 : : static void
3077 : 84490615 : scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
3078 : : {
3079 : 84490615 : read_info_t read_info = insn_info->read_rec;
3080 : 84490615 : int i;
3081 : 84490615 : group_info *group;
3082 : :
3083 : : /* If this insn reads the frame, kill all the frame related stores. */
3084 : 84490615 : if (insn_info->frame_read)
3085 : : {
3086 : 231748 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3087 : 220659 : if (group->process_globally && group->frame_related)
3088 : : {
3089 : 7564 : if (kill)
3090 : 3368 : bitmap_ior_into (kill, group->group_kill);
3091 : 7564 : bitmap_and_compl_into (gen, group->group_kill);
3092 : : }
3093 : : }
3094 : 84490615 : if (insn_info->non_frame_wild_read)
3095 : : {
3096 : : /* Kill all non-frame related stores. Kill all stores of variables that
3097 : : escape. */
3098 : 6616378 : if (kill)
3099 : 3153111 : bitmap_ior_into (kill, kill_on_calls);
3100 : 6616378 : bitmap_and_compl_into (gen, kill_on_calls);
3101 : 151900834 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3102 : 138668078 : if (group->process_globally && !group->frame_related)
3103 : : {
3104 : 4252863 : if (kill)
3105 : 1925220 : bitmap_ior_into (kill, group->group_kill);
3106 : 4252863 : bitmap_and_compl_into (gen, group->group_kill);
3107 : : }
3108 : : }
3109 : 97514977 : while (read_info)
3110 : : {
3111 : 354960342 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3112 : : {
3113 : 341935980 : if (group->process_globally)
3114 : : {
3115 : 21514247 : if (i == read_info->group_id)
3116 : : {
3117 : 5127009 : HOST_WIDE_INT offset, width;
3118 : : /* Reads with non-constant size kill all DSE opportunities
3119 : : in the group. */
3120 : 5127009 : if (!read_info->offset.is_constant (&offset)
3121 : 5127009 : || !read_info->width.is_constant (&width)
3122 : 5127009 : || !known_size_p (width))
3123 : : {
3124 : : /* Handle block mode reads. */
3125 : 26584 : if (kill)
3126 : 13048 : bitmap_ior_into (kill, group->group_kill);
3127 : 26584 : bitmap_and_compl_into (gen, group->group_kill);
3128 : : }
3129 : : else
3130 : : {
3131 : : /* The groups are the same, just process the
3132 : : offsets. */
3133 : 5100425 : HOST_WIDE_INT j;
3134 : 5100425 : HOST_WIDE_INT end = offset + width;
3135 : 47846800 : for (j = offset; j < end; j++)
3136 : : {
3137 : 42746375 : int index = get_bitmap_index (group, j);
3138 : 42746375 : if (index != 0)
3139 : : {
3140 : 33229503 : if (kill)
3141 : 15935801 : bitmap_set_bit (kill, index);
3142 : 33229503 : bitmap_clear_bit (gen, index);
3143 : : }
3144 : : }
3145 : : }
3146 : : }
3147 : : else
3148 : : {
3149 : : /* The groups are different, if the alias sets
3150 : : conflict, clear the entire group. We only need
3151 : : to apply this test if the read_info is a cselib
3152 : : read. Anything with a constant base cannot alias
3153 : : something else with a different constant
3154 : : base. */
3155 : 16387238 : if ((read_info->group_id < 0)
3156 : 25002750 : && canon_true_dependence (group->base_mem,
3157 : 8615512 : GET_MODE (group->base_mem),
3158 : : group->canon_base_addr,
3159 : 8615512 : read_info->mem, NULL_RTX))
3160 : : {
3161 : 4547026 : if (kill)
3162 : 2153494 : bitmap_ior_into (kill, group->group_kill);
3163 : 4547026 : bitmap_and_compl_into (gen, group->group_kill);
3164 : : }
3165 : : }
3166 : : }
3167 : : }
3168 : :
3169 : 13024362 : read_info = read_info->next;
3170 : : }
3171 : 84490615 : }
3172 : :
3173 : :
3174 : : /* Return the insn in BB_INFO before the first wild read or if there
3175 : : are no wild reads in the block, return the last insn. */
3176 : :
3177 : : static insn_info_t
3178 : 7429947 : find_insn_before_first_wild_read (bb_info_t bb_info)
3179 : : {
3180 : 7429947 : insn_info_t insn_info = bb_info->last_insn;
3181 : 7429947 : insn_info_t last_wild_read = NULL;
3182 : :
3183 : 85521791 : while (insn_info)
3184 : : {
3185 : 78195691 : if (insn_info->wild_read)
3186 : : {
3187 : 653025 : last_wild_read = insn_info->prev_insn;
3188 : : /* Block starts with wild read. */
3189 : 653025 : if (!last_wild_read)
3190 : : return NULL;
3191 : : }
3192 : :
3193 : 78091844 : insn_info = insn_info->prev_insn;
3194 : : }
3195 : :
3196 : 7326100 : if (last_wild_read)
3197 : : return last_wild_read;
3198 : : else
3199 : 7053719 : return bb_info->last_insn;
3200 : : }
3201 : :
3202 : :
3203 : : /* Scan the insns in BB_INFO starting at PTR and going to the top of
3204 : : the block in order to build the gen and kill sets for the block.
3205 : : We start at ptr which may be the last insn in the block or may be
3206 : : the first insn with a wild read. In the latter case we are able to
3207 : : skip the rest of the block because it just does not matter:
3208 : : anything that happens is hidden by the wild read. */
3209 : :
3210 : : static void
3211 : 7429947 : dse_step3_scan (basic_block bb)
3212 : : {
3213 : 7429947 : bb_info_t bb_info = bb_table[bb->index];
3214 : 7429947 : insn_info_t insn_info;
3215 : :
3216 : 7429947 : insn_info = find_insn_before_first_wild_read (bb_info);
3217 : :
3218 : : /* In the spill case or in the no_spill case if there is no wild
3219 : : read in the block, we will need a kill set. */
3220 : 7429947 : if (insn_info == bb_info->last_insn)
3221 : : {
3222 : 7053719 : if (bb_info->kill)
3223 : 0 : bitmap_clear (bb_info->kill);
3224 : : else
3225 : 7053719 : bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3226 : : }
3227 : : else
3228 : 376228 : if (bb_info->kill)
3229 : 0 : BITMAP_FREE (bb_info->kill);
3230 : :
3231 : 81942438 : while (insn_info)
3232 : : {
3233 : : /* There may have been code deleted by the dce pass run before
3234 : : this phase. */
3235 : 74512491 : if (insn_info->insn && INSN_P (insn_info->insn))
3236 : : {
3237 : 74487869 : scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3238 : 74487869 : scan_reads (insn_info, bb_info->gen, bb_info->kill);
3239 : : }
3240 : :
3241 : 74512491 : insn_info = insn_info->prev_insn;
3242 : : }
3243 : 7429947 : }
3244 : :
3245 : :
3246 : : /* Set the gen set of the exit block, and also any block with no
3247 : : successors that does not have a wild read. */
3248 : :
3249 : : static void
3250 : 242410 : dse_step3_exit_block_scan (bb_info_t bb_info)
3251 : : {
3252 : : /* The gen set is all 0's for the exit block except for the
3253 : : frame_pointer_group. */
3254 : :
3255 : 242410 : if (stores_off_frame_dead_at_return)
3256 : : {
3257 : : unsigned int i;
3258 : : group_info *group;
3259 : :
3260 : 2206486 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3261 : : {
3262 : 1965349 : if (group->process_globally && group->frame_related)
3263 : 217120 : bitmap_ior_into (bb_info->gen, group->group_kill);
3264 : : }
3265 : : }
3266 : 242410 : }
3267 : :
3268 : :
3269 : : /* Find all of the blocks that are not backwards reachable from the
3270 : : exit block or any block with no successors (BB). These are the
3271 : : infinite loops or infinite self loops. These blocks will still
3272 : : have their bits set in UNREACHABLE_BLOCKS. */
3273 : :
3274 : : static void
3275 : 12108773 : mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
3276 : : {
3277 : 12108773 : edge e;
3278 : 12108773 : edge_iterator ei;
3279 : :
3280 : 12108773 : if (bitmap_bit_p (unreachable_blocks, bb->index))
3281 : : {
3282 : 7903869 : bitmap_clear_bit (unreachable_blocks, bb->index);
3283 : 19348967 : FOR_EACH_EDGE (e, ei, bb->preds)
3284 : : {
3285 : 11445098 : mark_reachable_blocks (unreachable_blocks, e->src);
3286 : : }
3287 : : }
3288 : 12108773 : }
3289 : :
3290 : : /* Build the transfer functions for the function. */
3291 : :
3292 : : static void
3293 : 242410 : dse_step3 ()
3294 : : {
3295 : 242410 : basic_block bb;
3296 : 242410 : sbitmap_iterator sbi;
3297 : 242410 : bitmap all_ones = NULL;
3298 : 242410 : unsigned int i;
3299 : :
3300 : 242410 : auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
3301 : 242410 : bitmap_ones (unreachable_blocks);
3302 : :
3303 : 8157177 : FOR_ALL_BB_FN (bb, cfun)
3304 : : {
3305 : 7914767 : bb_info_t bb_info = bb_table[bb->index];
3306 : 7914767 : if (bb_info->gen)
3307 : 0 : bitmap_clear (bb_info->gen);
3308 : : else
3309 : 7914767 : bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);
3310 : :
3311 : 7914767 : if (bb->index == ENTRY_BLOCK)
3312 : : ;
3313 : 7672357 : else if (bb->index == EXIT_BLOCK)
3314 : 242410 : dse_step3_exit_block_scan (bb_info);
3315 : : else
3316 : 7429947 : dse_step3_scan (bb);
3317 : 7914767 : if (EDGE_COUNT (bb->succs) == 0)
3318 : 663675 : mark_reachable_blocks (unreachable_blocks, bb);
3319 : :
3320 : : /* If this is the second time dataflow is run, delete the old
3321 : : sets. */
3322 : 7914767 : if (bb_info->in)
3323 : 0 : BITMAP_FREE (bb_info->in);
3324 : 7914767 : if (bb_info->out)
3325 : 0 : BITMAP_FREE (bb_info->out);
3326 : : }
3327 : :
3328 : : /* For any block in an infinite loop, we must initialize the out set
3329 : : to all ones. This could be expensive, but almost never occurs in
3330 : : practice. However, it is common in regression tests. */
3331 : 1038633 : EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
3332 : : {
3333 : 553813 : if (bitmap_bit_p (all_blocks, i))
3334 : : {
3335 : 10898 : bb_info_t bb_info = bb_table[i];
3336 : 10898 : if (!all_ones)
3337 : : {
3338 : 1441 : unsigned int j;
3339 : 1441 : group_info *group;
3340 : :
3341 : 1441 : all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
3342 : 12237 : FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
3343 : 9355 : bitmap_ior_into (all_ones, group->group_kill);
3344 : : }
3345 : 10898 : if (!bb_info->out)
3346 : : {
3347 : 10898 : bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3348 : 10898 : bitmap_copy (bb_info->out, all_ones);
3349 : : }
3350 : : }
3351 : : }
3352 : :
3353 : 242410 : if (all_ones)
3354 : 1441 : BITMAP_FREE (all_ones);
3355 : 242410 : }
3356 : :
3357 : :
3358 : :
3359 : : /*----------------------------------------------------------------------------
3360 : : Fourth step.
3361 : :
3362 : : Solve the bitvector equations.
3363 : : ----------------------------------------------------------------------------*/
3364 : :
3365 : :
3366 : : /* Confluence function for blocks with no successors. Create an out
3367 : : set from the gen set of the exit block. This block logically has
3368 : : the exit block as a successor. */
3369 : :
3370 : :
3371 : :
3372 : : static void
3373 : 663675 : dse_confluence_0 (basic_block bb)
3374 : : {
3375 : 663675 : bb_info_t bb_info = bb_table[bb->index];
3376 : :
3377 : 663675 : if (bb->index == EXIT_BLOCK)
3378 : : return;
3379 : :
3380 : 421265 : if (!bb_info->out)
3381 : : {
3382 : 421265 : bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3383 : 421265 : bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
3384 : : }
3385 : : }
3386 : :
3387 : : /* Propagate the information from the in set of the dest of E to the
3388 : : out set of the src of E. If the various in or out sets are not
3389 : : there, that means they are all ones. */
3390 : :
3391 : : static bool
3392 : 12161034 : dse_confluence_n (edge e)
3393 : : {
3394 : 12161034 : bb_info_t src_info = bb_table[e->src->index];
3395 : 12161034 : bb_info_t dest_info = bb_table[e->dest->index];
3396 : :
3397 : 12161034 : if (dest_info->in)
3398 : : {
3399 : 11749066 : if (src_info->out)
3400 : 4508872 : bitmap_and_into (src_info->out, dest_info->in);
3401 : : else
3402 : : {
3403 : 7240194 : src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3404 : 7240194 : bitmap_copy (src_info->out, dest_info->in);
3405 : : }
3406 : : }
3407 : 12161034 : return true;
3408 : : }
3409 : :
3410 : :
3411 : : /* Propagate the info from the out to the in set of BB_INDEX's basic
3412 : : block. There are three cases:
3413 : :
3414 : : 1) The block has no kill set. In this case the kill set is all
3415 : : ones. It does not matter what the out set of the block is, none of
3416 : : the info can reach the top. The only thing that reaches the top is
3417 : : the gen set and we just copy the set.
3418 : :
3419 : : 2) There is a kill set but no out set and bb has successors. In
3420 : : this case we just return. Eventually an out set will be created and
3421 : : it is better to wait than to create a set of ones.
3422 : :
3423 : : 3) There is both a kill and out set. We apply the obvious transfer
3424 : : function.
3425 : : */
3426 : :
3427 : : static bool
3428 : 8544540 : dse_transfer_function (int bb_index)
3429 : : {
3430 : 8544540 : bb_info_t bb_info = bb_table[bb_index];
3431 : :
3432 : 8544540 : if (bb_info->kill)
3433 : : {
3434 : 7655672 : if (bb_info->out)
3435 : : {
3436 : : /* Case 3 above. */
3437 : 7655672 : if (bb_info->in)
3438 : 601953 : return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3439 : 601953 : bb_info->out, bb_info->kill);
3440 : : else
3441 : : {
3442 : 7053719 : bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3443 : 7053719 : bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3444 : 7053719 : bb_info->out, bb_info->kill);
3445 : 7053719 : return true;
3446 : : }
3447 : : }
3448 : : else
3449 : : /* Case 2 above. */
3450 : : return false;
3451 : : }
3452 : : else
3453 : : {
3454 : : /* Case 1 above. If there is already an in set, nothing
3455 : : happens. */
3456 : 888868 : if (bb_info->in)
3457 : : return false;
3458 : : else
3459 : : {
3460 : 861048 : bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3461 : 861048 : bitmap_copy (bb_info->in, bb_info->gen);
3462 : 861048 : return true;
3463 : : }
3464 : : }
3465 : : }
3466 : :
3467 : : /* Solve the dataflow equations. */
3468 : :
3469 : : static void
3470 : 242410 : dse_step4 (void)
3471 : : {
3472 : 242410 : df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
3473 : : dse_confluence_n, dse_transfer_function,
3474 : : all_blocks, df_get_postorder (DF_BACKWARD),
3475 : : df_get_n_blocks (DF_BACKWARD));
3476 : 242410 : if (dump_file && (dump_flags & TDF_DETAILS))
3477 : : {
3478 : 0 : basic_block bb;
3479 : :
3480 : 0 : fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
3481 : 0 : FOR_ALL_BB_FN (bb, cfun)
3482 : : {
3483 : 0 : bb_info_t bb_info = bb_table[bb->index];
3484 : :
3485 : 0 : df_print_bb_index (bb, dump_file);
3486 : 0 : if (bb_info->in)
3487 : 0 : bitmap_print (dump_file, bb_info->in, " in: ", "\n");
3488 : : else
3489 : 0 : fprintf (dump_file, " in: *MISSING*\n");
3490 : 0 : if (bb_info->gen)
3491 : 0 : bitmap_print (dump_file, bb_info->gen, " gen: ", "\n");
3492 : : else
3493 : 0 : fprintf (dump_file, " gen: *MISSING*\n");
3494 : 0 : if (bb_info->kill)
3495 : 0 : bitmap_print (dump_file, bb_info->kill, " kill: ", "\n");
3496 : : else
3497 : 0 : fprintf (dump_file, " kill: *MISSING*\n");
3498 : 0 : if (bb_info->out)
3499 : 0 : bitmap_print (dump_file, bb_info->out, " out: ", "\n");
3500 : : else
3501 : 0 : fprintf (dump_file, " out: *MISSING*\n\n");
3502 : : }
3503 : : }
3504 : 242410 : }
3505 : :
3506 : :
3507 : :
3508 : : /*----------------------------------------------------------------------------
3509 : : Fifth step.
3510 : :
3511 : : Delete the stores that can only be deleted using the global information.
3512 : : ----------------------------------------------------------------------------*/
3513 : :
3514 : :
3515 : : static void
3516 : 242410 : dse_step5 (void)
3517 : : {
3518 : 242410 : basic_block bb;
3519 : 7672357 : FOR_EACH_BB_FN (bb, cfun)
3520 : : {
3521 : 7429947 : bb_info_t bb_info = bb_table[bb->index];
3522 : 7429947 : insn_info_t insn_info = bb_info->last_insn;
3523 : 7429947 : bitmap v = bb_info->out;
3524 : :
3525 : 85625638 : while (insn_info)
3526 : : {
3527 : 78195691 : bool deleted = false;
3528 : 78195691 : if (dump_file && insn_info->insn)
3529 : : {
3530 : 23 : fprintf (dump_file, "starting to process insn %d\n",
3531 : 23 : INSN_UID (insn_info->insn));
3532 : 23 : bitmap_print (dump_file, v, " v: ", "\n");
3533 : : }
3534 : :
3535 : : /* There may have been code deleted by the dce pass run before
3536 : : this phase. */
3537 : 78195691 : if (insn_info->insn
3538 : 78170796 : && INSN_P (insn_info->insn)
3539 : 78170796 : && (!insn_info->cannot_delete)
3540 : 82527368 : && (!bitmap_empty_p (v)))
3541 : : {
3542 : 2992676 : store_info *store_info = insn_info->store_rec;
3543 : :
3544 : : /* Try to delete the current insn. */
3545 : 2992676 : deleted = true;
3546 : :
3547 : : /* Skip the clobbers. */
3548 : 2992676 : while (!store_info->is_set)
3549 : 0 : store_info = store_info->next;
3550 : :
3551 : 2992676 : HOST_WIDE_INT i, offset, width;
3552 : 2992676 : group_info *group_info = rtx_group_vec[store_info->group_id];
3553 : :
3554 : 2992676 : if (!store_info->offset.is_constant (&offset)
3555 : 2992676 : || !store_info->width.is_constant (&width))
3556 : : deleted = false;
3557 : : else
3558 : : {
3559 : 2992676 : HOST_WIDE_INT end = offset + width;
3560 : 3793609 : for (i = offset; i < end; i++)
3561 : : {
3562 : 3741658 : int index = get_bitmap_index (group_info, i);
3563 : :
3564 : 3741658 : if (dump_file && (dump_flags & TDF_DETAILS))
3565 : 0 : fprintf (dump_file, "i = %d, index = %d\n",
3566 : : (int) i, index);
3567 : 3741658 : if (index == 0 || !bitmap_bit_p (v, index))
3568 : : {
3569 : 2940725 : if (dump_file && (dump_flags & TDF_DETAILS))
3570 : 0 : fprintf (dump_file, "failing at i = %d\n",
3571 : : (int) i);
3572 : : deleted = false;
3573 : : break;
3574 : : }
3575 : : }
3576 : : }
3577 : 51951 : if (deleted)
3578 : : {
3579 : 51951 : if (dbg_cnt (dse)
3580 : 51951 : && check_for_inc_dec_1 (insn_info))
3581 : : {
3582 : 51951 : delete_insn (insn_info->insn);
3583 : 51951 : insn_info->insn = NULL;
3584 : 51951 : globally_deleted++;
3585 : : }
3586 : : }
3587 : : }
3588 : : /* We do want to process the local info if the insn was
3589 : : deleted. For instance, if the insn did a wild read, we
3590 : : no longer need to trash the info. */
3591 : 78195691 : if (insn_info->insn
3592 : 78118845 : && INSN_P (insn_info->insn)
3593 : 78118845 : && (!deleted))
3594 : : {
3595 : 78118845 : scan_stores (insn_info->store_rec, v, NULL);
3596 : 78118845 : if (insn_info->wild_read)
3597 : : {
3598 : 653025 : if (dump_file && (dump_flags & TDF_DETAILS))
3599 : 0 : fprintf (dump_file, "wild read\n");
3600 : 653025 : bitmap_clear (v);
3601 : : }
3602 : 77465820 : else if (insn_info->read_rec
3603 : 70851578 : || insn_info->non_frame_wild_read
3604 : 67463074 : || insn_info->frame_read)
3605 : : {
3606 : 10002746 : if (dump_file && (dump_flags & TDF_DETAILS))
3607 : : {
3608 : 0 : if (!insn_info->non_frame_wild_read
3609 : 0 : && !insn_info->frame_read)
3610 : 0 : fprintf (dump_file, "regular read\n");
3611 : 0 : if (insn_info->non_frame_wild_read)
3612 : 0 : fprintf (dump_file, "non-frame wild read\n");
3613 : 0 : if (insn_info->frame_read)
3614 : 0 : fprintf (dump_file, "frame read\n");
3615 : : }
3616 : 10002746 : scan_reads (insn_info, v, NULL);
3617 : : }
3618 : : }
3619 : :
3620 : 78195691 : insn_info = insn_info->prev_insn;
3621 : : }
3622 : : }
3623 : 242410 : }
3624 : :
3625 : :
3626 : :
3627 : : /*----------------------------------------------------------------------------
3628 : : Sixth step.
3629 : :
3630 : : Delete stores made redundant by earlier stores (which store the same
3631 : : value) that couldn't be eliminated.
3632 : : ----------------------------------------------------------------------------*/
3633 : :
3634 : : static void
3635 : 1969986 : dse_step6 (void)
3636 : : {
3637 : 1969986 : basic_block bb;
3638 : :
3639 : 25698563 : FOR_ALL_BB_FN (bb, cfun)
3640 : : {
3641 : 23728577 : bb_info_t bb_info = bb_table[bb->index];
3642 : 23728577 : insn_info_t insn_info = bb_info->last_insn;
3643 : :
3644 : 214175565 : while (insn_info)
3645 : : {
3646 : : /* There may have been code deleted by the dce pass run before
3647 : : this phase. */
3648 : 190446988 : if (insn_info->insn
3649 : 190363932 : && INSN_P (insn_info->insn)
3650 : 190363932 : && !insn_info->cannot_delete)
3651 : : {
3652 : 4526427 : store_info *s_info = insn_info->store_rec;
3653 : :
3654 : 4526427 : while (s_info && !s_info->is_set)
3655 : 0 : s_info = s_info->next;
3656 : 4526427 : if (s_info
3657 : 4526427 : && s_info->redundant_reason
3658 : 227 : && s_info->redundant_reason->insn
3659 : 92 : && INSN_P (s_info->redundant_reason->insn))
3660 : : {
3661 : 92 : rtx_insn *rinsn = s_info->redundant_reason->insn;
3662 : 92 : if (dump_file && (dump_flags & TDF_DETAILS))
3663 : 0 : fprintf (dump_file, "Locally deleting insn %d "
3664 : : "because insn %d stores the "
3665 : : "same value and couldn't be "
3666 : : "eliminated\n",
3667 : 0 : INSN_UID (insn_info->insn),
3668 : 0 : INSN_UID (rinsn));
3669 : 92 : delete_dead_store_insn (insn_info);
3670 : : }
3671 : : }
3672 : 190446988 : insn_info = insn_info->prev_insn;
3673 : : }
3674 : : }
3675 : 1969986 : }
3676 : :
3677 : : /*----------------------------------------------------------------------------
3678 : : Seventh step.
3679 : :
3680 : : Destroy everything left standing.
3681 : : ----------------------------------------------------------------------------*/
3682 : :
3683 : : static void
3684 : 1969986 : dse_step7 (void)
3685 : : {
3686 : 1969986 : bitmap_obstack_release (&dse_bitmap_obstack);
3687 : 1969986 : obstack_free (&dse_obstack, NULL);
3688 : :
3689 : 1969986 : end_alias_analysis ();
3690 : 1969986 : free (bb_table);
3691 : 1969986 : delete rtx_group_table;
3692 : 1969986 : rtx_group_table = NULL;
3693 : 1969986 : rtx_group_vec.release ();
3694 : 1969986 : BITMAP_FREE (all_blocks);
3695 : 1969986 : BITMAP_FREE (scratch);
3696 : :
3697 : 1969986 : rtx_store_info_pool.release ();
3698 : 1969986 : read_info_type_pool.release ();
3699 : 1969986 : insn_info_type_pool.release ();
3700 : 1969986 : dse_bb_info_type_pool.release ();
3701 : 1969986 : group_info_pool.release ();
3702 : 1969986 : deferred_change_pool.release ();
3703 : 1969986 : }
3704 : :
3705 : :
3706 : : /* -------------------------------------------------------------------------
3707 : : DSE
3708 : : ------------------------------------------------------------------------- */
3709 : :
3710 : : /* Callback for running pass_rtl_dse. */
3711 : :
3712 : : static unsigned int
3713 : 1969986 : rest_of_handle_dse (void)
3714 : : {
3715 : 1969986 : df_set_flags (DF_DEFER_INSN_RESCAN);
3716 : :
3717 : : /* Need the notes since we must track live hardregs in the forwards
3718 : : direction. */
3719 : 1969986 : df_note_add_problem ();
3720 : 1969986 : df_analyze ();
3721 : :
3722 : 1969986 : dse_step0 ();
3723 : 1969986 : dse_step1 ();
3724 : : /* DSE can eliminate potentially-trapping MEMs.
3725 : : Remove any EH edges associated with them, since otherwise
3726 : : DF_LR_RUN_DCE will complain later. */
3727 : 1958737 : if ((locally_deleted || globally_deleted)
3728 : 11249 : && cfun->can_throw_non_call_exceptions
3729 : 1976446 : && purge_all_dead_edges ())
3730 : : {
3731 : 0 : free_dominance_info (CDI_DOMINATORS);
3732 : 0 : delete_unreachable_blocks ();
3733 : : }
3734 : 1969986 : dse_step2_init ();
3735 : 1969986 : if (dse_step2 ())
3736 : : {
3737 : 242410 : df_set_flags (DF_LR_RUN_DCE);
3738 : 242410 : df_analyze ();
3739 : 242410 : if (dump_file && (dump_flags & TDF_DETAILS))
3740 : 0 : fprintf (dump_file, "doing global processing\n");
3741 : 242410 : dse_step3 ();
3742 : 242410 : dse_step4 ();
3743 : 242410 : dse_step5 ();
3744 : : }
3745 : :
3746 : 1969986 : dse_step6 ();
3747 : 1969986 : dse_step7 ();
3748 : :
3749 : 1969986 : if (dump_file)
3750 : 54 : fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
3751 : : locally_deleted, globally_deleted);
3752 : :
3753 : : /* DSE can eliminate potentially-trapping MEMs.
3754 : : Remove any EH edges associated with them. */
3755 : 1958710 : if ((locally_deleted || globally_deleted)
3756 : 28152 : && cfun->can_throw_non_call_exceptions
3757 : 1990000 : && purge_all_dead_edges ())
3758 : : {
3759 : 0 : free_dominance_info (CDI_DOMINATORS);
3760 : 0 : cleanup_cfg (0);
3761 : : }
3762 : :
3763 : 1969986 : return 0;
3764 : : }
3765 : :
3766 : : namespace {
3767 : :
3768 : : const pass_data pass_data_rtl_dse1 =
3769 : : {
3770 : : RTL_PASS, /* type */
3771 : : "dse1", /* name */
3772 : : OPTGROUP_NONE, /* optinfo_flags */
3773 : : TV_DSE1, /* tv_id */
3774 : : 0, /* properties_required */
3775 : : 0, /* properties_provided */
3776 : : 0, /* properties_destroyed */
3777 : : 0, /* todo_flags_start */
3778 : : TODO_df_finish, /* todo_flags_finish */
3779 : : };
3780 : :
3781 : : class pass_rtl_dse1 : public rtl_opt_pass
3782 : : {
3783 : : public:
3784 : 281914 : pass_rtl_dse1 (gcc::context *ctxt)
3785 : 563828 : : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
3786 : : {}
3787 : :
3788 : : /* opt_pass methods: */
3789 : 1426773 : bool gate (function *) final override
3790 : : {
3791 : 1426773 : return optimize > 0 && flag_dse && dbg_cnt (dse1);
3792 : : }
3793 : :
3794 : 984993 : unsigned int execute (function *) final override
3795 : : {
3796 : 984993 : return rest_of_handle_dse ();
3797 : : }
3798 : :
3799 : : }; // class pass_rtl_dse1
3800 : :
3801 : : } // anon namespace
3802 : :
3803 : : rtl_opt_pass *
3804 : 281914 : make_pass_rtl_dse1 (gcc::context *ctxt)
3805 : : {
3806 : 281914 : return new pass_rtl_dse1 (ctxt);
3807 : : }
3808 : :
3809 : : namespace {
3810 : :
3811 : : const pass_data pass_data_rtl_dse2 =
3812 : : {
3813 : : RTL_PASS, /* type */
3814 : : "dse2", /* name */
3815 : : OPTGROUP_NONE, /* optinfo_flags */
3816 : : TV_DSE2, /* tv_id */
3817 : : 0, /* properties_required */
3818 : : 0, /* properties_provided */
3819 : : 0, /* properties_destroyed */
3820 : : 0, /* todo_flags_start */
3821 : : TODO_df_finish, /* todo_flags_finish */
3822 : : };
3823 : :
3824 : : class pass_rtl_dse2 : public rtl_opt_pass
3825 : : {
3826 : : public:
3827 : 281914 : pass_rtl_dse2 (gcc::context *ctxt)
3828 : 563828 : : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
3829 : : {}
3830 : :
3831 : : /* opt_pass methods: */
3832 : 1426773 : bool gate (function *) final override
3833 : : {
3834 : 1426773 : return optimize > 0 && flag_dse && dbg_cnt (dse2);
3835 : : }
3836 : :
3837 : 984993 : unsigned int execute (function *) final override
3838 : : {
3839 : 984993 : return rest_of_handle_dse ();
3840 : : }
3841 : :
3842 : : }; // class pass_rtl_dse2
3843 : :
3844 : : } // anon namespace
3845 : :
3846 : : rtl_opt_pass *
3847 : 281914 : make_pass_rtl_dse2 (gcc::context *ctxt)
3848 : : {
3849 : 281914 : return new pass_rtl_dse2 (ctxt);
3850 : : }
|