Branch data Line data Source code
1 : : /* RTL dead store elimination.
2 : : Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 : :
4 : : Contributed by Richard Sandiford <rsandifor@codesourcery.com>
5 : : and Kenneth Zadeck <zadeck@naturalbridge.com>
6 : :
7 : : This file is part of GCC.
8 : :
9 : : GCC is free software; you can redistribute it and/or modify it under
10 : : the terms of the GNU General Public License as published by the Free
11 : : Software Foundation; either version 3, or (at your option) any later
12 : : version.
13 : :
14 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 : : for more details.
18 : :
19 : : You should have received a copy of the GNU General Public License
20 : : along with GCC; see the file COPYING3. If not see
21 : : <http://www.gnu.org/licenses/>. */
22 : :
23 : : #undef BASELINE
24 : :
25 : : #include "config.h"
26 : : #include "system.h"
27 : : #include "coretypes.h"
28 : : #include "backend.h"
29 : : #include "target.h"
30 : : #include "rtl.h"
31 : : #include "tree.h"
32 : : #include "gimple.h"
33 : : #include "predict.h"
34 : : #include "df.h"
35 : : #include "memmodel.h"
36 : : #include "tm_p.h"
37 : : #include "gimple-ssa.h"
38 : : #include "expmed.h"
39 : : #include "optabs.h"
40 : : #include "emit-rtl.h"
41 : : #include "recog.h"
42 : : #include "alias.h"
43 : : #include "stor-layout.h"
44 : : #include "cfgrtl.h"
45 : : #include "cselib.h"
46 : : #include "tree-pass.h"
47 : : #include "explow.h"
48 : : #include "expr.h"
49 : : #include "dbgcnt.h"
50 : : #include "rtl-iter.h"
51 : : #include "cfgcleanup.h"
52 : : #include "calls.h"
53 : :
54 : : /* This file contains three techniques for performing Dead Store
55 : : Elimination (dse).
56 : :
57 : : * The first technique performs dse locally on any base address. It
58 : : is based on the cselib which is a local value numbering technique.
59 : : This technique is local to a basic block but deals with a fairly
60 : : general addresses.
61 : :
62 : : * The second technique performs dse globally but is restricted to
63 : : base addresses that are either constant or are relative to the
64 : : frame_pointer.
65 : :
66 : : * The third technique, (which is only done after register allocation)
67 : : processes the spill slots. This differs from the second
68 : : technique because it takes advantage of the fact that spilling is
69 : : completely free from the effects of aliasing.
70 : :
71 : : Logically, dse is a backwards dataflow problem. A store can be
72 : : deleted if it if cannot be reached in the backward direction by any
73 : : use of the value being stored. However, the local technique uses a
74 : : forwards scan of the basic block because cselib requires that the
75 : : block be processed in that order.
76 : :
77 : : The pass is logically broken into 7 steps:
78 : :
79 : : 0) Initialization.
80 : :
81 : : 1) The local algorithm, as well as scanning the insns for the two
82 : : global algorithms.
83 : :
84 : : 2) Analysis to see if the global algs are necessary. In the case
85 : : of stores base on a constant address, there must be at least two
86 : : stores to that address, to make it possible to delete some of the
87 : : stores. In the case of stores off of the frame or spill related
88 : : stores, only one store to an address is necessary because those
89 : : stores die at the end of the function.
90 : :
91 : : 3) Set up the global dataflow equations based on processing the
92 : : info parsed in the first step.
93 : :
94 : : 4) Solve the dataflow equations.
95 : :
96 : : 5) Delete the insns that the global analysis has indicated are
97 : : unnecessary.
98 : :
99 : : 6) Delete insns that store the same value as preceding store
100 : : where the earlier store couldn't be eliminated.
101 : :
102 : : 7) Cleanup.
103 : :
104 : : This step uses cselib and canon_rtx to build the largest expression
105 : : possible for each address. This pass is a forwards pass through
106 : : each basic block. From the point of view of the global technique,
107 : : the first pass could examine a block in either direction. The
108 : : forwards ordering is to accommodate cselib.
109 : :
110 : : We make a simplifying assumption: addresses fall into four broad
111 : : categories:
112 : :
113 : : 1) base has rtx_varies_p == false, offset is constant.
114 : : 2) base has rtx_varies_p == false, offset variable.
115 : : 3) base has rtx_varies_p == true, offset constant.
116 : : 4) base has rtx_varies_p == true, offset variable.
117 : :
118 : : The local passes are able to process all 4 kinds of addresses. The
119 : : global pass only handles 1).
120 : :
121 : : The global problem is formulated as follows:
122 : :
123 : : A store, S1, to address A, where A is not relative to the stack
124 : : frame, can be eliminated if all paths from S1 to the end of the
125 : : function contain another store to A before a read to A.
126 : :
127 : : If the address A is relative to the stack frame, a store S2 to A
128 : : can be eliminated if there are no paths from S2 that reach the
129 : : end of the function that read A before another store to A. In
130 : : this case S2 can be deleted if there are paths from S2 to the
131 : : end of the function that have no reads or writes to A. This
132 : : second case allows stores to the stack frame to be deleted that
133 : : would otherwise die when the function returns. This cannot be
134 : : done if stores_off_frame_dead_at_return is not true. See the doc
135 : : for that variable for when this variable is false.
136 : :
137 : : The global problem is formulated as a backwards set union
138 : : dataflow problem where the stores are the gens and reads are the
139 : : kills. Set union problems are rare and require some special
140 : : handling given our representation of bitmaps. A straightforward
141 : : implementation requires a lot of bitmaps filled with 1s.
142 : : These are expensive and cumbersome in our bitmap formulation so
143 : : care has been taken to avoid large vectors filled with 1s. See
144 : : the comments in bb_info and in the dataflow confluence functions
145 : : for details.
146 : :
147 : : There are two places for further enhancements to this algorithm:
148 : :
149 : : 1) The original dse which was embedded in a pass called flow also
150 : : did local address forwarding. For example in
151 : :
152 : : A <- r100
153 : : ... <- A
154 : :
155 : : flow would replace the right hand side of the second insn with a
156 : : reference to r100. Most of the information is available to add this
157 : : to this pass. It has not done it because it is a lot of work in
158 : : the case that either r100 is assigned to between the first and
159 : : second insn and/or the second insn is a load of part of the value
160 : : stored by the first insn.
161 : :
162 : : insn 5 in gcc.c-torture/compile/990203-1.c simple case.
163 : : insn 15 in gcc.c-torture/execute/20001017-2.c simple case.
164 : : insn 25 in gcc.c-torture/execute/20001026-1.c simple case.
165 : : insn 44 in gcc.c-torture/execute/20010910-1.c simple case.
166 : :
167 : : 2) The cleaning up of spill code is quite profitable. It currently
168 : : depends on reading tea leaves and chicken entrails left by reload.
169 : : This pass depends on reload creating a singleton alias set for each
170 : : spill slot and telling the next dse pass which of these alias sets
171 : : are the singletons. Rather than analyze the addresses of the
172 : : spills, dse's spill processing just does analysis of the loads and
173 : : stores that use those alias sets. There are three cases where this
174 : : falls short:
175 : :
176 : : a) Reload sometimes creates the slot for one mode of access, and
177 : : then inserts loads and/or stores for a smaller mode. In this
178 : : case, the current code just punts on the slot. The proper thing
179 : : to do is to back out and use one bit vector position for each
180 : : byte of the entity associated with the slot. This depends on
181 : : KNOWING that reload always generates the accesses for each of the
182 : : bytes in some canonical (read that easy to understand several
183 : : passes after reload happens) way.
184 : :
185 : : b) Reload sometimes decides that spill slot it allocated was not
186 : : large enough for the mode and goes back and allocates more slots
187 : : with the same mode and alias set. The backout in this case is a
188 : : little more graceful than (a). In this case the slot is unmarked
189 : : as being a spill slot and if final address comes out to be based
190 : : off the frame pointer, the global algorithm handles this slot.
191 : :
192 : : c) For any pass that may prespill, there is currently no
193 : : mechanism to tell the dse pass that the slot being used has the
194 : : special properties that reload uses. It may be that all that is
195 : : required is to have those passes make the same calls that reload
196 : : does, assuming that the alias sets can be manipulated in the same
197 : : way. */
198 : :
199 : : /* There are limits to the size of constant offsets we model for the
200 : : global problem. There are certainly test cases, that exceed this
201 : : limit, however, it is unlikely that there are important programs
202 : : that really have constant offsets this size. */
203 : : #define MAX_OFFSET (64 * 1024)
204 : :
205 : : /* Obstack for the DSE dataflow bitmaps. We don't want to put these
206 : : on the default obstack because these bitmaps can grow quite large
207 : : (~2GB for the small (!) test case of PR54146) and we'll hold on to
208 : : all that memory until the end of the compiler run.
209 : : As a bonus, delete_tree_live_info can destroy all the bitmaps by just
210 : : releasing the whole obstack. */
211 : : static bitmap_obstack dse_bitmap_obstack;
212 : :
213 : : /* Obstack for other data. As for above: Kinda nice to be able to
214 : : throw it all away at the end in one big sweep. */
215 : : static struct obstack dse_obstack;
216 : :
217 : : /* Scratch bitmap for cselib's cselib_expand_value_rtx. */
218 : : static bitmap scratch = NULL;
219 : :
220 : : struct insn_info_type;
221 : :
222 : : /* This structure holds information about a candidate store. */
223 : : class store_info
224 : : {
225 : : public:
226 : :
227 : : /* False means this is a clobber. */
228 : : bool is_set;
229 : :
230 : : /* False if a single HOST_WIDE_INT bitmap is used for positions_needed. */
231 : : bool is_large;
232 : :
233 : : /* The id of the mem group of the base address. If rtx_varies_p is
234 : : true, this is -1. Otherwise, it is the index into the group
235 : : table. */
236 : : int group_id;
237 : :
238 : : /* This is the cselib value. */
239 : : cselib_val *cse_base;
240 : :
241 : : /* This canonized mem. */
242 : : rtx mem;
243 : :
244 : : /* Canonized MEM address for use by canon_true_dependence. */
245 : : rtx mem_addr;
246 : :
247 : : /* The offset of the first byte associated with the operation. */
248 : : poly_int64 offset;
249 : :
250 : : /* The number of bytes covered by the operation. This is always exact
251 : : and known (rather than -1). */
252 : : poly_int64 width;
253 : :
254 : : /* The address space that the memory reference uses. */
255 : : unsigned char addrspace;
256 : :
257 : : union
258 : : {
259 : : /* A bitmask as wide as the number of bytes in the word that
260 : : contains a 1 if the byte may be needed. The store is unused if
261 : : all of the bits are 0. This is used if IS_LARGE is false. */
262 : : unsigned HOST_WIDE_INT small_bitmask;
263 : :
264 : : struct
265 : : {
266 : : /* A bitmap with one bit per byte, or null if the number of
267 : : bytes isn't known at compile time. A cleared bit means
268 : : the position is needed. Used if IS_LARGE is true. */
269 : : bitmap bmap;
270 : :
271 : : /* When BITMAP is nonnull, this counts the number of set bits
272 : : (i.e. unneeded bytes) in the bitmap. If it is equal to
273 : : WIDTH, the whole store is unused.
274 : :
275 : : When BITMAP is null:
276 : : - the store is definitely not needed when COUNT == 1
277 : : - all the store is needed when COUNT == 0 and RHS is nonnull
278 : : - otherwise we don't know which parts of the store are needed. */
279 : : int count;
280 : : } large;
281 : : } positions_needed;
282 : :
283 : : /* The next store info for this insn. */
284 : : class store_info *next;
285 : :
286 : : /* The right hand side of the store. This is used if there is a
287 : : subsequent reload of the mems address somewhere later in the
288 : : basic block. */
289 : : rtx rhs;
290 : :
291 : : /* If rhs is or holds a constant, this contains that constant,
292 : : otherwise NULL. */
293 : : rtx const_rhs;
294 : :
295 : : /* Set if this store stores the same constant value as REDUNDANT_REASON
296 : : insn stored. These aren't eliminated early, because doing that
297 : : might prevent the earlier larger store to be eliminated. */
298 : : struct insn_info_type *redundant_reason;
299 : : };
300 : :
301 : : /* Return a bitmask with the first N low bits set. */
302 : :
303 : : static unsigned HOST_WIDE_INT
304 : 19179462 : lowpart_bitmask (int n)
305 : : {
306 : 19179462 : unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
307 : 19179462 : return mask >> (HOST_BITS_PER_WIDE_INT - n);
308 : : }
309 : :
310 : : static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");
311 : :
312 : : static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");
313 : :
314 : : /* This structure holds information about a load. These are only
315 : : built for rtx bases. */
316 : : class read_info_type
317 : : {
318 : : public:
319 : : /* The id of the mem group of the base address. */
320 : : int group_id;
321 : :
322 : : /* The offset of the first byte associated with the operation. */
323 : : poly_int64 offset;
324 : :
325 : : /* The number of bytes covered by the operation, or -1 if not known. */
326 : : poly_int64 width;
327 : :
328 : : /* The mem being read. */
329 : : rtx mem;
330 : :
331 : : /* The next read_info for this insn. */
332 : : class read_info_type *next;
333 : : };
334 : : typedef class read_info_type *read_info_t;
335 : :
336 : : static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");
337 : :
338 : : /* One of these records is created for each insn. */
339 : :
340 : : struct insn_info_type
341 : : {
342 : : /* Set true if the insn contains a store but the insn itself cannot
343 : : be deleted. This is set if the insn is a parallel and there is
344 : : more than one non dead output or if the insn is in some way
345 : : volatile. */
346 : : bool cannot_delete;
347 : :
348 : : /* This field is only used by the global algorithm. It is set true
349 : : if the insn contains any read of mem except for a (1). This is
350 : : also set if the insn is a call or has a clobber mem. If the insn
351 : : contains a wild read, the use_rec will be null. */
352 : : bool wild_read;
353 : :
354 : : /* This is true only for CALL instructions which could potentially read
355 : : any non-frame memory location. This field is used by the global
356 : : algorithm. */
357 : : bool non_frame_wild_read;
358 : :
359 : : /* This field is only used for the processing of const functions.
360 : : These functions cannot read memory, but they can read the stack
361 : : because that is where they may get their parms. We need to be
362 : : this conservative because, like the store motion pass, we don't
363 : : consider CALL_INSN_FUNCTION_USAGE when processing call insns.
364 : : Moreover, we need to distinguish two cases:
365 : : 1. Before reload (register elimination), the stores related to
366 : : outgoing arguments are stack pointer based and thus deemed
367 : : of non-constant base in this pass. This requires special
368 : : handling but also means that the frame pointer based stores
369 : : need not be killed upon encountering a const function call.
370 : : 2. After reload, the stores related to outgoing arguments can be
371 : : either stack pointer or hard frame pointer based. This means
372 : : that we have no other choice than also killing all the frame
373 : : pointer based stores upon encountering a const function call.
374 : : This field is set after reload for const function calls and before
375 : : reload for const tail function calls on targets where arg pointer
376 : : is the frame pointer. Having this set is less severe than a wild
377 : : read, it just means that all the frame related stores are killed
378 : : rather than all the stores. */
379 : : bool frame_read;
380 : :
381 : : /* This field is only used for the processing of const functions.
382 : : It is set if the insn may contain a stack pointer based store. */
383 : : bool stack_pointer_based;
384 : :
385 : : /* This is true if any of the sets within the store contains a
386 : : cselib base. Such stores can only be deleted by the local
387 : : algorithm. */
388 : : bool contains_cselib_groups;
389 : :
390 : : /* The insn. */
391 : : rtx_insn *insn;
392 : :
393 : : /* The list of mem sets or mem clobbers that are contained in this
394 : : insn. If the insn is deletable, it contains only one mem set.
395 : : But it could also contain clobbers. Insns that contain more than
396 : : one mem set are not deletable, but each of those mems are here in
397 : : order to provide info to delete other insns. */
398 : : store_info *store_rec;
399 : :
400 : : /* The linked list of mem uses in this insn. Only the reads from
401 : : rtx bases are listed here. The reads to cselib bases are
402 : : completely processed during the first scan and so are never
403 : : created. */
404 : : read_info_t read_rec;
405 : :
406 : : /* The live fixed registers. We assume only fixed registers can
407 : : cause trouble by being clobbered from an expanded pattern;
408 : : storing only the live fixed registers (rather than all registers)
409 : : means less memory needs to be allocated / copied for the individual
410 : : stores. */
411 : : regset fixed_regs_live;
412 : :
413 : : /* The prev insn in the basic block. */
414 : : struct insn_info_type * prev_insn;
415 : :
416 : : /* The linked list of insns that are in consideration for removal in
417 : : the forwards pass through the basic block. This pointer may be
418 : : trash as it is not cleared when a wild read occurs. The only
419 : : time it is guaranteed to be correct is when the traversal starts
420 : : at active_local_stores. */
421 : : struct insn_info_type * next_local_store;
422 : : };
423 : : typedef struct insn_info_type *insn_info_t;
424 : :
425 : : static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");
426 : :
427 : : /* The linked list of stores that are under consideration in this
428 : : basic block. */
429 : : static insn_info_t active_local_stores;
430 : : static int active_local_stores_len;
431 : :
432 : : struct dse_bb_info_type
433 : : {
434 : : /* Pointer to the insn info for the last insn in the block. These
435 : : are linked so this is how all of the insns are reached. During
436 : : scanning this is the current insn being scanned. */
437 : : insn_info_t last_insn;
438 : :
439 : : /* The info for the global dataflow problem. */
440 : :
441 : :
442 : : /* This is set if the transfer function should and in the wild_read
443 : : bitmap before applying the kill and gen sets. That vector knocks
444 : : out most of the bits in the bitmap and thus speeds up the
445 : : operations. */
446 : : bool apply_wild_read;
447 : :
448 : : /* The following 4 bitvectors hold information about which positions
449 : : of which stores are live or dead. They are indexed by
450 : : get_bitmap_index. */
451 : :
452 : : /* The set of store positions that exist in this block before a wild read. */
453 : : bitmap gen;
454 : :
455 : : /* The set of load positions that exist in this block above the
456 : : same position of a store. */
457 : : bitmap kill;
458 : :
459 : : /* The set of stores that reach the top of the block without being
460 : : killed by a read.
461 : :
462 : : Do not represent the in if it is all ones. Note that this is
463 : : what the bitvector should logically be initialized to for a set
464 : : intersection problem. However, like the kill set, this is too
465 : : expensive. So initially, the in set will only be created for the
466 : : exit block and any block that contains a wild read. */
467 : : bitmap in;
468 : :
469 : : /* The set of stores that reach the bottom of the block from it's
470 : : successors.
471 : :
472 : : Do not represent the in if it is all ones. Note that this is
473 : : what the bitvector should logically be initialized to for a set
474 : : intersection problem. However, like the kill and in set, this is
475 : : too expensive. So what is done is that the confluence operator
476 : : just initializes the vector from one of the out sets of the
477 : : successors of the block. */
478 : : bitmap out;
479 : :
480 : : /* The following bitvector is indexed by the reg number. It
481 : : contains the set of regs that are live at the current instruction
482 : : being processed. While it contains info for all of the
483 : : registers, only the hard registers are actually examined. It is used
484 : : to assure that shift and/or add sequences that are inserted do not
485 : : accidentally clobber live hard regs. */
486 : : bitmap regs_live;
487 : : };
488 : :
489 : : typedef struct dse_bb_info_type *bb_info_t;
490 : :
491 : : static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
492 : : ("bb_info_pool");
493 : :
494 : : /* Table to hold all bb_infos. */
495 : : static bb_info_t *bb_table;
496 : :
497 : : /* There is a group_info for each rtx base that is used to reference
498 : : memory. There are also not many of the rtx bases because they are
499 : : very limited in scope. */
500 : :
501 : : struct group_info
502 : : {
503 : : /* The actual base of the address. */
504 : : rtx rtx_base;
505 : :
506 : : /* The sequential id of the base. This allows us to have a
507 : : canonical ordering of these that is not based on addresses. */
508 : : int id;
509 : :
510 : : /* True if there are any positions that are to be processed
511 : : globally. */
512 : : bool process_globally;
513 : :
514 : : /* True if the base of this group is either the frame_pointer or
515 : : hard_frame_pointer. */
516 : : bool frame_related;
517 : :
518 : : /* A mem wrapped around the base pointer for the group in order to do
519 : : read dependency. It must be given BLKmode in order to encompass all
520 : : the possible offsets from the base. */
521 : : rtx base_mem;
522 : :
523 : : /* Canonized version of base_mem's address. */
524 : : rtx canon_base_addr;
525 : :
526 : : /* These two sets of two bitmaps are used to keep track of how many
527 : : stores are actually referencing that position from this base. We
528 : : only do this for rtx bases as this will be used to assign
529 : : positions in the bitmaps for the global problem. Bit N is set in
530 : : store1 on the first store for offset N. Bit N is set in store2
531 : : for the second store to offset N. This is all we need since we
532 : : only care about offsets that have two or more stores for them.
533 : :
534 : : The "_n" suffix is for offsets less than 0 and the "_p" suffix is
535 : : for 0 and greater offsets.
536 : :
537 : : There is one special case here, for stores into the stack frame,
538 : : we will or store1 into store2 before deciding which stores look
539 : : at globally. This is because stores to the stack frame that have
540 : : no other reads before the end of the function can also be
541 : : deleted. */
542 : : bitmap store1_n, store1_p, store2_n, store2_p;
543 : :
544 : : /* These bitmaps keep track of offsets in this group escape this function.
545 : : An offset escapes if it corresponds to a named variable whose
546 : : addressable flag is set. */
547 : : bitmap escaped_n, escaped_p;
548 : :
549 : : /* The positions in this bitmap have the same assignments as the in,
550 : : out, gen and kill bitmaps. This bitmap is all zeros except for
551 : : the positions that are occupied by stores for this group. */
552 : : bitmap group_kill;
553 : :
554 : : /* The offset_map is used to map the offsets from this base into
555 : : positions in the global bitmaps. It is only created after all of
556 : : the all of stores have been scanned and we know which ones we
557 : : care about. */
558 : : int *offset_map_n, *offset_map_p;
559 : : int offset_map_size_n, offset_map_size_p;
560 : : };
561 : :
562 : : static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");
563 : :
564 : : /* Index into the rtx_group_vec. */
565 : : static int rtx_group_next_id;
566 : :
567 : :
568 : : static vec<group_info *> rtx_group_vec;
569 : :
570 : :
571 : : /* This structure holds the set of changes that are being deferred
572 : : when removing read operation. See replace_read. */
573 : : struct deferred_change
574 : : {
575 : :
576 : : /* The mem that is being replaced. */
577 : : rtx *loc;
578 : :
579 : : /* The reg it is being replaced with. */
580 : : rtx reg;
581 : :
582 : : struct deferred_change *next;
583 : : };
584 : :
585 : : static object_allocator<deferred_change> deferred_change_pool
586 : : ("deferred_change_pool");
587 : :
588 : : static deferred_change *deferred_change_list = NULL;
589 : :
590 : : /* This is true except if cfun->stdarg -- i.e. we cannot do
591 : : this for vararg functions because they play games with the frame. */
592 : : static bool stores_off_frame_dead_at_return;
593 : :
594 : : /* Counter for stats. */
595 : : static int globally_deleted;
596 : : static int locally_deleted;
597 : :
598 : : static bitmap all_blocks;
599 : :
600 : : /* Locations that are killed by calls in the global phase. */
601 : : static bitmap kill_on_calls;
602 : :
603 : : /* The number of bits used in the global bitmaps. */
604 : : static unsigned int current_position;
605 : :
606 : : /* Print offset range [OFFSET, OFFSET + WIDTH) to FILE. */
607 : :
608 : : static void
609 : 0 : print_range (FILE *file, poly_int64 offset, poly_int64 width)
610 : : {
611 : 0 : fprintf (file, "[");
612 : 0 : print_dec (offset, file, SIGNED);
613 : 0 : fprintf (file, "..");
614 : 0 : print_dec (offset + width, file, SIGNED);
615 : 0 : fprintf (file, ")");
616 : 0 : }
617 : :
618 : : /*----------------------------------------------------------------------------
619 : : Zeroth step.
620 : :
621 : : Initialization.
622 : : ----------------------------------------------------------------------------*/
623 : :
624 : :
625 : : /* Hashtable callbacks for maintaining the "bases" field of
626 : : store_group_info, given that the addresses are function invariants. */
627 : :
628 : : struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
629 : : {
630 : : static inline hashval_t hash (const group_info *);
631 : : static inline bool equal (const group_info *, const group_info *);
632 : : };
633 : :
634 : : inline bool
635 : 74730277 : invariant_group_base_hasher::equal (const group_info *gi1,
636 : : const group_info *gi2)
637 : : {
638 : 74730277 : return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
639 : : }
640 : :
641 : : inline hashval_t
642 : 86869664 : invariant_group_base_hasher::hash (const group_info *gi)
643 : : {
644 : 86869664 : int do_not_record;
645 : 86869664 : return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
646 : : }
647 : :
648 : : /* Tables of group_info structures, hashed by base value. */
649 : : static hash_table<invariant_group_base_hasher> *rtx_group_table;
650 : :
651 : :
652 : : /* Get the GROUP for BASE. Add a new group if it is not there. */
653 : :
654 : : static group_info *
655 : 18854591 : get_group_info (rtx base)
656 : : {
657 : 18854591 : struct group_info tmp_gi;
658 : 18854591 : group_info *gi;
659 : 18854591 : group_info **slot;
660 : :
661 : 18854591 : gcc_assert (base != NULL_RTX);
662 : :
663 : : /* Find the store_base_info structure for BASE, creating a new one
664 : : if necessary. */
665 : 18854591 : tmp_gi.rtx_base = base;
666 : 18854591 : slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
667 : 18854591 : gi = *slot;
668 : :
669 : 18854591 : if (gi == NULL)
670 : : {
671 : 5775864 : *slot = gi = group_info_pool.allocate ();
672 : 5775864 : gi->rtx_base = base;
673 : 5775864 : gi->id = rtx_group_next_id++;
674 : 5775864 : gi->base_mem = gen_rtx_MEM (BLKmode, base);
675 : 5775864 : gi->canon_base_addr = canon_rtx (base);
676 : 5775864 : gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
677 : 5775864 : gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
678 : 5775864 : gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
679 : 5775864 : gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
680 : 5775864 : gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
681 : 5775864 : gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
682 : 5775864 : gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
683 : 5775864 : gi->process_globally = false;
684 : 11551728 : gi->frame_related =
685 : 5558030 : (base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx)
686 : 11297718 : || (base == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM]);
687 : 5775864 : gi->offset_map_size_n = 0;
688 : 5775864 : gi->offset_map_size_p = 0;
689 : 5775864 : gi->offset_map_n = NULL;
690 : 5775864 : gi->offset_map_p = NULL;
691 : 5775864 : rtx_group_vec.safe_push (gi);
692 : : }
693 : :
694 : 18854591 : return gi;
695 : : }
696 : :
697 : :
698 : : /* Initialization of data structures. */
699 : :
700 : : static void
701 : 1991506 : dse_step0 (void)
702 : : {
703 : 1991506 : locally_deleted = 0;
704 : 1991506 : globally_deleted = 0;
705 : :
706 : 1991506 : bitmap_obstack_initialize (&dse_bitmap_obstack);
707 : 1991506 : gcc_obstack_init (&dse_obstack);
708 : :
709 : 1991506 : scratch = BITMAP_ALLOC (®_obstack);
710 : 1991506 : kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);
711 : :
712 : :
713 : 1991506 : rtx_group_table = new hash_table<invariant_group_base_hasher> (11);
714 : :
715 : 1991506 : bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
716 : 1991506 : rtx_group_next_id = 0;
717 : :
718 : 1991506 : stores_off_frame_dead_at_return = !cfun->stdarg;
719 : :
720 : 1991506 : init_alias_analysis ();
721 : 1991506 : }
722 : :
723 : :
724 : :
725 : : /*----------------------------------------------------------------------------
726 : : First step.
727 : :
728 : : Scan all of the insns. Any random ordering of the blocks is fine.
729 : : Each block is scanned in forward order to accommodate cselib which
730 : : is used to remove stores with non-constant bases.
731 : : ----------------------------------------------------------------------------*/
732 : :
733 : : /* Delete all of the store_info recs from INSN_INFO. */
734 : :
735 : : static void
736 : 13917447 : free_store_info (insn_info_t insn_info)
737 : : {
738 : 13917447 : store_info *cur = insn_info->store_rec;
739 : 27881732 : while (cur)
740 : : {
741 : 13964285 : store_info *next = cur->next;
742 : 13964285 : if (cur->is_large)
743 : 10719 : BITMAP_FREE (cur->positions_needed.large.bmap);
744 : 13964285 : if (cur->cse_base)
745 : 13938685 : cse_store_info_pool.remove (cur);
746 : : else
747 : 25600 : rtx_store_info_pool.remove (cur);
748 : : cur = next;
749 : : }
750 : :
751 : 13917447 : insn_info->cannot_delete = true;
752 : 13917447 : insn_info->contains_cselib_groups = false;
753 : 13917447 : insn_info->store_rec = NULL;
754 : 13917447 : }
755 : :
756 : : struct note_add_store_info
757 : : {
758 : : rtx_insn *first, *current;
759 : : regset fixed_regs_live;
760 : : bool failure;
761 : : };
762 : :
763 : : /* Callback for emit_inc_dec_insn_before via note_stores.
764 : : Check if a register is clobbered which is live afterwards. */
765 : :
766 : : static void
767 : 0 : note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
768 : : {
769 : 0 : rtx_insn *insn;
770 : 0 : note_add_store_info *info = (note_add_store_info *) data;
771 : :
772 : 0 : if (!REG_P (loc))
773 : : return;
774 : :
775 : : /* If this register is referenced by the current or an earlier insn,
776 : : that's OK. E.g. this applies to the register that is being incremented
777 : : with this addition. */
778 : 0 : for (insn = info->first;
779 : 0 : insn != NEXT_INSN (info->current);
780 : 0 : insn = NEXT_INSN (insn))
781 : 0 : if (reg_referenced_p (loc, PATTERN (insn)))
782 : : return;
783 : :
784 : : /* If we come here, we have a clobber of a register that's only OK
785 : : if that register is not live. If we don't have liveness information
786 : : available, fail now. */
787 : 0 : if (!info->fixed_regs_live)
788 : : {
789 : 0 : info->failure = true;
790 : 0 : return;
791 : : }
792 : : /* Now check if this is a live fixed register. */
793 : 0 : unsigned int end_regno = END_REGNO (loc);
794 : 0 : for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
795 : 0 : if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
796 : 0 : info->failure = true;
797 : : }
798 : :
799 : : /* Callback for for_each_inc_dec that emits an INSN that sets DEST to
800 : : SRC + SRCOFF before insn ARG. */
801 : :
802 : : static int
803 : 0 : emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
804 : : rtx op ATTRIBUTE_UNUSED,
805 : : rtx dest, rtx src, rtx srcoff, void *arg)
806 : : {
807 : 0 : insn_info_t insn_info = (insn_info_t) arg;
808 : 0 : rtx_insn *insn = insn_info->insn, *new_insn, *cur;
809 : 0 : note_add_store_info info;
810 : :
811 : : /* We can reuse all operands without copying, because we are about
812 : : to delete the insn that contained it. */
813 : 0 : if (srcoff)
814 : : {
815 : 0 : start_sequence ();
816 : 0 : emit_insn (gen_add3_insn (dest, src, srcoff));
817 : 0 : new_insn = get_insns ();
818 : 0 : end_sequence ();
819 : : }
820 : : else
821 : 0 : new_insn = gen_move_insn (dest, src);
822 : 0 : info.first = new_insn;
823 : 0 : info.fixed_regs_live = insn_info->fixed_regs_live;
824 : 0 : info.failure = false;
825 : 0 : for (cur = new_insn; cur; cur = NEXT_INSN (cur))
826 : : {
827 : 0 : info.current = cur;
828 : 0 : note_stores (cur, note_add_store, &info);
829 : : }
830 : :
831 : : /* If a failure was flagged above, return 1 so that for_each_inc_dec will
832 : : return it immediately, communicating the failure to its caller. */
833 : 0 : if (info.failure)
834 : : return 1;
835 : :
836 : 0 : emit_insn_before (new_insn, insn);
837 : :
838 : 0 : return 0;
839 : : }
840 : :
841 : : /* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
842 : : is there, is split into a separate insn.
843 : : Return true on success (or if there was nothing to do), false on failure. */
844 : :
845 : : static bool
846 : 75256 : check_for_inc_dec_1 (insn_info_t insn_info)
847 : : {
848 : 75256 : rtx_insn *insn = insn_info->insn;
849 : 75256 : rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
850 : 75256 : if (note)
851 : 0 : return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
852 : 0 : insn_info) == 0;
853 : :
854 : : /* Punt on stack pushes, those don't have REG_INC notes and we are
855 : : unprepared to deal with distribution of REG_ARGS_SIZE notes etc. */
856 : 75256 : subrtx_iterator::array_type array;
857 : 511876 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
858 : : {
859 : 436620 : const_rtx x = *iter;
860 : 436620 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
861 : 0 : return false;
862 : : }
863 : :
864 : 75256 : return true;
865 : 75256 : }
866 : :
867 : :
868 : : /* Entry point for postreload. If you work on reload_cse, or you need this
869 : : anywhere else, consider if you can provide register liveness information
870 : : and add a parameter to this function so that it can be passed down in
871 : : insn_info.fixed_regs_live. */
872 : : bool
873 : 246936 : check_for_inc_dec (rtx_insn *insn)
874 : : {
875 : 246936 : insn_info_type insn_info;
876 : 246936 : rtx note;
877 : :
878 : 246936 : insn_info.insn = insn;
879 : 246936 : insn_info.fixed_regs_live = NULL;
880 : 246936 : note = find_reg_note (insn, REG_INC, NULL_RTX);
881 : 246936 : if (note)
882 : 0 : return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
883 : 0 : &insn_info) == 0;
884 : :
885 : : /* Punt on stack pushes, those don't have REG_INC notes and we are
886 : : unprepared to deal with distribution of REG_ARGS_SIZE notes etc. */
887 : 246936 : subrtx_iterator::array_type array;
888 : 1239458 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
889 : : {
890 : 992522 : const_rtx x = *iter;
891 : 992522 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
892 : 0 : return false;
893 : : }
894 : :
895 : 246936 : return true;
896 : 246936 : }
897 : :
898 : : /* Delete the insn and free all of the fields inside INSN_INFO. */
899 : :
900 : : static void
901 : 35050 : delete_dead_store_insn (insn_info_t insn_info)
902 : : {
903 : 35050 : read_info_t read_info;
904 : :
905 : 35050 : if (!dbg_cnt (dse))
906 : : return;
907 : :
908 : 35050 : if (!check_for_inc_dec_1 (insn_info))
909 : : return;
910 : 35050 : if (dump_file && (dump_flags & TDF_DETAILS))
911 : 0 : fprintf (dump_file, "Locally deleting insn %d\n",
912 : 0 : INSN_UID (insn_info->insn));
913 : :
914 : 35050 : free_store_info (insn_info);
915 : 35050 : read_info = insn_info->read_rec;
916 : :
917 : 35112 : while (read_info)
918 : : {
919 : 62 : read_info_t next = read_info->next;
920 : 62 : read_info_type_pool.remove (read_info);
921 : 62 : read_info = next;
922 : : }
923 : 35050 : insn_info->read_rec = NULL;
924 : :
925 : 35050 : delete_insn (insn_info->insn);
926 : 35050 : locally_deleted++;
927 : 35050 : insn_info->insn = NULL;
928 : :
929 : 35050 : insn_info->wild_read = false;
930 : : }
931 : :
932 : : /* Return whether DECL, a local variable, can possibly escape the current
933 : : function scope. */
934 : :
935 : : static bool
936 : 1088276 : local_variable_can_escape (tree decl)
937 : : {
938 : 1088276 : if (TREE_ADDRESSABLE (decl))
939 : : return true;
940 : :
941 : : /* If this is a partitioned variable, we need to consider all the variables
942 : : in the partition. This is necessary because a store into one of them can
943 : : be replaced with a store into another and this may not change the outcome
944 : : of the escape analysis. */
945 : 1088276 : if (cfun->gimple_df->decls_to_pointers != NULL)
946 : : {
947 : 266699 : tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
948 : 266699 : if (namep)
949 : 0 : return TREE_ADDRESSABLE (*namep);
950 : : }
951 : :
952 : : return false;
953 : : }
954 : :
955 : : /* Return whether EXPR can possibly escape the current function scope. */
956 : :
957 : : static bool
958 : 5074807 : can_escape (tree expr)
959 : : {
960 : 5074807 : tree base;
961 : 5074807 : if (!expr)
962 : : return true;
963 : 4823969 : base = get_base_address (expr);
964 : 4823969 : if (DECL_P (base)
965 : 3784000 : && !may_be_aliased (base)
966 : 7226723 : && !(VAR_P (base)
967 : 1303094 : && !DECL_EXTERNAL (base)
968 : 1303090 : && !TREE_STATIC (base)
969 : 1088276 : && local_variable_can_escape (base)))
970 : 1314478 : return false;
971 : : return true;
972 : : }
973 : :
974 : : /* Set the store* bitmaps offset_map_size* fields in GROUP based on
975 : : OFFSET and WIDTH. */
976 : :
977 : : static void
978 : 5074807 : set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
979 : : tree expr)
980 : : {
981 : : /* Non-constant offsets and widths act as global kills, so there's no point
982 : : trying to use them to derive global DSE candidates. */
983 : 5074807 : HOST_WIDE_INT i, const_offset, const_width;
984 : 5074807 : bool expr_escapes = can_escape (expr);
985 : 5074807 : if (offset.is_constant (&const_offset)
986 : 5074807 : && width.is_constant (&const_width)
987 : 5074807 : && const_offset > -MAX_OFFSET
988 : 5061776 : && const_offset + const_width < MAX_OFFSET)
989 : 62502533 : for (i = const_offset; i < const_offset + const_width; ++i)
990 : : {
991 : 57444349 : bitmap store1;
992 : 57444349 : bitmap store2;
993 : 57444349 : bitmap escaped;
994 : 57444349 : int ai;
995 : 57444349 : if (i < 0)
996 : : {
997 : 43565454 : store1 = group->store1_n;
998 : 43565454 : store2 = group->store2_n;
999 : 43565454 : escaped = group->escaped_n;
1000 : 43565454 : ai = -i;
1001 : : }
1002 : : else
1003 : : {
1004 : 13878895 : store1 = group->store1_p;
1005 : 13878895 : store2 = group->store2_p;
1006 : 13878895 : escaped = group->escaped_p;
1007 : 13878895 : ai = i;
1008 : : }
1009 : :
1010 : 57444349 : if (!bitmap_set_bit (store1, ai))
1011 : 17143747 : bitmap_set_bit (store2, ai);
1012 : : else
1013 : : {
1014 : 40300602 : if (i < 0)
1015 : : {
1016 : 32295693 : if (group->offset_map_size_n < ai)
1017 : 413635 : group->offset_map_size_n = ai;
1018 : : }
1019 : : else
1020 : : {
1021 : 8004909 : if (group->offset_map_size_p < ai)
1022 : 7342005 : group->offset_map_size_p = ai;
1023 : : }
1024 : : }
1025 : 57444349 : if (expr_escapes)
1026 : 43048457 : bitmap_set_bit (escaped, ai);
1027 : : }
1028 : 5074807 : }
1029 : :
1030 : : static void
1031 : 11409717 : reset_active_stores (void)
1032 : : {
1033 : 11409717 : active_local_stores = NULL;
1034 : 11409717 : active_local_stores_len = 0;
1035 : 0 : }
1036 : :
1037 : : /* Free all READ_REC of the LAST_INSN of BB_INFO. */
1038 : :
1039 : : static void
1040 : 11409717 : free_read_records (bb_info_t bb_info)
1041 : : {
1042 : 11409717 : insn_info_t insn_info = bb_info->last_insn;
1043 : 11409717 : read_info_t *ptr = &insn_info->read_rec;
1044 : 20192342 : while (*ptr)
1045 : : {
1046 : 8782625 : read_info_t next = (*ptr)->next;
1047 : 8782625 : read_info_type_pool.remove (*ptr);
1048 : 8782625 : *ptr = next;
1049 : : }
1050 : 11409717 : }
1051 : :
1052 : : /* Set the BB_INFO so that the last insn is marked as a wild read. */
1053 : :
1054 : : static void
1055 : 2767932 : add_wild_read (bb_info_t bb_info)
1056 : : {
1057 : 2767932 : insn_info_t insn_info = bb_info->last_insn;
1058 : 2767932 : insn_info->wild_read = true;
1059 : 2767932 : free_read_records (bb_info);
1060 : 2767932 : reset_active_stores ();
1061 : 2767932 : }
1062 : :
1063 : : /* Set the BB_INFO so that the last insn is marked as a wild read of
1064 : : non-frame locations. */
1065 : :
1066 : : static void
1067 : 8641785 : add_non_frame_wild_read (bb_info_t bb_info)
1068 : : {
1069 : 8641785 : insn_info_t insn_info = bb_info->last_insn;
1070 : 8641785 : insn_info->non_frame_wild_read = true;
1071 : 8641785 : free_read_records (bb_info);
1072 : 8641785 : reset_active_stores ();
1073 : 8641785 : }
1074 : :
1075 : : /* Return true if X is a constant or one of the registers that behave
1076 : : as a constant over the life of a function. This is equivalent to
1077 : : !rtx_varies_p for memory addresses. */
1078 : :
1079 : : static bool
1080 : 72557605 : const_or_frame_p (rtx x)
1081 : : {
1082 : 72557605 : if (CONSTANT_P (x))
1083 : : return true;
1084 : :
1085 : 60720098 : if (GET_CODE (x) == REG)
1086 : : {
1087 : : /* Note that we have to test for the actual rtx used for the frame
1088 : : and arg pointers and not just the register number in case we have
1089 : : eliminated the frame and/or arg pointer and are using it
1090 : : for pseudos. */
1091 : 45336269 : if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
1092 : : /* The arg pointer varies if it is not a fixed register. */
1093 : 38756399 : || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
1094 : 38319185 : || x == pic_offset_table_rtx)
1095 : : return true;
1096 : 38319185 : return false;
1097 : : }
1098 : :
1099 : : return false;
1100 : : }
1101 : :
1102 : : /* Take all reasonable action to put the address of MEM into the form
1103 : : that we can do analysis on.
1104 : :
1105 : : The gold standard is to get the address into the form: address +
1106 : : OFFSET where address is something that rtx_varies_p considers a
1107 : : constant. When we can get the address in this form, we can do
1108 : : global analysis on it. Note that for constant bases, address is
1109 : : not actually returned, only the group_id. The address can be
1110 : : obtained from that.
1111 : :
1112 : : If that fails, we try cselib to get a value we can at least use
1113 : : locally. If that fails we return false.
1114 : :
1115 : : The GROUP_ID is set to -1 for cselib bases and the index of the
1116 : : group for non_varying bases.
1117 : :
1118 : : FOR_READ is true if this is a mem read and false if not. */
1119 : :
1120 : : static bool
1121 : 45696940 : canon_address (rtx mem,
1122 : : int *group_id,
1123 : : poly_int64 *offset,
1124 : : cselib_val **base)
1125 : : {
1126 : 45696940 : machine_mode address_mode = get_address_mode (mem);
1127 : 45696940 : rtx mem_address = XEXP (mem, 0);
1128 : 45696940 : rtx expanded_address, address;
1129 : 45696940 : int expanded;
1130 : :
1131 : 45696940 : cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));
1132 : :
1133 : 45696940 : if (dump_file && (dump_flags & TDF_DETAILS))
1134 : : {
1135 : 0 : fprintf (dump_file, " mem: ");
1136 : 0 : print_inline_rtx (dump_file, mem_address, 0);
1137 : 0 : fprintf (dump_file, "\n");
1138 : : }
1139 : :
1140 : : /* First see if just canon_rtx (mem_address) is const or frame,
1141 : : if not, try cselib_expand_value_rtx and call canon_rtx on that. */
1142 : : address = NULL_RTX;
1143 : 99425454 : for (expanded = 0; expanded < 2; expanded++)
1144 : : {
1145 : 72583117 : if (expanded)
1146 : : {
1147 : : /* Use cselib to replace all of the reg references with the full
1148 : : expression. This will take care of the case where we have
1149 : :
1150 : : r_x = base + offset;
1151 : : val = *r_x;
1152 : :
1153 : : by making it into
1154 : :
1155 : : val = *(base + offset); */
1156 : :
1157 : 26886177 : expanded_address = cselib_expand_value_rtx (mem_address,
1158 : : scratch, 5);
1159 : :
1160 : : /* If this fails, just go with the address from first
1161 : : iteration. */
1162 : 26886177 : if (!expanded_address)
1163 : : break;
1164 : : }
1165 : : else
1166 : : expanded_address = mem_address;
1167 : :
1168 : : /* Split the address into canonical BASE + OFFSET terms. */
1169 : 72583105 : address = canon_rtx (expanded_address);
1170 : :
1171 : 72583105 : *offset = 0;
1172 : :
1173 : 72583105 : if (dump_file && (dump_flags & TDF_DETAILS))
1174 : : {
1175 : 0 : if (expanded)
1176 : : {
1177 : 0 : fprintf (dump_file, "\n after cselib_expand address: ");
1178 : 0 : print_inline_rtx (dump_file, expanded_address, 0);
1179 : 0 : fprintf (dump_file, "\n");
1180 : : }
1181 : :
1182 : 0 : fprintf (dump_file, "\n after canon_rtx address: ");
1183 : 0 : print_inline_rtx (dump_file, address, 0);
1184 : 0 : fprintf (dump_file, "\n");
1185 : : }
1186 : :
1187 : 72583105 : if (GET_CODE (address) == CONST)
1188 : 1030673 : address = XEXP (address, 0);
1189 : :
1190 : 72583105 : address = strip_offset_and_add (address, offset);
1191 : :
1192 : 72583105 : if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
1193 : 72583105 : && const_or_frame_p (address))
1194 : : {
1195 : 18854591 : group_info *group = get_group_info (address);
1196 : :
1197 : 18854591 : if (dump_file && (dump_flags & TDF_DETAILS))
1198 : : {
1199 : 0 : fprintf (dump_file, " gid=%d offset=", group->id);
1200 : 0 : print_dec (*offset, dump_file);
1201 : 0 : fprintf (dump_file, "\n");
1202 : : }
1203 : 18854591 : *base = NULL;
1204 : 18854591 : *group_id = group->id;
1205 : 18854591 : return true;
1206 : : }
1207 : : }
1208 : :
1209 : 26842349 : *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
1210 : 26842349 : *group_id = -1;
1211 : :
1212 : 26842349 : if (*base == NULL)
1213 : : {
1214 : 176699 : if (dump_file && (dump_flags & TDF_DETAILS))
1215 : 0 : fprintf (dump_file, " no cselib val - should be a wild read.\n");
1216 : 176699 : return false;
1217 : : }
1218 : 26665650 : if (dump_file && (dump_flags & TDF_DETAILS))
1219 : : {
1220 : 0 : fprintf (dump_file, " varying cselib base=%u:%u offset = ",
1221 : : (*base)->uid, (*base)->hash);
1222 : 0 : print_dec (*offset, dump_file);
1223 : 0 : fprintf (dump_file, "\n");
1224 : : }
1225 : : return true;
1226 : : }
1227 : :
1228 : :
1229 : : /* Clear the rhs field from the active_local_stores array. */
1230 : :
1231 : : static void
1232 : 85474 : clear_rhs_from_active_local_stores (void)
1233 : : {
1234 : 85474 : insn_info_t ptr = active_local_stores;
1235 : :
1236 : 121606 : while (ptr)
1237 : : {
1238 : 36132 : store_info *store_info = ptr->store_rec;
1239 : : /* Skip the clobbers. */
1240 : 36132 : while (!store_info->is_set)
1241 : 0 : store_info = store_info->next;
1242 : :
1243 : 36132 : store_info->rhs = NULL;
1244 : 36132 : store_info->const_rhs = NULL;
1245 : :
1246 : 36132 : ptr = ptr->next_local_store;
1247 : : }
1248 : 85474 : }
1249 : :
1250 : :
1251 : : /* Mark byte POS bytes from the beginning of store S_INFO as unneeded. */
1252 : :
1253 : : static inline void
1254 : 757186 : set_position_unneeded (store_info *s_info, int pos)
1255 : : {
1256 : 757186 : if (UNLIKELY (s_info->is_large))
1257 : : {
1258 : 418688 : if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
1259 : 418612 : s_info->positions_needed.large.count++;
1260 : : }
1261 : : else
1262 : 338498 : s_info->positions_needed.small_bitmask
1263 : 338498 : &= ~(HOST_WIDE_INT_1U << pos);
1264 : 757186 : }
1265 : :
1266 : : /* Mark the whole store S_INFO as unneeded. */
1267 : :
1268 : : static inline void
1269 : 249402 : set_all_positions_unneeded (store_info *s_info)
1270 : : {
1271 : 249402 : if (UNLIKELY (s_info->is_large))
1272 : : {
1273 : 0 : HOST_WIDE_INT width;
1274 : 0 : if (s_info->width.is_constant (&width))
1275 : : {
1276 : 0 : bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
1277 : 0 : s_info->positions_needed.large.count = width;
1278 : : }
1279 : : else
1280 : : {
1281 : : gcc_checking_assert (!s_info->positions_needed.large.bmap);
1282 : : s_info->positions_needed.large.count = 1;
1283 : : }
1284 : : }
1285 : : else
1286 : 249402 : s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
1287 : 249402 : }
1288 : :
1289 : : /* Return TRUE if any bytes from S_INFO store are needed. */
1290 : :
1291 : : static inline bool
1292 : 99647296 : any_positions_needed_p (store_info *s_info)
1293 : : {
1294 : 99647296 : if (UNLIKELY (s_info->is_large))
1295 : : {
1296 : 107950 : HOST_WIDE_INT width;
1297 : 107950 : if (s_info->width.is_constant (&width))
1298 : : {
1299 : 107950 : gcc_checking_assert (s_info->positions_needed.large.bmap);
1300 : 107950 : return s_info->positions_needed.large.count < width;
1301 : : }
1302 : : else
1303 : : {
1304 : : gcc_checking_assert (!s_info->positions_needed.large.bmap);
1305 : : return s_info->positions_needed.large.count == 0;
1306 : : }
1307 : : }
1308 : : else
1309 : 99539346 : return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
1310 : : }
1311 : :
1312 : : /* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
1313 : : store are known to be needed. */
1314 : :
1315 : : static inline bool
1316 : 206966 : all_positions_needed_p (store_info *s_info, poly_int64 start,
1317 : : poly_int64 width)
1318 : : {
1319 : 206966 : gcc_assert (s_info->rhs);
1320 : 206966 : if (!s_info->width.is_constant ())
1321 : : {
1322 : : gcc_assert (s_info->is_large
1323 : : && !s_info->positions_needed.large.bmap);
1324 : : return s_info->positions_needed.large.count == 0;
1325 : : }
1326 : :
1327 : : /* Otherwise, if START and WIDTH are non-constant, we're asking about
1328 : : a non-constant region of a constant-sized store. We can't say for
1329 : : sure that all positions are needed. */
1330 : 206966 : HOST_WIDE_INT const_start, const_width;
1331 : 206966 : if (!start.is_constant (&const_start)
1332 : 206966 : || !width.is_constant (&const_width))
1333 : : return false;
1334 : :
1335 : 206966 : if (UNLIKELY (s_info->is_large))
1336 : : {
1337 : 148258 : for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
1338 : 133635 : if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
1339 : : return false;
1340 : : return true;
1341 : : }
1342 : : else
1343 : : {
1344 : 192306 : unsigned HOST_WIDE_INT mask
1345 : 192306 : = lowpart_bitmask (const_width) << const_start;
1346 : 192306 : return (s_info->positions_needed.small_bitmask & mask) == mask;
1347 : : }
1348 : : }
1349 : :
1350 : :
1351 : : static rtx get_stored_val (store_info *, machine_mode, poly_int64,
1352 : : poly_int64, basic_block, bool);
1353 : :
1354 : :
1355 : : /* BODY is an instruction pattern that belongs to INSN. Return 1 if
1356 : : there is a candidate store, after adding it to the appropriate
1357 : : local store group if so. */
1358 : :
1359 : : static int
1360 : 123301664 : record_store (rtx body, bb_info_t bb_info)
1361 : : {
1362 : 123301664 : rtx mem, rhs, const_rhs, mem_addr;
1363 : 123301664 : poly_int64 offset = 0;
1364 : 123301664 : poly_int64 width = 0;
1365 : 123301664 : insn_info_t insn_info = bb_info->last_insn;
1366 : 123301664 : store_info *store_info = NULL;
1367 : 123301664 : int group_id;
1368 : 123301664 : cselib_val *base = NULL;
1369 : 123301664 : insn_info_t ptr, last, redundant_reason;
1370 : 123301664 : bool store_is_unused;
1371 : :
1372 : 123301664 : if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
1373 : : return 0;
1374 : :
1375 : 119950544 : mem = SET_DEST (body);
1376 : :
1377 : : /* If this is not used, then this cannot be used to keep the insn
1378 : : from being deleted. On the other hand, it does provide something
1379 : : that can be used to prove that another store is dead. */
1380 : 119950544 : store_is_unused
1381 : 119950544 : = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
1382 : :
1383 : : /* Check whether that value is a suitable memory location. */
1384 : 119950544 : if (!MEM_P (mem))
1385 : : {
1386 : : /* If the set or clobber is unused, then it does not effect our
1387 : : ability to get rid of the entire insn. */
1388 : 99484474 : if (!store_is_unused)
1389 : 84463968 : insn_info->cannot_delete = true;
1390 : 99484474 : return 0;
1391 : : }
1392 : :
1393 : : /* At this point we know mem is a mem. */
1394 : 20466070 : if (GET_MODE (mem) == BLKmode)
1395 : : {
1396 : 1473356 : HOST_WIDE_INT const_size;
1397 : 1473356 : if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
1398 : : {
1399 : 1388803 : if (dump_file && (dump_flags & TDF_DETAILS))
1400 : 0 : fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
1401 : 1388803 : add_wild_read (bb_info);
1402 : 1388803 : insn_info->cannot_delete = true;
1403 : 1388803 : return 0;
1404 : : }
1405 : : /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
1406 : : as memset (addr, 0, 36); */
1407 : 84553 : else if (!MEM_SIZE_KNOWN_P (mem)
1408 : 54335 : || maybe_le (MEM_SIZE (mem), 0)
1409 : : /* This is a limit on the bitmap size, which is only relevant
1410 : : for constant-sized MEMs. */
1411 : 54335 : || (MEM_SIZE (mem).is_constant (&const_size)
1412 : 54335 : && const_size > MAX_OFFSET)
1413 : 53833 : || GET_CODE (body) != SET
1414 : 138384 : || !CONST_INT_P (SET_SRC (body)))
1415 : : {
1416 : 56436 : if (!store_is_unused)
1417 : : {
1418 : : /* If the set or clobber is unused, then it does not effect our
1419 : : ability to get rid of the entire insn. */
1420 : 56436 : insn_info->cannot_delete = true;
1421 : 56436 : clear_rhs_from_active_local_stores ();
1422 : : }
1423 : 56436 : return 0;
1424 : : }
1425 : : }
1426 : :
1427 : : /* We can still process a volatile mem, we just cannot delete it. */
1428 : 19020831 : if (MEM_VOLATILE_P (mem))
1429 : 630264 : insn_info->cannot_delete = true;
1430 : :
1431 : 19020831 : if (!canon_address (mem, &group_id, &offset, &base))
1432 : : {
1433 : 7296 : clear_rhs_from_active_local_stores ();
1434 : 7296 : return 0;
1435 : : }
1436 : :
1437 : 19013535 : if (GET_MODE (mem) == BLKmode)
1438 : 28109 : width = MEM_SIZE (mem);
1439 : : else
1440 : 37970852 : width = GET_MODE_SIZE (GET_MODE (mem));
1441 : :
1442 : 38027070 : if (!endpoint_representable_p (offset, width))
1443 : : {
1444 : 23 : clear_rhs_from_active_local_stores ();
1445 : 23 : return 0;
1446 : : }
1447 : :
1448 : 19013512 : if (known_eq (width, 0))
1449 : : return 0;
1450 : :
1451 : 19013492 : if (group_id >= 0)
1452 : : {
1453 : : /* In the restrictive case where the base is a constant or the
1454 : : frame pointer we can do global analysis. */
1455 : :
1456 : 5074807 : group_info *group
1457 : 5074807 : = rtx_group_vec[group_id];
1458 : 5074807 : tree expr = MEM_EXPR (mem);
1459 : :
1460 : 5074807 : store_info = rtx_store_info_pool.allocate ();
1461 : 5074807 : set_usage_bits (group, offset, width, expr);
1462 : :
1463 : 5074807 : if (dump_file && (dump_flags & TDF_DETAILS))
1464 : : {
1465 : 0 : fprintf (dump_file, " processing const base store gid=%d",
1466 : : group_id);
1467 : 0 : print_range (dump_file, offset, width);
1468 : 0 : fprintf (dump_file, "\n");
1469 : : }
1470 : : }
1471 : : else
1472 : : {
1473 : 13938685 : if (may_be_sp_based_p (XEXP (mem, 0)))
1474 : 12979761 : insn_info->stack_pointer_based = true;
1475 : 13938685 : insn_info->contains_cselib_groups = true;
1476 : :
1477 : 13938685 : store_info = cse_store_info_pool.allocate ();
1478 : 13938685 : group_id = -1;
1479 : :
1480 : 13938685 : if (dump_file && (dump_flags & TDF_DETAILS))
1481 : : {
1482 : 0 : fprintf (dump_file, " processing cselib store ");
1483 : 0 : print_range (dump_file, offset, width);
1484 : 0 : fprintf (dump_file, "\n");
1485 : : }
1486 : : }
1487 : :
1488 : 19013492 : const_rhs = rhs = NULL_RTX;
1489 : 19013492 : if (GET_CODE (body) == SET
1490 : : /* No place to keep the value after ra. */
1491 : 19012519 : && !reload_completed
1492 : 7788073 : && (REG_P (SET_SRC (body))
1493 : 2402700 : || GET_CODE (SET_SRC (body)) == SUBREG
1494 : 2323854 : || CONSTANT_P (SET_SRC (body)))
1495 : 7613699 : && !MEM_VOLATILE_P (mem)
1496 : : /* Sometimes the store and reload is used for truncation and
1497 : : rounding. */
1498 : 26367757 : && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
1499 : : {
1500 : 7353402 : rhs = SET_SRC (body);
1501 : 7353402 : if (CONSTANT_P (rhs))
1502 : : const_rhs = rhs;
1503 : 5334566 : else if (body == PATTERN (insn_info->insn))
1504 : : {
1505 : 5334563 : rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
1506 : 5334563 : if (tem && CONSTANT_P (XEXP (tem, 0)))
1507 : : const_rhs = XEXP (tem, 0);
1508 : : }
1509 : 5334566 : if (const_rhs == NULL_RTX && REG_P (rhs))
1510 : : {
1511 : 5266804 : rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);
1512 : :
1513 : 5266804 : if (tem && CONSTANT_P (tem))
1514 : : const_rhs = tem;
1515 : : else
1516 : : {
1517 : : /* If RHS is set only once to a constant, set CONST_RHS
1518 : : to the constant. */
1519 : 4811398 : rtx def_src = df_find_single_def_src (rhs);
1520 : 4811398 : if (def_src != nullptr && CONSTANT_P (def_src))
1521 : 184875 : const_rhs = def_src;
1522 : : }
1523 : : }
1524 : : }
1525 : :
1526 : : /* Check to see if this stores causes some other stores to be
1527 : : dead. */
1528 : 19013492 : ptr = active_local_stores;
1529 : 19013492 : last = NULL;
1530 : 19013492 : redundant_reason = NULL;
1531 : 19013492 : unsigned char addrspace = MEM_ADDR_SPACE (mem);
1532 : 19013492 : mem = canon_rtx (mem);
1533 : :
1534 : 19013492 : if (group_id < 0)
1535 : 13938685 : mem_addr = base->val_rtx;
1536 : : else
1537 : : {
1538 : 5074807 : group_info *group = rtx_group_vec[group_id];
1539 : 5074807 : mem_addr = group->canon_base_addr;
1540 : : }
1541 : 19013492 : if (maybe_ne (offset, 0))
1542 : 11326830 : mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
1543 : :
1544 : 118660788 : while (ptr)
1545 : : {
1546 : 99647296 : insn_info_t next = ptr->next_local_store;
1547 : 99647296 : class store_info *s_info = ptr->store_rec;
1548 : 99647296 : bool del = true;
1549 : :
1550 : : /* Skip the clobbers. We delete the active insn if this insn
1551 : : shadows the set. To have been put on the active list, it
1552 : : has exactly on set. */
1553 : 99647296 : while (!s_info->is_set)
1554 : 0 : s_info = s_info->next;
1555 : :
1556 : 99647296 : if (s_info->group_id == group_id
1557 : 95421076 : && s_info->cse_base == base
1558 : 80463919 : && s_info->addrspace == addrspace)
1559 : : {
1560 : 80463917 : HOST_WIDE_INT i;
1561 : 80463917 : if (dump_file && (dump_flags & TDF_DETAILS))
1562 : : {
1563 : 0 : fprintf (dump_file, " trying store in insn=%d gid=%d",
1564 : 0 : INSN_UID (ptr->insn), s_info->group_id);
1565 : 0 : print_range (dump_file, s_info->offset, s_info->width);
1566 : 0 : fprintf (dump_file, "\n");
1567 : : }
1568 : :
1569 : : /* Even if PTR won't be eliminated as unneeded, if both
1570 : : PTR and this insn store the same constant value, we might
1571 : : eliminate this insn instead. */
1572 : 80463917 : if (s_info->const_rhs
1573 : 21000603 : && const_rhs
1574 : 18168193 : && known_subrange_p (offset, width,
1575 : 18168193 : s_info->offset, s_info->width)
1576 : 40769 : && all_positions_needed_p (s_info, offset - s_info->offset,
1577 : : width)
1578 : : /* We can only remove the later store if the earlier aliases
1579 : : at least all accesses the later one. */
1580 : 80504518 : && mems_same_for_tbaa_p (s_info->mem, mem))
1581 : : {
1582 : 40515 : if (GET_MODE (mem) == BLKmode)
1583 : : {
1584 : 4 : if (GET_MODE (s_info->mem) == BLKmode
1585 : 4 : && s_info->const_rhs == const_rhs)
1586 : 229 : redundant_reason = ptr;
1587 : : }
1588 : 40511 : else if (s_info->const_rhs == const0_rtx
1589 : 35316 : && const_rhs == const0_rtx)
1590 : : redundant_reason = ptr;
1591 : : else
1592 : : {
1593 : 40495 : rtx val;
1594 : 40495 : start_sequence ();
1595 : 80990 : val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1596 : 40495 : BLOCK_FOR_INSN (insn_info->insn),
1597 : : true);
1598 : 40495 : if (get_insns () != NULL)
1599 : 0 : val = NULL_RTX;
1600 : 40495 : end_sequence ();
1601 : 40495 : if (val && rtx_equal_p (val, const_rhs))
1602 : : redundant_reason = ptr;
1603 : : }
1604 : : }
1605 : :
1606 : 80463917 : HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1607 : 80463917 : if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1608 : : /* The new store touches every byte that S_INFO does. */
1609 : 249402 : set_all_positions_unneeded (s_info);
1610 : 80214515 : else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1611 : 80214515 : && s_info->width.is_constant (&const_s_width)
1612 : 80214515 : && width.is_constant (&const_width))
1613 : : {
1614 : 80214515 : HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1615 : 80214515 : begin_unneeded = MAX (begin_unneeded, 0);
1616 : 80214515 : end_unneeded = MIN (end_unneeded, const_s_width);
1617 : 80971701 : for (i = begin_unneeded; i < end_unneeded; ++i)
1618 : 757186 : set_position_unneeded (s_info, i);
1619 : : }
1620 : : else
1621 : : {
1622 : : /* We don't know which parts of S_INFO are needed and
1623 : : which aren't, so invalidate the RHS. */
1624 : : s_info->rhs = NULL;
1625 : : s_info->const_rhs = NULL;
1626 : : }
1627 : : }
1628 : 19183379 : else if (s_info->rhs)
1629 : : /* Need to see if it is possible for this store to overwrite
1630 : : the value of store_info. If it is, set the rhs to NULL to
1631 : : keep it from being used to remove a load. */
1632 : : {
1633 : 3199581 : if (canon_output_dependence (s_info->mem, true,
1634 : 3199581 : mem, GET_MODE (mem),
1635 : : mem_addr))
1636 : : {
1637 : 1619371 : s_info->rhs = NULL;
1638 : 1619371 : s_info->const_rhs = NULL;
1639 : : }
1640 : : }
1641 : :
1642 : : /* An insn can be deleted if every position of every one of
1643 : : its s_infos is zero. */
1644 : 99647296 : if (any_positions_needed_p (s_info))
1645 : : del = false;
1646 : :
1647 : 251174 : if (del)
1648 : : {
1649 : 251174 : insn_info_t insn_to_delete = ptr;
1650 : :
1651 : 251174 : active_local_stores_len--;
1652 : 251174 : if (last)
1653 : 21590 : last->next_local_store = ptr->next_local_store;
1654 : : else
1655 : 229584 : active_local_stores = ptr->next_local_store;
1656 : :
1657 : 251174 : if (!insn_to_delete->cannot_delete)
1658 : 24374 : delete_dead_store_insn (insn_to_delete);
1659 : : }
1660 : : else
1661 : : last = ptr;
1662 : :
1663 : : ptr = next;
1664 : : }
1665 : :
1666 : : /* Finish filling in the store_info. */
1667 : 19013492 : store_info->next = insn_info->store_rec;
1668 : 19013492 : insn_info->store_rec = store_info;
1669 : 19013492 : store_info->mem = mem;
1670 : 19013492 : store_info->mem_addr = mem_addr;
1671 : 19013492 : store_info->cse_base = base;
1672 : 19013492 : HOST_WIDE_INT const_width;
1673 : 19013492 : if (!width.is_constant (&const_width))
1674 : : {
1675 : : store_info->is_large = true;
1676 : : store_info->positions_needed.large.count = 0;
1677 : : store_info->positions_needed.large.bmap = NULL;
1678 : : }
1679 : 19013492 : else if (const_width > HOST_BITS_PER_WIDE_INT)
1680 : : {
1681 : 26336 : store_info->is_large = true;
1682 : 26336 : store_info->positions_needed.large.count = 0;
1683 : 26336 : store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1684 : : }
1685 : : else
1686 : : {
1687 : 18987156 : store_info->is_large = false;
1688 : 18987156 : store_info->positions_needed.small_bitmask
1689 : 18987156 : = lowpart_bitmask (const_width);
1690 : : }
1691 : 19013492 : store_info->group_id = group_id;
1692 : 19013492 : store_info->offset = offset;
1693 : 19013492 : store_info->width = width;
1694 : 19013492 : store_info->is_set = GET_CODE (body) == SET;
1695 : 19013492 : store_info->rhs = rhs;
1696 : 19013492 : store_info->const_rhs = const_rhs;
1697 : 19013492 : store_info->redundant_reason = redundant_reason;
1698 : 19013492 : store_info->addrspace = addrspace;
1699 : :
1700 : : /* If this is a clobber, we return 0. We will only be able to
1701 : : delete this insn if there is only one store USED store, but we
1702 : : can use the clobber to delete other stores earlier. */
1703 : 19013492 : return store_info->is_set ? 1 : 0;
1704 : : }
1705 : :
1706 : :
1707 : : static void
1708 : 0 : dump_insn_info (const char * start, insn_info_t insn_info)
1709 : : {
1710 : 0 : fprintf (dump_file, "%s insn=%d %s\n", start,
1711 : 0 : INSN_UID (insn_info->insn),
1712 : 0 : insn_info->store_rec ? "has store" : "naked");
1713 : 0 : }
1714 : :
1715 : :
1716 : : /* If the modes are different and the value's source and target do not
1717 : : line up, we need to extract the value from lower part of the rhs of
1718 : : the store, shift it, and then put it into a form that can be shoved
1719 : : into the read_insn. This function generates a right SHIFT of a
1720 : : value that is at least ACCESS_BYTES bytes wide of READ_MODE. The
1721 : : shift sequence is returned or NULL if we failed to find a
1722 : : shift. */
1723 : :
1724 : : static rtx
1725 : 83886 : find_shift_sequence (poly_int64 access_bytes,
1726 : : store_info *store_info,
1727 : : machine_mode read_mode,
1728 : : poly_int64 shift, bool speed, bool require_cst)
1729 : : {
1730 : 83886 : machine_mode store_mode = GET_MODE (store_info->mem);
1731 : 83886 : scalar_int_mode new_mode;
1732 : 83886 : rtx read_reg = NULL;
1733 : :
1734 : : /* If a constant was stored into memory, try to simplify it here,
1735 : : otherwise the cost of the shift might preclude this optimization
1736 : : e.g. at -Os, even when no actual shift will be needed. */
1737 : 83886 : auto access_bits = access_bytes * BITS_PER_UNIT;
1738 : 83886 : if (store_info->const_rhs
1739 : 25224 : && known_le (access_bytes, GET_MODE_SIZE (MAX_MODE_INT))
1740 : 109110 : && smallest_int_mode_for_size (access_bits).exists (&new_mode))
1741 : : {
1742 : 25224 : auto byte = subreg_lowpart_offset (new_mode, store_mode);
1743 : 25224 : rtx ret
1744 : 25224 : = simplify_subreg (new_mode, store_info->const_rhs, store_mode, byte);
1745 : 25224 : if (ret && CONSTANT_P (ret))
1746 : : {
1747 : 25224 : rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1748 : 25224 : ret = simplify_const_binary_operation (LSHIFTRT, new_mode, ret,
1749 : : shift_rtx);
1750 : 25224 : if (ret && CONSTANT_P (ret))
1751 : : {
1752 : 25224 : byte = subreg_lowpart_offset (read_mode, new_mode);
1753 : 25224 : ret = simplify_subreg (read_mode, ret, new_mode, byte);
1754 : 25224 : if (ret && CONSTANT_P (ret)
1755 : 50448 : && (set_src_cost (ret, read_mode, speed)
1756 : : <= COSTS_N_INSNS (1)))
1757 : 25108 : return ret;
1758 : : }
1759 : : }
1760 : : }
1761 : :
1762 : 58778 : if (require_cst)
1763 : : return NULL_RTX;
1764 : :
1765 : : /* Some machines like the x86 have shift insns for each size of
1766 : : operand. Other machines like the ppc or the ia-64 may only have
1767 : : shift insns that shift values within 32 or 64 bit registers.
1768 : : This loop tries to find the smallest shift insn that will right
1769 : : justify the value we want to read but is available in one insn on
1770 : : the machine. */
1771 : :
1772 : 58747 : opt_scalar_int_mode new_mode_iter;
1773 : 262007 : FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
1774 : : {
1775 : 262007 : rtx target, new_reg, new_lhs;
1776 : 262007 : rtx_insn *shift_seq, *insn;
1777 : 262007 : int cost;
1778 : :
1779 : 262007 : new_mode = new_mode_iter.require ();
1780 : 631522 : if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1781 : : break;
1782 : 616725 : if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (read_mode)))
1783 : 136629 : continue;
1784 : :
1785 : : /* Try a wider mode if truncating the store mode to NEW_MODE
1786 : : requires a real instruction. */
1787 : 137892 : if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1788 : 68946 : && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1789 : 0 : continue;
1790 : :
1791 : : /* Also try a wider mode if the necessary punning is either not
1792 : : desirable or not possible. */
1793 : 131739 : if (!CONSTANT_P (store_info->rhs)
1794 : 68946 : && !targetm.modes_tieable_p (new_mode, store_mode))
1795 : 62793 : continue;
1796 : :
1797 : 12306 : if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
1798 : 13293 : && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
1799 : : {
1800 : : /* Try to implement the shift using a subreg. */
1801 : 3570 : poly_int64 offset
1802 : 3570 : = subreg_offset_from_lsb (new_mode, store_mode, shift);
1803 : 3570 : rtx rhs_subreg = simplify_gen_subreg (new_mode, store_info->rhs,
1804 : : store_mode, offset);
1805 : 3570 : if (rhs_subreg)
1806 : : {
1807 : 1 : read_reg
1808 : 1 : = extract_low_bits (read_mode, new_mode, copy_rtx (rhs_subreg));
1809 : 1 : break;
1810 : : }
1811 : : }
1812 : :
1813 : 12304 : if (maybe_lt (GET_MODE_SIZE (new_mode), access_bytes))
1814 : 3704 : continue;
1815 : :
1816 : 2448 : new_reg = gen_reg_rtx (new_mode);
1817 : :
1818 : 2448 : start_sequence ();
1819 : :
1820 : : /* In theory we could also check for an ashr. Ian Taylor knows
1821 : : of one dsp where the cost of these two was not the same. But
1822 : : this really is a rare case anyway. */
1823 : 2448 : target = expand_binop (new_mode, lshr_optab, new_reg,
1824 : : gen_int_shift_amount (new_mode, shift),
1825 : : new_reg, 1, OPTAB_DIRECT);
1826 : :
1827 : 2448 : shift_seq = get_insns ();
1828 : 2448 : end_sequence ();
1829 : :
1830 : 2448 : if (target != new_reg || shift_seq == NULL)
1831 : 0 : continue;
1832 : :
1833 : : cost = 0;
1834 : 4896 : for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1835 : 2448 : if (INSN_P (insn))
1836 : 2448 : cost += insn_cost (insn, speed);
1837 : :
1838 : : /* The computation up to here is essentially independent
1839 : : of the arguments and could be precomputed. It may
1840 : : not be worth doing so. We could precompute if
1841 : : worthwhile or at least cache the results. The result
1842 : : technically depends on both SHIFT and ACCESS_BYTES,
1843 : : but in practice the answer will depend only on ACCESS_BYTES. */
1844 : :
1845 : 2448 : if (cost > COSTS_N_INSNS (1))
1846 : 134 : continue;
1847 : :
1848 : 2314 : new_lhs = extract_low_bits (new_mode, store_mode,
1849 : : copy_rtx (store_info->rhs));
1850 : 2314 : if (new_lhs == NULL_RTX)
1851 : 0 : continue;
1852 : :
1853 : : /* We found an acceptable shift. Generate a move to
1854 : : take the value from the store and put it into the
1855 : : shift pseudo, then shift it, then generate another
1856 : : move to put in into the target of the read. */
1857 : 2314 : emit_move_insn (new_reg, new_lhs);
1858 : 2314 : emit_insn (shift_seq);
1859 : 2314 : read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1860 : 2314 : break;
1861 : : }
1862 : :
1863 : : return read_reg;
1864 : : }
1865 : :
1866 : :
1867 : : /* Call back for note_stores to find the hard regs set or clobbered by
1868 : : insn. Data is a bitmap of the hardregs set so far. */
1869 : :
1870 : : static void
1871 : 141955 : look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1872 : : {
1873 : 141955 : bitmap regs_set = (bitmap) data;
1874 : :
1875 : 141955 : if (REG_P (x)
1876 : 141955 : && HARD_REGISTER_P (x))
1877 : 2292 : bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1878 : 141955 : }
1879 : :
1880 : : /* Helper function for replace_read and record_store.
1881 : : Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1882 : : consisting of READ_WIDTH bytes starting from READ_OFFSET. Return NULL
1883 : : if not successful. If REQUIRE_CST is true, return always constant. */
1884 : :
1885 : : static rtx
1886 : 204308 : get_stored_val (store_info *store_info, machine_mode read_mode,
1887 : : poly_int64 read_offset, poly_int64 read_width,
1888 : : basic_block bb, bool require_cst)
1889 : : {
1890 : 204308 : machine_mode store_mode = GET_MODE (store_info->mem);
1891 : 204308 : poly_int64 gap;
1892 : 204308 : rtx read_reg;
1893 : :
1894 : : /* To get here the read is within the boundaries of the write so
1895 : : shift will never be negative. Start out with the shift being in
1896 : : bytes. */
1897 : 204308 : if (store_mode == BLKmode)
1898 : : gap = 0;
1899 : 189633 : else if (BYTES_BIG_ENDIAN)
1900 : : gap = ((store_info->offset + store_info->width)
1901 : : - (read_offset + read_width));
1902 : : else
1903 : 189633 : gap = read_offset - store_info->offset;
1904 : :
1905 : 204308 : if (maybe_ne (gap, 0))
1906 : : {
1907 : : if (!gap.is_constant ())
1908 : : return NULL_RTX;
1909 : :
1910 : 83886 : poly_int64 shift = gap * BITS_PER_UNIT;
1911 : 167772 : poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1912 : 83886 : read_reg = find_shift_sequence (access_size, store_info, read_mode,
1913 : 83886 : shift, optimize_bb_for_speed_p (bb),
1914 : : require_cst);
1915 : : }
1916 : 120422 : else if (store_mode == BLKmode)
1917 : : {
1918 : : /* The store is a memset (addr, const_val, const_size). */
1919 : 14675 : gcc_assert (CONST_INT_P (store_info->rhs));
1920 : 14675 : scalar_int_mode int_store_mode;
1921 : 14675 : if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1922 : : read_reg = NULL_RTX;
1923 : 14675 : else if (store_info->rhs == const0_rtx)
1924 : 14674 : read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1925 : 2 : else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1926 : : || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1927 : : read_reg = NULL_RTX;
1928 : : else
1929 : : {
1930 : 1 : unsigned HOST_WIDE_INT c
1931 : 1 : = INTVAL (store_info->rhs)
1932 : 1 : & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
1933 : 1 : int shift = BITS_PER_UNIT;
1934 : 4 : while (shift < HOST_BITS_PER_WIDE_INT)
1935 : : {
1936 : 3 : c |= (c << shift);
1937 : 3 : shift <<= 1;
1938 : : }
1939 : 1 : read_reg = gen_int_mode (c, int_store_mode);
1940 : 1 : read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1941 : : }
1942 : : }
1943 : 105747 : else if (store_info->const_rhs
1944 : 12424 : && (require_cst
1945 : 9628 : || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1946 : 8814 : read_reg = extract_low_bits (read_mode, store_mode,
1947 : : copy_rtx (store_info->const_rhs));
1948 : 96933 : else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
1949 : 14336 : && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
1950 : 7168 : && targetm.modes_tieable_p (read_mode, store_mode)
1951 : 104041 : && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
1952 : : subreg_lowpart_offset (read_mode, store_mode)))
1953 : 7108 : read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
1954 : : else
1955 : 89825 : read_reg = extract_low_bits (read_mode, store_mode,
1956 : : copy_rtx (store_info->rhs));
1957 : 204308 : if (require_cst && read_reg && !CONSTANT_P (read_reg))
1958 : 204308 : read_reg = NULL_RTX;
1959 : 204308 : return read_reg;
1960 : : }
1961 : :
1962 : : /* Take a sequence of:
1963 : : A <- r1
1964 : : ...
1965 : : ... <- A
1966 : :
1967 : : and change it into
1968 : : r2 <- r1
1969 : : A <- r1
1970 : : ...
1971 : : ... <- r2
1972 : :
1973 : : or
1974 : :
1975 : : r3 <- extract (r1)
1976 : : r3 <- r3 >> shift
1977 : : r2 <- extract (r3)
1978 : : ... <- r2
1979 : :
1980 : : or
1981 : :
1982 : : r2 <- extract (r1)
1983 : : ... <- r2
1984 : :
1985 : : Depending on the alignment and the mode of the store and
1986 : : subsequent load.
1987 : :
1988 : :
1989 : : The STORE_INFO and STORE_INSN are for the store and READ_INFO
1990 : : and READ_INSN are for the read. Return true if the replacement
1991 : : went ok. */
1992 : :
1993 : : static bool
1994 : 163813 : replace_read (store_info *store_info, insn_info_t store_insn,
1995 : : read_info_t read_info, insn_info_t read_insn, rtx *loc)
1996 : : {
1997 : 163813 : machine_mode store_mode = GET_MODE (store_info->mem);
1998 : 163813 : machine_mode read_mode = GET_MODE (read_info->mem);
1999 : 163813 : rtx_insn *insns, *this_insn;
2000 : 163813 : rtx read_reg;
2001 : 163813 : basic_block bb;
2002 : :
2003 : 163813 : if (!dbg_cnt (dse))
2004 : : return false;
2005 : :
2006 : : /* Create a sequence of instructions to set up the read register.
2007 : : This sequence goes immediately before the store and its result
2008 : : is read by the load.
2009 : :
2010 : : We need to keep this in perspective. We are replacing a read
2011 : : with a sequence of insns, but the read will almost certainly be
2012 : : in cache, so it is not going to be an expensive one. Thus, we
2013 : : are not willing to do a multi insn shift or worse a subroutine
2014 : : call to get rid of the read. */
2015 : 163813 : if (dump_file && (dump_flags & TDF_DETAILS))
2016 : 0 : fprintf (dump_file, "trying to replace %smode load in insn %d"
2017 : : " from %smode store in insn %d\n",
2018 : 0 : GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
2019 : 0 : GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
2020 : 163813 : start_sequence ();
2021 : 163813 : bb = BLOCK_FOR_INSN (read_insn->insn);
2022 : 163813 : read_reg = get_stored_val (store_info,
2023 : : read_mode, read_info->offset, read_info->width,
2024 : : bb, false);
2025 : 163813 : if (read_reg == NULL_RTX)
2026 : : {
2027 : 59540 : end_sequence ();
2028 : 59540 : if (dump_file && (dump_flags & TDF_DETAILS))
2029 : 0 : fprintf (dump_file, " -- could not extract bits of stored value\n");
2030 : 59540 : return false;
2031 : : }
2032 : : /* Force the value into a new register so that it won't be clobbered
2033 : : between the store and the load. */
2034 : 104273 : if (WORD_REGISTER_OPERATIONS
2035 : : && GET_CODE (read_reg) == SUBREG
2036 : : && REG_P (SUBREG_REG (read_reg))
2037 : : && GET_MODE (SUBREG_REG (read_reg)) == word_mode)
2038 : : {
2039 : : /* For WORD_REGISTER_OPERATIONS with subreg of word_mode register
2040 : : force SUBREG_REG into a new register rather than the SUBREG. */
2041 : : rtx r = copy_to_mode_reg (word_mode, SUBREG_REG (read_reg));
2042 : : read_reg = shallow_copy_rtx (read_reg);
2043 : : SUBREG_REG (read_reg) = r;
2044 : : }
2045 : : else
2046 : 104273 : read_reg = copy_to_mode_reg (read_mode, read_reg);
2047 : 104273 : insns = get_insns ();
2048 : 104273 : end_sequence ();
2049 : :
2050 : 104273 : if (insns != NULL_RTX)
2051 : : {
2052 : : /* Now we have to scan the set of new instructions to see if the
2053 : : sequence contains and sets of hardregs that happened to be
2054 : : live at this point. For instance, this can happen if one of
2055 : : the insns sets the CC and the CC happened to be live at that
2056 : : point. This does occasionally happen, see PR 37922. */
2057 : 104273 : bitmap regs_set = BITMAP_ALLOC (®_obstack);
2058 : :
2059 : 243936 : for (this_insn = insns;
2060 : 243936 : this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
2061 : : {
2062 : 139666 : if (insn_invalid_p (this_insn, false))
2063 : : {
2064 : 3 : if (dump_file && (dump_flags & TDF_DETAILS))
2065 : : {
2066 : 0 : fprintf (dump_file, " -- replacing the loaded MEM with ");
2067 : 0 : print_simple_rtl (dump_file, read_reg);
2068 : 0 : fprintf (dump_file, " led to an invalid instruction\n");
2069 : : }
2070 : 3 : BITMAP_FREE (regs_set);
2071 : 3 : return false;
2072 : : }
2073 : 139663 : note_stores (this_insn, look_for_hardregs, regs_set);
2074 : : }
2075 : :
2076 : 104270 : if (store_insn->fixed_regs_live)
2077 : 104270 : bitmap_and_into (regs_set, store_insn->fixed_regs_live);
2078 : 104270 : if (!bitmap_empty_p (regs_set))
2079 : : {
2080 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
2081 : : {
2082 : 0 : fprintf (dump_file, "abandoning replacement because sequence "
2083 : : "clobbers live hardregs:");
2084 : 0 : df_print_regset (dump_file, regs_set);
2085 : : }
2086 : :
2087 : 0 : BITMAP_FREE (regs_set);
2088 : 0 : return false;
2089 : : }
2090 : 104270 : BITMAP_FREE (regs_set);
2091 : : }
2092 : :
2093 : 104270 : subrtx_iterator::array_type array;
2094 : 515272 : FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
2095 : : {
2096 : 411002 : const_rtx x = *iter;
2097 : 411002 : if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2098 : : {
2099 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
2100 : 0 : fprintf (dump_file, " -- replacing the MEM failed due to address "
2101 : : "side-effects\n");
2102 : 0 : return false;
2103 : : }
2104 : : }
2105 : :
2106 : 104270 : if (validate_change (read_insn->insn, loc, read_reg, 0))
2107 : : {
2108 : 82838 : deferred_change *change = deferred_change_pool.allocate ();
2109 : :
2110 : : /* Insert this right before the store insn where it will be safe
2111 : : from later insns that might change it before the read. */
2112 : 82838 : emit_insn_before (insns, store_insn->insn);
2113 : :
2114 : : /* And now for the kludge part: cselib croaks if you just
2115 : : return at this point. There are two reasons for this:
2116 : :
2117 : : 1) Cselib has an idea of how many pseudos there are and
2118 : : that does not include the new ones we just added.
2119 : :
2120 : : 2) Cselib does not know about the move insn we added
2121 : : above the store_info, and there is no way to tell it
2122 : : about it, because it has "moved on".
2123 : :
2124 : : Problem (1) is fixable with a certain amount of engineering.
2125 : : Problem (2) is requires starting the bb from scratch. This
2126 : : could be expensive.
2127 : :
2128 : : So we are just going to have to lie. The move/extraction
2129 : : insns are not really an issue, cselib did not see them. But
2130 : : the use of the new pseudo read_insn is a real problem because
2131 : : cselib has not scanned this insn. The way that we solve this
2132 : : problem is that we are just going to put the mem back for now
2133 : : and when we are finished with the block, we undo this. We
2134 : : keep a table of mems to get rid of. At the end of the basic
2135 : : block we can put them back. */
2136 : :
2137 : 82838 : *loc = read_info->mem;
2138 : 82838 : change->next = deferred_change_list;
2139 : 82838 : deferred_change_list = change;
2140 : 82838 : change->loc = loc;
2141 : 82838 : change->reg = read_reg;
2142 : :
2143 : : /* Get rid of the read_info, from the point of view of the
2144 : : rest of dse, play like this read never happened. */
2145 : 82838 : read_insn->read_rec = read_info->next;
2146 : 82838 : read_info_type_pool.remove (read_info);
2147 : 82838 : if (dump_file && (dump_flags & TDF_DETAILS))
2148 : : {
2149 : 0 : fprintf (dump_file, " -- replaced the loaded MEM with ");
2150 : 0 : print_simple_rtl (dump_file, read_reg);
2151 : 0 : fprintf (dump_file, "\n");
2152 : : }
2153 : 82838 : return true;
2154 : : }
2155 : : else
2156 : : {
2157 : 21432 : if (dump_file && (dump_flags & TDF_DETAILS))
2158 : : {
2159 : 0 : fprintf (dump_file, " -- replacing the loaded MEM with ");
2160 : 0 : print_simple_rtl (dump_file, read_reg);
2161 : 0 : fprintf (dump_file, " led to an invalid instruction\n");
2162 : : }
2163 : 21432 : return false;
2164 : : }
2165 : 104270 : }
2166 : :
2167 : : /* Check the address of MEM *LOC and kill any appropriate stores that may
2168 : : be active. */
2169 : :
2170 : : static void
2171 : 29460025 : check_mem_read_rtx (rtx *loc, bb_info_t bb_info, bool used_in_call = false)
2172 : : {
2173 : 29460025 : rtx mem = *loc, mem_addr;
2174 : 29460025 : insn_info_t insn_info;
2175 : 29460025 : poly_int64 offset = 0;
2176 : 29460025 : poly_int64 width = 0;
2177 : 29460025 : cselib_val *base = NULL;
2178 : 29460025 : int group_id;
2179 : 29460025 : read_info_t read_info;
2180 : :
2181 : 29460025 : insn_info = bb_info->last_insn;
2182 : :
2183 : 29460025 : if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2184 : 29460025 : || MEM_VOLATILE_P (mem))
2185 : : {
2186 : 1094568 : if (crtl->stack_protect_guard
2187 : 1068 : && (MEM_EXPR (mem) == crtl->stack_protect_guard
2188 : 712 : || (crtl->stack_protect_guard_decl
2189 : 712 : && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
2190 : 1095636 : && MEM_VOLATILE_P (mem))
2191 : : {
2192 : : /* This is either the stack protector canary on the stack,
2193 : : which ought to be written by a MEM_VOLATILE_P store and
2194 : : thus shouldn't be deleted and is read at the very end of
2195 : : function, but shouldn't conflict with any other store.
2196 : : Or it is __stack_chk_guard variable or TLS or whatever else
2197 : : MEM holding the canary value, which really shouldn't be
2198 : : ever modified in -fstack-protector* protected functions,
2199 : : otherwise the prologue store wouldn't match the epilogue
2200 : : check. */
2201 : 1068 : if (dump_file && (dump_flags & TDF_DETAILS))
2202 : 0 : fprintf (dump_file, " stack protector canary read ignored.\n");
2203 : 1068 : insn_info->cannot_delete = true;
2204 : 3036226 : return;
2205 : : }
2206 : :
2207 : 1093500 : if (dump_file && (dump_flags & TDF_DETAILS))
2208 : 0 : fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2209 : 1093500 : add_wild_read (bb_info);
2210 : 1093500 : insn_info->cannot_delete = true;
2211 : 1093500 : return;
2212 : : }
2213 : :
2214 : : /* If it is reading readonly mem, then there can be no conflict with
2215 : : another write. */
2216 : 28365457 : if (MEM_READONLY_P (mem))
2217 : : return;
2218 : :
2219 : 26676109 : if (!canon_address (mem, &group_id, &offset, &base))
2220 : : {
2221 : 169403 : if (dump_file && (dump_flags & TDF_DETAILS))
2222 : 0 : fprintf (dump_file, " adding wild read, canon_address failure.\n");
2223 : 169403 : add_wild_read (bb_info);
2224 : 169403 : return;
2225 : : }
2226 : :
2227 : 26506706 : if (GET_MODE (mem) == BLKmode)
2228 : 63800 : width = -1;
2229 : : else
2230 : 52885812 : width = GET_MODE_SIZE (GET_MODE (mem));
2231 : :
2232 : 53013412 : if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2233 : : {
2234 : 69 : if (dump_file && (dump_flags & TDF_DETAILS))
2235 : 0 : fprintf (dump_file, " adding wild read, due to overflow.\n");
2236 : 69 : add_wild_read (bb_info);
2237 : 69 : return;
2238 : : }
2239 : :
2240 : 26506637 : read_info = read_info_type_pool.allocate ();
2241 : 26506637 : read_info->group_id = group_id;
2242 : 26506637 : read_info->mem = mem;
2243 : 26506637 : read_info->offset = offset;
2244 : 26506637 : read_info->width = width;
2245 : 26506637 : read_info->next = insn_info->read_rec;
2246 : 26506637 : insn_info->read_rec = read_info;
2247 : 26506637 : if (group_id < 0)
2248 : 12726922 : mem_addr = base->val_rtx;
2249 : : else
2250 : : {
2251 : 13779715 : group_info *group = rtx_group_vec[group_id];
2252 : 13779715 : mem_addr = group->canon_base_addr;
2253 : : }
2254 : 26506637 : if (maybe_ne (offset, 0))
2255 : 11606534 : mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2256 : : /* Avoid passing VALUE RTXen as mem_addr to canon_true_dependence
2257 : : which will over and over re-create proper RTL and re-apply the
2258 : : offset above. See PR80960 where we almost allocate 1.6GB of PLUS
2259 : : RTXen that way. */
2260 : 26506637 : mem_addr = get_addr (mem_addr);
2261 : :
2262 : 26506637 : if (group_id >= 0)
2263 : : {
2264 : : /* This is the restricted case where the base is a constant or
2265 : : the frame pointer and offset is a constant. */
2266 : 13779715 : insn_info_t i_ptr = active_local_stores;
2267 : 13779715 : insn_info_t last = NULL;
2268 : :
2269 : 13779715 : if (dump_file && (dump_flags & TDF_DETAILS))
2270 : : {
2271 : 0 : if (!known_size_p (width))
2272 : 0 : fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2273 : : group_id);
2274 : : else
2275 : : {
2276 : 0 : fprintf (dump_file, " processing const load gid=%d", group_id);
2277 : 0 : print_range (dump_file, offset, width);
2278 : 0 : fprintf (dump_file, "\n");
2279 : : }
2280 : : }
2281 : :
2282 : 32204070 : while (i_ptr)
2283 : : {
2284 : 18506301 : bool remove = false;
2285 : 18506301 : store_info *store_info = i_ptr->store_rec;
2286 : :
2287 : : /* Skip the clobbers. */
2288 : 18506301 : while (!store_info->is_set)
2289 : 0 : store_info = store_info->next;
2290 : :
2291 : : /* There are three cases here. */
2292 : 18506301 : if (store_info->group_id < 0)
2293 : : /* We have a cselib store followed by a read from a
2294 : : const base. */
2295 : 10412653 : remove
2296 : 10412653 : = canon_true_dependence (store_info->mem,
2297 : 10412653 : GET_MODE (store_info->mem),
2298 : : store_info->mem_addr,
2299 : : mem, mem_addr);
2300 : :
2301 : 8093648 : else if (group_id == store_info->group_id)
2302 : : {
2303 : : /* This is a block mode load. We may get lucky and
2304 : : canon_true_dependence may save the day. */
2305 : 4384268 : if (!known_size_p (width))
2306 : 23221 : remove
2307 : 23221 : = canon_true_dependence (store_info->mem,
2308 : 23221 : GET_MODE (store_info->mem),
2309 : : store_info->mem_addr,
2310 : : mem, mem_addr);
2311 : :
2312 : : /* If this read is just reading back something that we just
2313 : : stored, rewrite the read. */
2314 : : else
2315 : : {
2316 : 4361047 : if (!used_in_call
2317 : 4360813 : && store_info->rhs
2318 : 3340855 : && known_subrange_p (offset, width, store_info->offset,
2319 : 3340855 : store_info->width)
2320 : 164347 : && all_positions_needed_p (store_info,
2321 : 164347 : offset - store_info->offset,
2322 : : width)
2323 : 4523039 : && replace_read (store_info, i_ptr, read_info,
2324 : : insn_info, loc))
2325 : : return;
2326 : :
2327 : : /* The bases are the same, just see if the offsets
2328 : : could overlap. */
2329 : 4279101 : if (ranges_maybe_overlap_p (offset, width,
2330 : 4279101 : store_info->offset,
2331 : 4279101 : store_info->width))
2332 : : remove = true;
2333 : : }
2334 : : }
2335 : :
2336 : : /* else
2337 : : The else case that is missing here is that the
2338 : : bases are constant but different. There is nothing
2339 : : to do here because there is no overlap. */
2340 : :
2341 : 10435874 : if (remove)
2342 : : {
2343 : 5547000 : if (dump_file && (dump_flags & TDF_DETAILS))
2344 : 0 : dump_insn_info ("removing from active", i_ptr);
2345 : :
2346 : 5547000 : active_local_stores_len--;
2347 : 5547000 : if (last)
2348 : 507329 : last->next_local_store = i_ptr->next_local_store;
2349 : : else
2350 : 5039671 : active_local_stores = i_ptr->next_local_store;
2351 : : }
2352 : : else
2353 : : last = i_ptr;
2354 : 18424355 : i_ptr = i_ptr->next_local_store;
2355 : : }
2356 : : }
2357 : : else
2358 : : {
2359 : 12726922 : insn_info_t i_ptr = active_local_stores;
2360 : 12726922 : insn_info_t last = NULL;
2361 : 12726922 : if (dump_file && (dump_flags & TDF_DETAILS))
2362 : : {
2363 : 0 : fprintf (dump_file, " processing cselib load mem:");
2364 : 0 : print_inline_rtx (dump_file, mem, 0);
2365 : 0 : fprintf (dump_file, "\n");
2366 : : }
2367 : :
2368 : 32566991 : while (i_ptr)
2369 : : {
2370 : 19840961 : bool remove = false;
2371 : 19840961 : store_info *store_info = i_ptr->store_rec;
2372 : :
2373 : 19840961 : if (dump_file && (dump_flags & TDF_DETAILS))
2374 : 0 : fprintf (dump_file, " processing cselib load against insn %d\n",
2375 : 0 : INSN_UID (i_ptr->insn));
2376 : :
2377 : : /* Skip the clobbers. */
2378 : 19840965 : while (!store_info->is_set)
2379 : 4 : store_info = store_info->next;
2380 : :
2381 : : /* If this read is just reading back something that we just
2382 : : stored, rewrite the read. */
2383 : 19840961 : if (!used_in_call
2384 : 19840913 : && store_info->rhs
2385 : 1044869 : && store_info->group_id == -1
2386 : 550201 : && store_info->cse_base == base
2387 : 77345 : && known_subrange_p (offset, width, store_info->offset,
2388 : 77345 : store_info->width)
2389 : 1850 : && all_positions_needed_p (store_info,
2390 : 1850 : offset - store_info->offset, width)
2391 : 19842782 : && replace_read (store_info, i_ptr, read_info, insn_info, loc))
2392 : : return;
2393 : :
2394 : 39680138 : remove = canon_true_dependence (store_info->mem,
2395 : 19840069 : GET_MODE (store_info->mem),
2396 : : store_info->mem_addr,
2397 : : mem, mem_addr);
2398 : :
2399 : 19840069 : if (remove)
2400 : : {
2401 : 1384539 : if (dump_file && (dump_flags & TDF_DETAILS))
2402 : 0 : dump_insn_info ("removing from active", i_ptr);
2403 : :
2404 : 1384539 : active_local_stores_len--;
2405 : 1384539 : if (last)
2406 : 287520 : last->next_local_store = i_ptr->next_local_store;
2407 : : else
2408 : 1097019 : active_local_stores = i_ptr->next_local_store;
2409 : : }
2410 : : else
2411 : : last = i_ptr;
2412 : 19840069 : i_ptr = i_ptr->next_local_store;
2413 : : }
2414 : : }
2415 : : }
2416 : :
2417 : : /* A note_uses callback in which DATA points the INSN_INFO for
2418 : : as check_mem_read_rtx. Nullify the pointer if i_m_r_m_r returns
2419 : : true for any part of *LOC. */
2420 : :
2421 : : static void
2422 : 137440229 : check_mem_read_use (rtx *loc, void *data)
2423 : : {
2424 : 137440229 : subrtx_ptr_iterator::array_type array;
2425 : 498768086 : FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2426 : : {
2427 : 361327857 : rtx *loc = *iter;
2428 : 361327857 : if (MEM_P (*loc))
2429 : 29356812 : check_mem_read_rtx (loc, (bb_info_t) data);
2430 : : }
2431 : 137440229 : }
2432 : :
2433 : :
2434 : : /* Get arguments passed to CALL_INSN. Return TRUE if successful.
2435 : : So far it only handles arguments passed in registers. */
2436 : :
2437 : : static bool
2438 : 22616 : get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2439 : : {
2440 : 22616 : CUMULATIVE_ARGS args_so_far_v;
2441 : 22616 : cumulative_args_t args_so_far;
2442 : 22616 : tree arg;
2443 : 22616 : int idx;
2444 : :
2445 : 22616 : INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2446 : 22616 : args_so_far = pack_cumulative_args (&args_so_far_v);
2447 : :
2448 : 22616 : arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2449 : 22616 : for (idx = 0;
2450 : 81014 : arg != void_list_node && idx < nargs;
2451 : 58398 : arg = TREE_CHAIN (arg), idx++)
2452 : : {
2453 : 61548 : scalar_int_mode mode;
2454 : 61548 : rtx reg, link, tmp;
2455 : :
2456 : 61548 : if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2457 : 3150 : return false;
2458 : :
2459 : 61548 : function_arg_info arg (mode, /*named=*/true);
2460 : 61548 : reg = targetm.calls.function_arg (args_so_far, arg);
2461 : 61548 : if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2462 : : return false;
2463 : :
2464 : 58408 : for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2465 : 170416 : link;
2466 : 112008 : link = XEXP (link, 1))
2467 : 170406 : if (GET_CODE (XEXP (link, 0)) == USE)
2468 : : {
2469 : 116826 : scalar_int_mode arg_mode;
2470 : 116826 : args[idx] = XEXP (XEXP (link, 0), 0);
2471 : 116826 : if (REG_P (args[idx])
2472 : 116826 : && REGNO (args[idx]) == REGNO (reg)
2473 : 175234 : && (GET_MODE (args[idx]) == mode
2474 : 112018 : || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2475 : 10 : && (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2476 : 20 : && (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2477 : : break;
2478 : : }
2479 : 58398 : if (!link)
2480 : : return false;
2481 : :
2482 : 58398 : tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2483 : 58398 : if (GET_MODE (args[idx]) != mode)
2484 : : {
2485 : 0 : if (!tmp || !CONST_INT_P (tmp))
2486 : : return false;
2487 : 0 : tmp = gen_int_mode (INTVAL (tmp), mode);
2488 : : }
2489 : 58398 : if (tmp)
2490 : 58398 : args[idx] = tmp;
2491 : :
2492 : 58398 : targetm.calls.function_arg_advance (args_so_far, arg);
2493 : : }
2494 : 19466 : if (arg != void_list_node || idx != nargs)
2495 : : return false;
2496 : : return true;
2497 : : }
2498 : :
2499 : : /* Return a bitmap of the fixed registers contained in IN. */
2500 : :
2501 : : static bitmap
2502 : 18952760 : copy_fixed_regs (const_bitmap in)
2503 : : {
2504 : 18952760 : bitmap ret;
2505 : :
2506 : 18952760 : ret = ALLOC_REG_SET (NULL);
2507 : 18952760 : bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
2508 : 18952760 : return ret;
2509 : : }
2510 : :
2511 : : /* Apply record_store to all candidate stores in INSN. Mark INSN
2512 : : if some part of it is not a candidate store and assigns to a
2513 : : non-register target. */
2514 : :
2515 : : static void
2516 : 195614948 : scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
2517 : : {
2518 : 195614948 : rtx body;
2519 : 195614948 : insn_info_type *insn_info = insn_info_type_pool.allocate ();
2520 : 195614948 : int mems_found = 0;
2521 : 195614948 : memset (insn_info, 0, sizeof (struct insn_info_type));
2522 : :
2523 : 195614948 : if (dump_file && (dump_flags & TDF_DETAILS))
2524 : 0 : fprintf (dump_file, "\n**scanning insn=%d\n",
2525 : 0 : INSN_UID (insn));
2526 : :
2527 : 195614948 : insn_info->prev_insn = bb_info->last_insn;
2528 : 195614948 : insn_info->insn = insn;
2529 : 195614948 : bb_info->last_insn = insn_info;
2530 : :
2531 : 195614948 : if (DEBUG_INSN_P (insn))
2532 : : {
2533 : 80056263 : insn_info->cannot_delete = true;
2534 : 80056263 : return;
2535 : : }
2536 : :
2537 : : /* Look at all of the uses in the insn. */
2538 : 115558685 : note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2539 : :
2540 : 115558685 : if (CALL_P (insn))
2541 : : {
2542 : 9075927 : bool const_call;
2543 : 9075927 : rtx call, sym;
2544 : 9075927 : tree memset_call = NULL_TREE;
2545 : :
2546 : 9075927 : insn_info->cannot_delete = true;
2547 : :
2548 : : /* Const functions cannot do anything bad i.e. read memory,
2549 : : however, they can read their parameters which may have
2550 : : been pushed onto the stack.
2551 : : memset and bzero don't read memory either. */
2552 : 9075927 : const_call = RTL_CONST_CALL_P (insn);
2553 : 9075927 : if (!const_call
2554 : 8780558 : && (call = get_call_rtx_from (insn))
2555 : 8780558 : && (sym = XEXP (XEXP (call, 0), 0))
2556 : 8780558 : && GET_CODE (sym) == SYMBOL_REF
2557 : 8452334 : && SYMBOL_REF_DECL (sym)
2558 : 8207899 : && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2559 : 17283791 : && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
2560 : : memset_call = SYMBOL_REF_DECL (sym);
2561 : :
2562 : 9075927 : if (const_call || memset_call)
2563 : : {
2564 : 317985 : insn_info_t i_ptr = active_local_stores;
2565 : 317985 : insn_info_t last = NULL;
2566 : :
2567 : 317985 : if (dump_file && (dump_flags & TDF_DETAILS))
2568 : 0 : fprintf (dump_file, "%s call %d\n",
2569 : 0 : const_call ? "const" : "memset", INSN_UID (insn));
2570 : :
2571 : : /* See the head comment of the frame_read field. */
2572 : 317985 : if (reload_completed
2573 : : /* Tail calls are storing their arguments using
2574 : : arg pointer. If it is a frame pointer on the target,
2575 : : even before reload we need to kill frame pointer based
2576 : : stores. */
2577 : 317985 : || (SIBLING_CALL_P (insn)
2578 : : && HARD_FRAME_POINTER_IS_ARG_POINTER))
2579 : 158093 : insn_info->frame_read = true;
2580 : :
2581 : : /* Loop over the active stores and remove those which are
2582 : : killed by the const function call. */
2583 : 363870 : while (i_ptr)
2584 : : {
2585 : 45885 : bool remove_store = false;
2586 : :
2587 : : /* The stack pointer based stores are always killed. */
2588 : 45885 : if (i_ptr->stack_pointer_based)
2589 : : remove_store = true;
2590 : :
2591 : : /* If the frame is read, the frame related stores are killed. */
2592 : 9514 : else if (insn_info->frame_read)
2593 : : {
2594 : 3482 : store_info *store_info = i_ptr->store_rec;
2595 : :
2596 : : /* Skip the clobbers. */
2597 : 3482 : while (!store_info->is_set)
2598 : 0 : store_info = store_info->next;
2599 : :
2600 : 3482 : if (store_info->group_id >= 0
2601 : 3482 : && rtx_group_vec[store_info->group_id]->frame_related)
2602 : : remove_store = true;
2603 : : }
2604 : :
2605 : : if (remove_store)
2606 : : {
2607 : 38650 : if (dump_file && (dump_flags & TDF_DETAILS))
2608 : 0 : dump_insn_info ("removing from active", i_ptr);
2609 : :
2610 : 38650 : active_local_stores_len--;
2611 : 38650 : if (last)
2612 : 44 : last->next_local_store = i_ptr->next_local_store;
2613 : : else
2614 : 38606 : active_local_stores = i_ptr->next_local_store;
2615 : : }
2616 : : else
2617 : : last = i_ptr;
2618 : :
2619 : 45885 : i_ptr = i_ptr->next_local_store;
2620 : : }
2621 : :
2622 : 317985 : if (memset_call)
2623 : : {
2624 : 22616 : rtx args[3];
2625 : 22616 : if (get_call_args (insn, memset_call, args, 3)
2626 : 19466 : && CONST_INT_P (args[1])
2627 : 18335 : && CONST_INT_P (args[2])
2628 : 23564 : && INTVAL (args[2]) > 0)
2629 : : {
2630 : 897 : rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2631 : 897 : set_mem_size (mem, INTVAL (args[2]));
2632 : 897 : body = gen_rtx_SET (mem, args[1]);
2633 : 897 : mems_found += record_store (body, bb_info);
2634 : 897 : if (dump_file && (dump_flags & TDF_DETAILS))
2635 : 0 : fprintf (dump_file, "handling memset as BLKmode store\n");
2636 : 897 : if (mems_found == 1)
2637 : : {
2638 : 565 : if (active_local_stores_len++ >= max_active_local_stores)
2639 : : {
2640 : 0 : active_local_stores_len = 1;
2641 : 0 : active_local_stores = NULL;
2642 : : }
2643 : 565 : insn_info->fixed_regs_live
2644 : 565 : = copy_fixed_regs (bb_info->regs_live);
2645 : 565 : insn_info->next_local_store = active_local_stores;
2646 : 565 : active_local_stores = insn_info;
2647 : : }
2648 : : }
2649 : : else
2650 : 21719 : clear_rhs_from_active_local_stores ();
2651 : : }
2652 : : }
2653 : 8757942 : else if (SIBLING_CALL_P (insn)
2654 : 8757942 : && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
2655 : : /* Arguments for a sibling call that are pushed to memory are passed
2656 : : using the incoming argument pointer of the current function. After
2657 : : reload that might be (and likely is) frame pointer based. And, if
2658 : : it is a frame pointer on the target, even before reload we need to
2659 : : kill frame pointer based stores. */
2660 : 116157 : add_wild_read (bb_info);
2661 : : else
2662 : : /* Every other call, including pure functions, may read any memory
2663 : : that is not relative to the frame. */
2664 : 8641785 : add_non_frame_wild_read (bb_info);
2665 : :
2666 : 9075927 : for (rtx link = CALL_INSN_FUNCTION_USAGE (insn);
2667 : 26225742 : link != NULL_RTX;
2668 : 17149815 : link = XEXP (link, 1))
2669 : 17149815 : if (GET_CODE (XEXP (link, 0)) == USE && MEM_P (XEXP (XEXP (link, 0),0)))
2670 : 103213 : check_mem_read_rtx (&XEXP (XEXP (link, 0),0), bb_info, true);
2671 : :
2672 : : return;
2673 : : }
2674 : :
2675 : : /* Assuming that there are sets in these insns, we cannot delete
2676 : : them. */
2677 : 106482758 : if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2678 : 106445156 : || volatile_refs_p (PATTERN (insn))
2679 : 103855958 : || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2680 : 101129632 : || (RTX_FRAME_RELATED_P (insn))
2681 : 204727702 : || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2682 : 8237814 : insn_info->cannot_delete = true;
2683 : :
2684 : 106482758 : body = PATTERN (insn);
2685 : 106482758 : if (GET_CODE (body) == PARALLEL)
2686 : : {
2687 : : int i;
2688 : 46194795 : for (i = 0; i < XVECLEN (body, 0); i++)
2689 : 31506402 : mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2690 : : }
2691 : : else
2692 : 91794365 : mems_found += record_store (body, bb_info);
2693 : :
2694 : 106482758 : if (dump_file && (dump_flags & TDF_DETAILS))
2695 : 0 : fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2696 : 0 : mems_found, insn_info->cannot_delete ? "true" : "false");
2697 : :
2698 : : /* If we found some sets of mems, add it into the active_local_stores so
2699 : : that it can be locally deleted if found dead or used for
2700 : : replace_read and redundant constant store elimination. Otherwise mark
2701 : : it as cannot delete. This simplifies the processing later. */
2702 : 106482758 : if (mems_found == 1)
2703 : : {
2704 : 18952195 : if (active_local_stores_len++ >= max_active_local_stores)
2705 : : {
2706 : 16 : active_local_stores_len = 1;
2707 : 16 : active_local_stores = NULL;
2708 : : }
2709 : 18952195 : insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2710 : 18952195 : insn_info->next_local_store = active_local_stores;
2711 : 18952195 : active_local_stores = insn_info;
2712 : : }
2713 : : else
2714 : 87530563 : insn_info->cannot_delete = true;
2715 : : }
2716 : :
2717 : :
2718 : : /* Remove BASE from the set of active_local_stores. This is a
2719 : : callback from cselib that is used to get rid of the stores in
2720 : : active_local_stores. */
2721 : :
2722 : : static void
2723 : 507116 : remove_useless_values (cselib_val *base)
2724 : : {
2725 : 507116 : insn_info_t insn_info = active_local_stores;
2726 : 507116 : insn_info_t last = NULL;
2727 : :
2728 : 5181176 : while (insn_info)
2729 : : {
2730 : 4674060 : store_info *store_info = insn_info->store_rec;
2731 : 4674060 : bool del = false;
2732 : :
2733 : : /* If ANY of the store_infos match the cselib group that is
2734 : : being deleted, then the insn cannot be deleted. */
2735 : 9340077 : while (store_info)
2736 : : {
2737 : 4674060 : if ((store_info->group_id == -1)
2738 : 4347799 : && (store_info->cse_base == base))
2739 : : {
2740 : : del = true;
2741 : : break;
2742 : : }
2743 : 4666017 : store_info = store_info->next;
2744 : : }
2745 : :
2746 : 4674060 : if (del)
2747 : : {
2748 : 8043 : active_local_stores_len--;
2749 : 8043 : if (last)
2750 : 7525 : last->next_local_store = insn_info->next_local_store;
2751 : : else
2752 : 518 : active_local_stores = insn_info->next_local_store;
2753 : 8043 : free_store_info (insn_info);
2754 : : }
2755 : : else
2756 : : last = insn_info;
2757 : :
2758 : 4674060 : insn_info = insn_info->next_local_store;
2759 : : }
2760 : 507116 : }
2761 : :
2762 : :
2763 : : /* Do all of step 1. */
2764 : :
2765 : : static void
2766 : 1991506 : dse_step1 (void)
2767 : : {
2768 : 1991506 : basic_block bb;
2769 : 1991506 : bitmap regs_live = BITMAP_ALLOC (®_obstack);
2770 : :
2771 : 1991506 : cselib_init (0);
2772 : 1991506 : all_blocks = BITMAP_ALLOC (NULL);
2773 : 1991506 : bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2774 : 1991506 : bitmap_set_bit (all_blocks, EXIT_BLOCK);
2775 : :
2776 : : /* For -O1 reduce the maximum number of active local stores for RTL DSE
2777 : : since this can consume huge amounts of memory (PR89115). */
2778 : 1991506 : int max_active_local_stores = param_max_dse_active_local_stores;
2779 : 1991506 : if (optimize < 2)
2780 : 143420 : max_active_local_stores /= 10;
2781 : :
2782 : 26202689 : FOR_ALL_BB_FN (bb, cfun)
2783 : : {
2784 : 24211183 : insn_info_t ptr;
2785 : 24211183 : bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2786 : :
2787 : 24211183 : memset (bb_info, 0, sizeof (dse_bb_info_type));
2788 : 24211183 : bitmap_set_bit (all_blocks, bb->index);
2789 : 24211183 : bb_info->regs_live = regs_live;
2790 : :
2791 : 48422366 : bitmap_copy (regs_live, DF_LR_IN (bb));
2792 : 24211183 : df_simulate_initialize_forwards (bb, regs_live);
2793 : :
2794 : 24211183 : bb_table[bb->index] = bb_info;
2795 : 24211183 : cselib_discard_hook = remove_useless_values;
2796 : :
2797 : 24211183 : if (bb->index >= NUM_FIXED_BLOCKS)
2798 : : {
2799 : 20228171 : rtx_insn *insn;
2800 : :
2801 : 20228171 : active_local_stores = NULL;
2802 : 20228171 : active_local_stores_len = 0;
2803 : 20228171 : cselib_clear_table ();
2804 : :
2805 : : /* Scan the insns. */
2806 : 255013561 : FOR_BB_INSNS (bb, insn)
2807 : : {
2808 : 234785390 : if (INSN_P (insn))
2809 : 195614948 : scan_insn (bb_info, insn, max_active_local_stores);
2810 : 234785390 : cselib_process_insn (insn);
2811 : 234785390 : if (INSN_P (insn))
2812 : 195614948 : df_simulate_one_insn_forwards (bb, insn, regs_live);
2813 : : }
2814 : :
2815 : : /* This is something of a hack, because the global algorithm
2816 : : is supposed to take care of the case where stores go dead
2817 : : at the end of the function. However, the global
2818 : : algorithm must take a more conservative view of block
2819 : : mode reads than the local alg does. So to get the case
2820 : : where you have a store to the frame followed by a non
2821 : : overlapping block more read, we look at the active local
2822 : : stores at the end of the function and delete all of the
2823 : : frame and spill based ones. */
2824 : 20228171 : if (stores_off_frame_dead_at_return
2825 : 20228171 : && (EDGE_COUNT (bb->succs) == 0
2826 : 19077608 : || (single_succ_p (bb)
2827 : 8957557 : && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2828 : 2092128 : && ! crtl->calls_eh_return)))
2829 : : {
2830 : 3151177 : insn_info_t i_ptr = active_local_stores;
2831 : 3531923 : while (i_ptr)
2832 : : {
2833 : 380746 : store_info *store_info = i_ptr->store_rec;
2834 : :
2835 : : /* Skip the clobbers. */
2836 : 380749 : while (!store_info->is_set)
2837 : 3 : store_info = store_info->next;
2838 : 380746 : if (store_info->group_id >= 0)
2839 : : {
2840 : 79851 : group_info *group = rtx_group_vec[store_info->group_id];
2841 : 79851 : if (group->frame_related && !i_ptr->cannot_delete)
2842 : 10616 : delete_dead_store_insn (i_ptr);
2843 : : }
2844 : :
2845 : 380746 : i_ptr = i_ptr->next_local_store;
2846 : : }
2847 : : }
2848 : :
2849 : : /* Get rid of the loads that were discovered in
2850 : : replace_read. Cselib is finished with this block. */
2851 : 20311009 : while (deferred_change_list)
2852 : : {
2853 : 82838 : deferred_change *next = deferred_change_list->next;
2854 : :
2855 : : /* There is no reason to validate this change. That was
2856 : : done earlier. */
2857 : 82838 : *deferred_change_list->loc = deferred_change_list->reg;
2858 : 82838 : deferred_change_pool.remove (deferred_change_list);
2859 : 82838 : deferred_change_list = next;
2860 : : }
2861 : :
2862 : : /* Get rid of all of the cselib based store_infos in this
2863 : : block and mark the containing insns as not being
2864 : : deletable. */
2865 : 20228171 : ptr = bb_info->last_insn;
2866 : 215843119 : while (ptr)
2867 : : {
2868 : 195614948 : if (ptr->contains_cselib_groups)
2869 : : {
2870 : 13874354 : store_info *s_info = ptr->store_rec;
2871 : 13874379 : while (s_info && !s_info->is_set)
2872 : 25 : s_info = s_info->next;
2873 : 13874354 : if (s_info
2874 : 13874354 : && s_info->redundant_reason
2875 : 45 : && s_info->redundant_reason->insn
2876 : 38 : && !ptr->cannot_delete)
2877 : : {
2878 : 38 : if (dump_file && (dump_flags & TDF_DETAILS))
2879 : 0 : fprintf (dump_file, "Locally deleting insn %d "
2880 : : "because insn %d stores the "
2881 : : "same value and couldn't be "
2882 : : "eliminated\n",
2883 : 0 : INSN_UID (ptr->insn),
2884 : 0 : INSN_UID (s_info->redundant_reason->insn));
2885 : 38 : delete_dead_store_insn (ptr);
2886 : : }
2887 : 13874354 : free_store_info (ptr);
2888 : : }
2889 : : else
2890 : : {
2891 : 181740594 : store_info *s_info;
2892 : :
2893 : : /* Free at least positions_needed bitmaps. */
2894 : 186789823 : for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2895 : 5049229 : if (s_info->is_large)
2896 : : {
2897 : 15617 : BITMAP_FREE (s_info->positions_needed.large.bmap);
2898 : 15617 : s_info->is_large = false;
2899 : : }
2900 : : }
2901 : 195614948 : ptr = ptr->prev_insn;
2902 : : }
2903 : :
2904 : 20228171 : cse_store_info_pool.release ();
2905 : : }
2906 : 24211183 : bb_info->regs_live = NULL;
2907 : : }
2908 : :
2909 : 1991506 : BITMAP_FREE (regs_live);
2910 : 1991506 : cselib_finish ();
2911 : 1991506 : rtx_group_table->empty ();
2912 : 1991506 : }
2913 : :
2914 : :
2915 : : /*----------------------------------------------------------------------------
2916 : : Second step.
2917 : :
2918 : : Assign each byte position in the stores that we are going to
2919 : : analyze globally to a position in the bitmaps. Returns true if
2920 : : there are any bit positions assigned.
2921 : : ----------------------------------------------------------------------------*/
2922 : :
2923 : : static void
2924 : 1991506 : dse_step2_init (void)
2925 : : {
2926 : 1991506 : unsigned int i;
2927 : 1991506 : group_info *group;
2928 : :
2929 : 7767370 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2930 : : {
2931 : : /* For all non stack related bases, we only consider a store to
2932 : : be deletable if there are two or more stores for that
2933 : : position. This is because it takes one store to make the
2934 : : other store redundant. However, for the stores that are
2935 : : stack related, we consider them if there is only one store
2936 : : for the position. We do this because the stack related
2937 : : stores can be deleted if their is no read between them and
2938 : : the end of the function.
2939 : :
2940 : : To make this work in the current framework, we take the stack
2941 : : related bases add all of the bits from store1 into store2.
2942 : : This has the effect of making the eligible even if there is
2943 : : only one store. */
2944 : :
2945 : 5775864 : if (stores_off_frame_dead_at_return && group->frame_related)
2946 : : {
2947 : 393902 : bitmap_ior_into (group->store2_n, group->store1_n);
2948 : 393902 : bitmap_ior_into (group->store2_p, group->store1_p);
2949 : 393902 : if (dump_file && (dump_flags & TDF_DETAILS))
2950 : 0 : fprintf (dump_file, "group %d is frame related ", i);
2951 : : }
2952 : :
2953 : 5775864 : group->offset_map_size_n++;
2954 : 5775864 : group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2955 : : group->offset_map_size_n);
2956 : 5775864 : group->offset_map_size_p++;
2957 : 5775864 : group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2958 : : group->offset_map_size_p);
2959 : 5775864 : group->process_globally = false;
2960 : 5775864 : if (dump_file && (dump_flags & TDF_DETAILS))
2961 : : {
2962 : 0 : fprintf (dump_file, "group %d(%d+%d): ", i,
2963 : 0 : (int)bitmap_count_bits (group->store2_n),
2964 : 0 : (int)bitmap_count_bits (group->store2_p));
2965 : 0 : bitmap_print (dump_file, group->store2_n, "n ", " ");
2966 : 0 : bitmap_print (dump_file, group->store2_p, "p ", "\n");
2967 : : }
2968 : : }
2969 : 1991506 : }
2970 : :
2971 : :
2972 : : /* Init the offset tables. */
2973 : :
2974 : : static bool
2975 : 1991506 : dse_step2 (void)
2976 : : {
2977 : 1991506 : unsigned int i;
2978 : 1991506 : group_info *group;
2979 : : /* Position 0 is unused because 0 is used in the maps to mean
2980 : : unused. */
2981 : 1991506 : current_position = 1;
2982 : 7767370 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2983 : : {
2984 : 5775864 : bitmap_iterator bi;
2985 : 5775864 : unsigned int j;
2986 : :
2987 : 5775864 : memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2988 : 5775864 : memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2989 : 5775864 : bitmap_clear (group->group_kill);
2990 : :
2991 : 37662577 : EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2992 : : {
2993 : 31886713 : bitmap_set_bit (group->group_kill, current_position);
2994 : 31886713 : if (bitmap_bit_p (group->escaped_n, j))
2995 : 21357814 : bitmap_set_bit (kill_on_calls, current_position);
2996 : 31886713 : group->offset_map_n[j] = current_position++;
2997 : 31886713 : group->process_globally = true;
2998 : : }
2999 : 7196894 : EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
3000 : : {
3001 : 1421030 : bitmap_set_bit (group->group_kill, current_position);
3002 : 1421030 : if (bitmap_bit_p (group->escaped_p, j))
3003 : 1330991 : bitmap_set_bit (kill_on_calls, current_position);
3004 : 1421030 : group->offset_map_p[j] = current_position++;
3005 : 1421030 : group->process_globally = true;
3006 : : }
3007 : : }
3008 : 1991506 : return current_position != 1;
3009 : : }
3010 : :
3011 : :
3012 : :
3013 : : /*----------------------------------------------------------------------------
3014 : : Third step.
3015 : :
3016 : : Build the bit vectors for the transfer functions.
3017 : : ----------------------------------------------------------------------------*/
3018 : :
3019 : :
3020 : : /* Look up the bitmap index for OFFSET in GROUP_INFO. If it is not
3021 : : there, return 0. */
3022 : :
3023 : : static int
3024 : 147475776 : get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
3025 : : {
3026 : 147475776 : if (offset < 0)
3027 : : {
3028 : 129708665 : HOST_WIDE_INT offset_p = -offset;
3029 : 129708665 : if (offset_p >= group_info->offset_map_size_n)
3030 : : return 0;
3031 : 126479230 : return group_info->offset_map_n[offset_p];
3032 : : }
3033 : : else
3034 : : {
3035 : 17767111 : if (offset >= group_info->offset_map_size_p)
3036 : : return 0;
3037 : 17328988 : return group_info->offset_map_p[offset];
3038 : : }
3039 : : }
3040 : :
3041 : :
3042 : : /* Process the STORE_INFOs into the bitmaps into GEN and KILL. KILL
3043 : : may be NULL. */
3044 : :
3045 : : static void
3046 : 156596117 : scan_stores (store_info *store_info, bitmap gen, bitmap kill)
3047 : : {
3048 : 165555708 : while (store_info)
3049 : : {
3050 : 8959591 : HOST_WIDE_INT i, offset, width;
3051 : 8959591 : group_info *group_info
3052 : 8959591 : = rtx_group_vec[store_info->group_id];
3053 : : /* We can (conservatively) ignore stores whose bounds aren't known;
3054 : : they simply don't generate new global dse opportunities. */
3055 : 8959591 : if (group_info->process_globally
3056 : 8758881 : && store_info->offset.is_constant (&offset)
3057 : 8959591 : && store_info->width.is_constant (&width))
3058 : : {
3059 : 8758881 : HOST_WIDE_INT end = offset + width;
3060 : 109249761 : for (i = offset; i < end; i++)
3061 : : {
3062 : 100490880 : int index = get_bitmap_index (group_info, i);
3063 : 100490880 : if (index != 0)
3064 : : {
3065 : 97052431 : bitmap_set_bit (gen, index);
3066 : 97052431 : if (kill)
3067 : 43805263 : bitmap_clear_bit (kill, index);
3068 : : }
3069 : : }
3070 : : }
3071 : 8959591 : store_info = store_info->next;
3072 : : }
3073 : 156596117 : }
3074 : :
3075 : :
3076 : : /* Process the READ_INFOs into the bitmaps into GEN and KILL. KILL
3077 : : may be NULL. */
3078 : :
3079 : : static void
3080 : 86429162 : scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
3081 : : {
3082 : 86429162 : read_info_t read_info = insn_info->read_rec;
3083 : 86429162 : int i;
3084 : 86429162 : group_info *group;
3085 : :
3086 : : /* If this insn reads the frame, kill all the frame related stores. */
3087 : 86429162 : if (insn_info->frame_read)
3088 : : {
3089 : 230734 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3090 : 219748 : if (group->process_globally && group->frame_related)
3091 : : {
3092 : 7542 : if (kill)
3093 : 3328 : bitmap_ior_into (kill, group->group_kill);
3094 : 7542 : bitmap_and_compl_into (gen, group->group_kill);
3095 : : }
3096 : : }
3097 : 86429162 : if (insn_info->non_frame_wild_read)
3098 : : {
3099 : : /* Kill all non-frame related stores. Kill all stores of variables that
3100 : : escape. */
3101 : 6578597 : if (kill)
3102 : 3134989 : bitmap_ior_into (kill, kill_on_calls);
3103 : 6578597 : bitmap_and_compl_into (gen, kill_on_calls);
3104 : 150772111 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3105 : 137614917 : if (group->process_globally && !group->frame_related)
3106 : : {
3107 : 4276839 : if (kill)
3108 : 1934383 : bitmap_ior_into (kill, group->group_kill);
3109 : 4276839 : bitmap_and_compl_into (gen, group->group_kill);
3110 : : }
3111 : : }
3112 : 99389033 : while (read_info)
3113 : : {
3114 : 350487890 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3115 : : {
3116 : 337528019 : if (group->process_globally)
3117 : : {
3118 : 21489358 : if (i == read_info->group_id)
3119 : : {
3120 : 5225480 : HOST_WIDE_INT offset, width;
3121 : : /* Reads with non-constant size kill all DSE opportunities
3122 : : in the group. */
3123 : 5225480 : if (!read_info->offset.is_constant (&offset)
3124 : 5225480 : || !read_info->width.is_constant (&width)
3125 : 5225480 : || !known_size_p (width))
3126 : : {
3127 : : /* Handle block mode reads. */
3128 : 28391 : if (kill)
3129 : 13905 : bitmap_ior_into (kill, group->group_kill);
3130 : 28391 : bitmap_and_compl_into (gen, group->group_kill);
3131 : : }
3132 : : else
3133 : : {
3134 : : /* The groups are the same, just process the
3135 : : offsets. */
3136 : 5197089 : HOST_WIDE_INT j;
3137 : 5197089 : HOST_WIDE_INT end = offset + width;
3138 : 48508116 : for (j = offset; j < end; j++)
3139 : : {
3140 : 43311027 : int index = get_bitmap_index (group, j);
3141 : 43311027 : if (index != 0)
3142 : : {
3143 : 33507839 : if (kill)
3144 : 16081182 : bitmap_set_bit (kill, index);
3145 : 33507839 : bitmap_clear_bit (gen, index);
3146 : : }
3147 : : }
3148 : : }
3149 : : }
3150 : : else
3151 : : {
3152 : : /* The groups are different, if the alias sets
3153 : : conflict, clear the entire group. We only need
3154 : : to apply this test if the read_info is a cselib
3155 : : read. Anything with a constant base cannot alias
3156 : : something else with a different constant
3157 : : base. */
3158 : 16263878 : if ((read_info->group_id < 0)
3159 : 24730468 : && canon_true_dependence (group->base_mem,
3160 : 8466590 : GET_MODE (group->base_mem),
3161 : : group->canon_base_addr,
3162 : 8466590 : read_info->mem, NULL_RTX))
3163 : : {
3164 : 4543136 : if (kill)
3165 : 2167460 : bitmap_ior_into (kill, group->group_kill);
3166 : 4543136 : bitmap_and_compl_into (gen, group->group_kill);
3167 : : }
3168 : : }
3169 : : }
3170 : : }
3171 : :
3172 : 12959871 : read_info = read_info->next;
3173 : : }
3174 : 86429162 : }
3175 : :
3176 : :
3177 : : /* Return the insn in BB_INFO before the first wild read or if there
3178 : : are no wild reads in the block, return the last insn. */
3179 : :
3180 : : static insn_info_t
3181 : 7567297 : find_insn_before_first_wild_read (bb_info_t bb_info)
3182 : : {
3183 : 7567297 : insn_info_t insn_info = bb_info->last_insn;
3184 : 7567297 : insn_info_t last_wild_read = NULL;
3185 : :
3186 : 87637391 : while (insn_info)
3187 : : {
3188 : 80177062 : if (insn_info->wild_read)
3189 : : {
3190 : 660328 : last_wild_read = insn_info->prev_insn;
3191 : : /* Block starts with wild read. */
3192 : 660328 : if (!last_wild_read)
3193 : : return NULL;
3194 : : }
3195 : :
3196 : 80070094 : insn_info = insn_info->prev_insn;
3197 : : }
3198 : :
3199 : 7460329 : if (last_wild_read)
3200 : : return last_wild_read;
3201 : : else
3202 : 7191567 : return bb_info->last_insn;
3203 : : }
3204 : :
3205 : :
3206 : : /* Scan the insns in BB_INFO starting at PTR and going to the top of
3207 : : the block in order to build the gen and kill sets for the block.
3208 : : We start at ptr which may be the last insn in the block or may be
3209 : : the first insn with a wild read. In the latter case we are able to
3210 : : skip the rest of the block because it just does not matter:
3211 : : anything that happens is hidden by the wild read. */
3212 : :
3213 : : static void
3214 : 7567297 : dse_step3_scan (basic_block bb)
3215 : : {
3216 : 7567297 : bb_info_t bb_info = bb_table[bb->index];
3217 : 7567297 : insn_info_t insn_info;
3218 : :
3219 : 7567297 : insn_info = find_insn_before_first_wild_read (bb_info);
3220 : :
3221 : : /* In the spill case or in the no_spill case if there is no wild
3222 : : read in the block, we will need a kill set. */
3223 : 7567297 : if (insn_info == bb_info->last_insn)
3224 : : {
3225 : 7191567 : if (bb_info->kill)
3226 : 0 : bitmap_clear (bb_info->kill);
3227 : : else
3228 : 7191567 : bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3229 : : }
3230 : : else
3231 : 375730 : if (bb_info->kill)
3232 : 0 : BITMAP_FREE (bb_info->kill);
3233 : :
3234 : 84079548 : while (insn_info)
3235 : : {
3236 : : /* There may have been code deleted by the dce pass run before
3237 : : this phase. */
3238 : 76512251 : if (insn_info->insn && INSN_P (insn_info->insn))
3239 : : {
3240 : 76485877 : scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3241 : 76485877 : scan_reads (insn_info, bb_info->gen, bb_info->kill);
3242 : : }
3243 : :
3244 : 76512251 : insn_info = insn_info->prev_insn;
3245 : : }
3246 : 7567297 : }
3247 : :
3248 : :
3249 : : /* Set the gen set of the exit block, and also any block with no
3250 : : successors that does not have a wild read. */
3251 : :
3252 : : static void
3253 : 241298 : dse_step3_exit_block_scan (bb_info_t bb_info)
3254 : : {
3255 : : /* The gen set is all 0's for the exit block except for the
3256 : : frame_pointer_group. */
3257 : :
3258 : 241298 : if (stores_off_frame_dead_at_return)
3259 : : {
3260 : : unsigned int i;
3261 : : group_info *group;
3262 : :
3263 : 2202290 : FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3264 : : {
3265 : 1962049 : if (group->process_globally && group->frame_related)
3266 : 215690 : bitmap_ior_into (bb_info->gen, group->group_kill);
3267 : : }
3268 : : }
3269 : 241298 : }
3270 : :
3271 : :
3272 : : /* Find all of the blocks that are not backwards reachable from the
3273 : : exit block or any block with no successors (BB). These are the
3274 : : infinite loops or infinite self loops. These blocks will still
3275 : : have their bits set in UNREACHABLE_BLOCKS. */
3276 : :
3277 : : static void
3278 : 12322470 : mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
3279 : : {
3280 : 12322470 : edge e;
3281 : 12322470 : edge_iterator ei;
3282 : :
3283 : 12322470 : if (bitmap_bit_p (unreachable_blocks, bb->index))
3284 : : {
3285 : 8038872 : bitmap_clear_bit (unreachable_blocks, bb->index);
3286 : 19701210 : FOR_EACH_EDGE (e, ei, bb->preds)
3287 : : {
3288 : 11662338 : mark_reachable_blocks (unreachable_blocks, e->src);
3289 : : }
3290 : : }
3291 : 12322470 : }
3292 : :
3293 : : /* Build the transfer functions for the function. */
3294 : :
3295 : : static void
3296 : 241298 : dse_step3 ()
3297 : : {
3298 : 241298 : basic_block bb;
3299 : 241298 : sbitmap_iterator sbi;
3300 : 241298 : bitmap all_ones = NULL;
3301 : 241298 : unsigned int i;
3302 : :
3303 : 241298 : auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
3304 : 241298 : bitmap_ones (unreachable_blocks);
3305 : :
3306 : 8291191 : FOR_ALL_BB_FN (bb, cfun)
3307 : : {
3308 : 8049893 : bb_info_t bb_info = bb_table[bb->index];
3309 : 8049893 : if (bb_info->gen)
3310 : 0 : bitmap_clear (bb_info->gen);
3311 : : else
3312 : 8049893 : bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);
3313 : :
3314 : 8049893 : if (bb->index == ENTRY_BLOCK)
3315 : : ;
3316 : 7808595 : else if (bb->index == EXIT_BLOCK)
3317 : 241298 : dse_step3_exit_block_scan (bb_info);
3318 : : else
3319 : 7567297 : dse_step3_scan (bb);
3320 : 8049893 : if (EDGE_COUNT (bb->succs) == 0)
3321 : 660132 : mark_reachable_blocks (unreachable_blocks, bb);
3322 : :
3323 : : /* If this is the second time dataflow is run, delete the old
3324 : : sets. */
3325 : 8049893 : if (bb_info->in)
3326 : 0 : BITMAP_FREE (bb_info->in);
3327 : 8049893 : if (bb_info->out)
3328 : 0 : BITMAP_FREE (bb_info->out);
3329 : : }
3330 : :
3331 : : /* For any block in an infinite loop, we must initialize the out set
3332 : : to all ones. This could be expensive, but almost never occurs in
3333 : : practice. However, it is common in regression tests. */
3334 : 1058107 : EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
3335 : : {
3336 : 575511 : if (bitmap_bit_p (all_blocks, i))
3337 : : {
3338 : 11021 : bb_info_t bb_info = bb_table[i];
3339 : 11021 : if (!all_ones)
3340 : : {
3341 : 1490 : unsigned int j;
3342 : 1490 : group_info *group;
3343 : :
3344 : 1490 : all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
3345 : 12683 : FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
3346 : 9703 : bitmap_ior_into (all_ones, group->group_kill);
3347 : : }
3348 : 11021 : if (!bb_info->out)
3349 : : {
3350 : 11021 : bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3351 : 11021 : bitmap_copy (bb_info->out, all_ones);
3352 : : }
3353 : : }
3354 : : }
3355 : :
3356 : 241298 : if (all_ones)
3357 : 1490 : BITMAP_FREE (all_ones);
3358 : 241298 : }
3359 : :
3360 : :
3361 : :
3362 : : /*----------------------------------------------------------------------------
3363 : : Fourth step.
3364 : :
3365 : : Solve the bitvector equations.
3366 : : ----------------------------------------------------------------------------*/
3367 : :
3368 : :
3369 : : /* Confluence function for blocks with no successors. Create an out
3370 : : set from the gen set of the exit block. This block logically has
3371 : : the exit block as a successor. */
3372 : :
3373 : :
3374 : :
3375 : : static void
3376 : 660132 : dse_confluence_0 (basic_block bb)
3377 : : {
3378 : 660132 : bb_info_t bb_info = bb_table[bb->index];
3379 : :
3380 : 660132 : if (bb->index == EXIT_BLOCK)
3381 : : return;
3382 : :
3383 : 418834 : if (!bb_info->out)
3384 : : {
3385 : 418834 : bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3386 : 418834 : bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
3387 : : }
3388 : : }
3389 : :
3390 : : /* Propagate the information from the in set of the dest of E to the
3391 : : out set of the src of E. If the various in or out sets are not
3392 : : there, that means they are all ones. */
3393 : :
3394 : : static bool
3395 : 12416648 : dse_confluence_n (edge e)
3396 : : {
3397 : 12416648 : bb_info_t src_info = bb_table[e->src->index];
3398 : 12416648 : bb_info_t dest_info = bb_table[e->dest->index];
3399 : :
3400 : 12416648 : if (dest_info->in)
3401 : : {
3402 : 11972767 : if (src_info->out)
3403 : 4594027 : bitmap_and_into (src_info->out, dest_info->in);
3404 : : else
3405 : : {
3406 : 7378740 : src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3407 : 7378740 : bitmap_copy (src_info->out, dest_info->in);
3408 : : }
3409 : : }
3410 : 12416648 : return true;
3411 : : }
3412 : :
3413 : :
3414 : : /* Propagate the info from the out to the in set of BB_INDEX's basic
3415 : : block. There are three cases:
3416 : :
3417 : : 1) The block has no kill set. In this case the kill set is all
3418 : : ones. It does not matter what the out set of the block is, none of
3419 : : the info can reach the top. The only thing that reaches the top is
3420 : : the gen set and we just copy the set.
3421 : :
3422 : : 2) There is a kill set but no out set and bb has successors. In
3423 : : this case we just return. Eventually an out set will be created and
3424 : : it is better to wait than to create a set of ones.
3425 : :
3426 : : 3) There is both a kill and out set. We apply the obvious transfer
3427 : : function.
3428 : : */
3429 : :
3430 : : static bool
3431 : 8716104 : dse_transfer_function (int bb_index)
3432 : : {
3433 : 8716104 : bb_info_t bb_info = bb_table[bb_index];
3434 : :
3435 : 8716104 : if (bb_info->kill)
3436 : : {
3437 : 7829162 : if (bb_info->out)
3438 : : {
3439 : : /* Case 3 above. */
3440 : 7829162 : if (bb_info->in)
3441 : 637595 : return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3442 : 637595 : bb_info->out, bb_info->kill);
3443 : : else
3444 : : {
3445 : 7191567 : bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3446 : 7191567 : bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3447 : 7191567 : bb_info->out, bb_info->kill);
3448 : 7191567 : return true;
3449 : : }
3450 : : }
3451 : : else
3452 : : /* Case 2 above. */
3453 : : return false;
3454 : : }
3455 : : else
3456 : : {
3457 : : /* Case 1 above. If there is already an in set, nothing
3458 : : happens. */
3459 : 886942 : if (bb_info->in)
3460 : : return false;
3461 : : else
3462 : : {
3463 : 858326 : bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3464 : 858326 : bitmap_copy (bb_info->in, bb_info->gen);
3465 : 858326 : return true;
3466 : : }
3467 : : }
3468 : : }
3469 : :
3470 : : /* Solve the dataflow equations. */
3471 : :
3472 : : static void
3473 : 241298 : dse_step4 (void)
3474 : : {
3475 : 241298 : df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
3476 : : dse_confluence_n, dse_transfer_function,
3477 : : all_blocks, df_get_postorder (DF_BACKWARD),
3478 : : df_get_n_blocks (DF_BACKWARD));
3479 : 241298 : if (dump_file && (dump_flags & TDF_DETAILS))
3480 : : {
3481 : 0 : basic_block bb;
3482 : :
3483 : 0 : fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
3484 : 0 : FOR_ALL_BB_FN (bb, cfun)
3485 : : {
3486 : 0 : bb_info_t bb_info = bb_table[bb->index];
3487 : :
3488 : 0 : df_print_bb_index (bb, dump_file);
3489 : 0 : if (bb_info->in)
3490 : 0 : bitmap_print (dump_file, bb_info->in, " in: ", "\n");
3491 : : else
3492 : 0 : fprintf (dump_file, " in: *MISSING*\n");
3493 : 0 : if (bb_info->gen)
3494 : 0 : bitmap_print (dump_file, bb_info->gen, " gen: ", "\n");
3495 : : else
3496 : 0 : fprintf (dump_file, " gen: *MISSING*\n");
3497 : 0 : if (bb_info->kill)
3498 : 0 : bitmap_print (dump_file, bb_info->kill, " kill: ", "\n");
3499 : : else
3500 : 0 : fprintf (dump_file, " kill: *MISSING*\n");
3501 : 0 : if (bb_info->out)
3502 : 0 : bitmap_print (dump_file, bb_info->out, " out: ", "\n");
3503 : : else
3504 : 0 : fprintf (dump_file, " out: *MISSING*\n\n");
3505 : : }
3506 : : }
3507 : 241298 : }
3508 : :
3509 : :
3510 : :
3511 : : /*----------------------------------------------------------------------------
3512 : : Fifth step.
3513 : :
3514 : : Delete the stores that can only be deleted using the global information.
3515 : : ----------------------------------------------------------------------------*/
3516 : :
3517 : :
3518 : : static void
3519 : 241298 : dse_step5 (void)
3520 : : {
3521 : 241298 : basic_block bb;
3522 : 7808595 : FOR_EACH_BB_FN (bb, cfun)
3523 : : {
3524 : 7567297 : bb_info_t bb_info = bb_table[bb->index];
3525 : 7567297 : insn_info_t insn_info = bb_info->last_insn;
3526 : 7567297 : bitmap v = bb_info->out;
3527 : :
3528 : 87744359 : while (insn_info)
3529 : : {
3530 : 80177062 : bool deleted = false;
3531 : 80177062 : if (dump_file && insn_info->insn)
3532 : : {
3533 : 21 : fprintf (dump_file, "starting to process insn %d\n",
3534 : 21 : INSN_UID (insn_info->insn));
3535 : 21 : bitmap_print (dump_file, v, " v: ", "\n");
3536 : : }
3537 : :
3538 : : /* There may have been code deleted by the dce pass run before
3539 : : this phase. */
3540 : 80177062 : if (insn_info->insn
3541 : 80150446 : && INSN_P (insn_info->insn)
3542 : 80150446 : && (!insn_info->cannot_delete)
3543 : 84522417 : && (!bitmap_empty_p (v)))
3544 : : {
3545 : 3022315 : store_info *store_info = insn_info->store_rec;
3546 : :
3547 : : /* Try to delete the current insn. */
3548 : 3022315 : deleted = true;
3549 : :
3550 : : /* Skip the clobbers. */
3551 : 3022315 : while (!store_info->is_set)
3552 : 0 : store_info = store_info->next;
3553 : :
3554 : 3022315 : HOST_WIDE_INT i, offset, width;
3555 : 3022315 : group_info *group_info = rtx_group_vec[store_info->group_id];
3556 : :
3557 : 3022315 : if (!store_info->offset.is_constant (&offset)
3558 : 3022315 : || !store_info->width.is_constant (&width))
3559 : : deleted = false;
3560 : : else
3561 : : {
3562 : 3022315 : HOST_WIDE_INT end = offset + width;
3563 : 3714075 : for (i = offset; i < end; i++)
3564 : : {
3565 : 3673869 : int index = get_bitmap_index (group_info, i);
3566 : :
3567 : 3673869 : if (dump_file && (dump_flags & TDF_DETAILS))
3568 : 0 : fprintf (dump_file, "i = %d, index = %d\n",
3569 : : (int) i, index);
3570 : 3673869 : if (index == 0 || !bitmap_bit_p (v, index))
3571 : : {
3572 : 2982109 : if (dump_file && (dump_flags & TDF_DETAILS))
3573 : 0 : fprintf (dump_file, "failing at i = %d\n",
3574 : : (int) i);
3575 : : deleted = false;
3576 : : break;
3577 : : }
3578 : : }
3579 : : }
3580 : 40206 : if (deleted)
3581 : : {
3582 : 40206 : if (dbg_cnt (dse)
3583 : 40206 : && check_for_inc_dec_1 (insn_info))
3584 : : {
3585 : 40206 : delete_insn (insn_info->insn);
3586 : 40206 : insn_info->insn = NULL;
3587 : 40206 : globally_deleted++;
3588 : : }
3589 : : }
3590 : : }
3591 : : /* We do want to process the local info if the insn was
3592 : : deleted. For instance, if the insn did a wild read, we
3593 : : no longer need to trash the info. */
3594 : 80177062 : if (insn_info->insn
3595 : 80110240 : && INSN_P (insn_info->insn)
3596 : 80110240 : && (!deleted))
3597 : : {
3598 : 80110240 : scan_stores (insn_info->store_rec, v, NULL);
3599 : 80110240 : if (insn_info->wild_read)
3600 : : {
3601 : 660328 : if (dump_file && (dump_flags & TDF_DETAILS))
3602 : 0 : fprintf (dump_file, "wild read\n");
3603 : 660328 : bitmap_clear (v);
3604 : : }
3605 : 79449912 : else if (insn_info->read_rec
3606 : 72876347 : || insn_info->non_frame_wild_read
3607 : 69506627 : || insn_info->frame_read)
3608 : : {
3609 : 9943285 : if (dump_file && (dump_flags & TDF_DETAILS))
3610 : : {
3611 : 0 : if (!insn_info->non_frame_wild_read
3612 : 0 : && !insn_info->frame_read)
3613 : 0 : fprintf (dump_file, "regular read\n");
3614 : 0 : if (insn_info->non_frame_wild_read)
3615 : 0 : fprintf (dump_file, "non-frame wild read\n");
3616 : 0 : if (insn_info->frame_read)
3617 : 0 : fprintf (dump_file, "frame read\n");
3618 : : }
3619 : 9943285 : scan_reads (insn_info, v, NULL);
3620 : : }
3621 : : }
3622 : :
3623 : 80177062 : insn_info = insn_info->prev_insn;
3624 : : }
3625 : : }
3626 : 241298 : }
3627 : :
3628 : :
3629 : :
3630 : : /*----------------------------------------------------------------------------
3631 : : Sixth step.
3632 : :
3633 : : Delete stores made redundant by earlier stores (which store the same
3634 : : value) that couldn't be eliminated.
3635 : : ----------------------------------------------------------------------------*/
3636 : :
3637 : : static void
3638 : 1991506 : dse_step6 (void)
3639 : : {
3640 : 1991506 : basic_block bb;
3641 : :
3642 : 26202689 : FOR_ALL_BB_FN (bb, cfun)
3643 : : {
3644 : 24211183 : bb_info_t bb_info = bb_table[bb->index];
3645 : 24211183 : insn_info_t insn_info = bb_info->last_insn;
3646 : :
3647 : 219826131 : while (insn_info)
3648 : : {
3649 : : /* There may have been code deleted by the dce pass run before
3650 : : this phase. */
3651 : 195614948 : if (insn_info->insn
3652 : 195539714 : && INSN_P (insn_info->insn)
3653 : 195539714 : && !insn_info->cannot_delete)
3654 : : {
3655 : 4559528 : store_info *s_info = insn_info->store_rec;
3656 : :
3657 : 4559528 : while (s_info && !s_info->is_set)
3658 : 0 : s_info = s_info->next;
3659 : 4559528 : if (s_info
3660 : 4559528 : && s_info->redundant_reason
3661 : 168 : && s_info->redundant_reason->insn
3662 : 22 : && INSN_P (s_info->redundant_reason->insn))
3663 : : {
3664 : 22 : rtx_insn *rinsn = s_info->redundant_reason->insn;
3665 : 22 : if (dump_file && (dump_flags & TDF_DETAILS))
3666 : 0 : fprintf (dump_file, "Locally deleting insn %d "
3667 : : "because insn %d stores the "
3668 : : "same value and couldn't be "
3669 : : "eliminated\n",
3670 : 0 : INSN_UID (insn_info->insn),
3671 : 0 : INSN_UID (rinsn));
3672 : 22 : delete_dead_store_insn (insn_info);
3673 : : }
3674 : : }
3675 : 195614948 : insn_info = insn_info->prev_insn;
3676 : : }
3677 : : }
3678 : 1991506 : }
3679 : :
3680 : : /*----------------------------------------------------------------------------
3681 : : Seventh step.
3682 : :
3683 : : Destroy everything left standing.
3684 : : ----------------------------------------------------------------------------*/
3685 : :
3686 : : static void
3687 : 1991506 : dse_step7 (void)
3688 : : {
3689 : 1991506 : bitmap_obstack_release (&dse_bitmap_obstack);
3690 : 1991506 : obstack_free (&dse_obstack, NULL);
3691 : :
3692 : 1991506 : end_alias_analysis ();
3693 : 1991506 : free (bb_table);
3694 : 1991506 : delete rtx_group_table;
3695 : 1991506 : rtx_group_table = NULL;
3696 : 1991506 : rtx_group_vec.release ();
3697 : 1991506 : BITMAP_FREE (all_blocks);
3698 : 1991506 : BITMAP_FREE (scratch);
3699 : :
3700 : 1991506 : rtx_store_info_pool.release ();
3701 : 1991506 : read_info_type_pool.release ();
3702 : 1991506 : insn_info_type_pool.release ();
3703 : 1991506 : dse_bb_info_type_pool.release ();
3704 : 1991506 : group_info_pool.release ();
3705 : 1991506 : deferred_change_pool.release ();
3706 : 1991506 : }
3707 : :
3708 : :
3709 : : /* -------------------------------------------------------------------------
3710 : : DSE
3711 : : ------------------------------------------------------------------------- */
3712 : :
3713 : : /* Callback for running pass_rtl_dse. */
3714 : :
3715 : : static unsigned int
3716 : 1991506 : rest_of_handle_dse (void)
3717 : : {
3718 : 1991506 : df_set_flags (DF_DEFER_INSN_RESCAN);
3719 : :
3720 : : /* Need the notes since we must track live hardregs in the forwards
3721 : : direction. */
3722 : 1991506 : df_note_add_problem ();
3723 : 1991506 : df_analyze ();
3724 : :
3725 : 1991506 : dse_step0 ();
3726 : 1991506 : dse_step1 ();
3727 : : /* DSE can eliminate potentially-trapping MEMs.
3728 : : Remove any EH edges associated with them, since otherwise
3729 : : DF_LR_RUN_DCE will complain later. */
3730 : 1979640 : if ((locally_deleted || globally_deleted)
3731 : 11866 : && cfun->can_throw_non_call_exceptions
3732 : 1997906 : && purge_all_dead_edges ())
3733 : : {
3734 : 0 : free_dominance_info (CDI_DOMINATORS);
3735 : 0 : delete_unreachable_blocks ();
3736 : : }
3737 : 1991506 : dse_step2_init ();
3738 : 1991506 : if (dse_step2 ())
3739 : : {
3740 : 241298 : df_set_flags (DF_LR_RUN_DCE);
3741 : 241298 : df_analyze ();
3742 : 241298 : if (dump_file && (dump_flags & TDF_DETAILS))
3743 : 0 : fprintf (dump_file, "doing global processing\n");
3744 : 241298 : dse_step3 ();
3745 : 241298 : dse_step4 ();
3746 : 241298 : dse_step5 ();
3747 : : }
3748 : :
3749 : 1991506 : dse_step6 ();
3750 : 1991506 : dse_step7 ();
3751 : :
3752 : 1991506 : if (dump_file)
3753 : 54 : fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
3754 : : locally_deleted, globally_deleted);
3755 : :
3756 : : /* DSE can eliminate potentially-trapping MEMs.
3757 : : Remove any EH edges associated with them. */
3758 : 1979626 : if ((locally_deleted || globally_deleted)
3759 : 27168 : && cfun->can_throw_non_call_exceptions
3760 : 2009577 : && purge_all_dead_edges ())
3761 : : {
3762 : 0 : free_dominance_info (CDI_DOMINATORS);
3763 : 0 : cleanup_cfg (0);
3764 : : }
3765 : :
3766 : 1991506 : return 0;
3767 : : }
3768 : :
3769 : : namespace {
3770 : :
3771 : : const pass_data pass_data_rtl_dse1 =
3772 : : {
3773 : : RTL_PASS, /* type */
3774 : : "dse1", /* name */
3775 : : OPTGROUP_NONE, /* optinfo_flags */
3776 : : TV_DSE1, /* tv_id */
3777 : : 0, /* properties_required */
3778 : : 0, /* properties_provided */
3779 : : 0, /* properties_destroyed */
3780 : : 0, /* todo_flags_start */
3781 : : TODO_df_finish, /* todo_flags_finish */
3782 : : };
3783 : :
3784 : : class pass_rtl_dse1 : public rtl_opt_pass
3785 : : {
3786 : : public:
3787 : 280114 : pass_rtl_dse1 (gcc::context *ctxt)
3788 : 560228 : : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
3789 : : {}
3790 : :
3791 : : /* opt_pass methods: */
3792 : 1415668 : bool gate (function *) final override
3793 : : {
3794 : 1415668 : return optimize > 0 && flag_dse && dbg_cnt (dse1);
3795 : : }
3796 : :
3797 : 995753 : unsigned int execute (function *) final override
3798 : : {
3799 : 995753 : return rest_of_handle_dse ();
3800 : : }
3801 : :
3802 : : }; // class pass_rtl_dse1
3803 : :
3804 : : } // anon namespace
3805 : :
3806 : : rtl_opt_pass *
3807 : 280114 : make_pass_rtl_dse1 (gcc::context *ctxt)
3808 : : {
3809 : 280114 : return new pass_rtl_dse1 (ctxt);
3810 : : }
3811 : :
3812 : : namespace {
3813 : :
3814 : : const pass_data pass_data_rtl_dse2 =
3815 : : {
3816 : : RTL_PASS, /* type */
3817 : : "dse2", /* name */
3818 : : OPTGROUP_NONE, /* optinfo_flags */
3819 : : TV_DSE2, /* tv_id */
3820 : : 0, /* properties_required */
3821 : : 0, /* properties_provided */
3822 : : 0, /* properties_destroyed */
3823 : : 0, /* todo_flags_start */
3824 : : TODO_df_finish, /* todo_flags_finish */
3825 : : };
3826 : :
3827 : : class pass_rtl_dse2 : public rtl_opt_pass
3828 : : {
3829 : : public:
3830 : 280114 : pass_rtl_dse2 (gcc::context *ctxt)
3831 : 560228 : : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
3832 : : {}
3833 : :
3834 : : /* opt_pass methods: */
3835 : 1415668 : bool gate (function *) final override
3836 : : {
3837 : 1415668 : return optimize > 0 && flag_dse && dbg_cnt (dse2);
3838 : : }
3839 : :
3840 : 995753 : unsigned int execute (function *) final override
3841 : : {
3842 : 995753 : return rest_of_handle_dse ();
3843 : : }
3844 : :
3845 : : }; // class pass_rtl_dse2
3846 : :
3847 : : } // anon namespace
3848 : :
3849 : : rtl_opt_pass *
3850 : 280114 : make_pass_rtl_dse2 (gcc::context *ctxt)
3851 : : {
3852 : 280114 : return new pass_rtl_dse2 (ctxt);
3853 : : }
|