Line data Source code
1 : /* GIMPLE store merging and byte swapping passes.
2 : Copyright (C) 2009-2026 Free Software Foundation, Inc.
3 : Contributed by ARM Ltd.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* The purpose of the store merging pass is to combine multiple memory stores
22 : of constant values, values loaded from memory, bitwise operations on those,
23 : or bit-field values, to consecutive locations, into fewer wider stores.
24 :
25 : For example, if we have a sequence performing four byte stores to
26 : consecutive memory locations:
27 : [p ] := imm1;
28 : [p + 1B] := imm2;
29 : [p + 2B] := imm3;
30 : [p + 3B] := imm4;
31 : we can transform this into a single 4-byte store if the target supports it:
32 : [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 :
34 : Or:
35 : [p ] := [q ];
36 : [p + 1B] := [q + 1B];
37 : [p + 2B] := [q + 2B];
38 : [p + 3B] := [q + 3B];
39 : if there is no overlap can be transformed into a single 4-byte
40 : load followed by single 4-byte store.
41 :
42 : Or:
43 : [p ] := [q ] ^ imm1;
44 : [p + 1B] := [q + 1B] ^ imm2;
45 : [p + 2B] := [q + 2B] ^ imm3;
46 : [p + 3B] := [q + 3B] ^ imm4;
47 : if there is no overlap can be transformed into a single 4-byte
48 : load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 :
50 : Or:
51 : [p:1 ] := imm;
52 : [p:31] := val & 0x7FFFFFFF;
53 : we can transform this into a single 4-byte store if the target supports it:
54 : [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55 :
56 : The algorithm is applied to each basic block in three phases:
57 :
58 : 1) Scan through the basic block and record assignments to destinations
59 : that can be expressed as a store to memory of a certain size at a certain
60 : bit offset from base expressions we can handle. For bit-fields we also
61 : record the surrounding bit region, i.e. bits that could be stored in
62 : a read-modify-write operation when storing the bit-field. Record store
63 : chains to different bases in a hash_map (m_stores) and make sure to
64 : terminate such chains when appropriate (for example when the stored
65 : values get used subsequently).
66 : These stores can be a result of structure element initializers, array stores
67 : etc. A store_immediate_info object is recorded for every such store.
68 : Record as many such assignments to a single base as possible until a
69 : statement that interferes with the store sequence is encountered.
70 : Each store has up to 2 operands, which can be a either constant, a memory
71 : load or an SSA name, from which the value to be stored can be computed.
72 : At most one of the operands can be a constant. The operands are recorded
73 : in store_operand_info struct.
74 :
75 : 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 : store_immediate_info objects) and coalesce contiguous stores into
77 : merged_store_group objects. For bit-field stores, we don't need to
78 : require the stores to be contiguous, just their surrounding bit regions
79 : have to be contiguous. If the expression being stored is different
80 : between adjacent stores, such as one store storing a constant and
81 : following storing a value loaded from memory, or if the loaded memory
82 : objects are not adjacent, a new merged_store_group is created as well.
83 :
84 : For example, given the stores:
85 : [p ] := 0;
86 : [p + 1B] := 1;
87 : [p + 3B] := 0;
88 : [p + 4B] := 1;
89 : [p + 5B] := 0;
90 : [p + 6B] := 0;
91 : This phase would produce two merged_store_group objects, one recording the
92 : two bytes stored in the memory region [p : p + 1] and another
93 : recording the four bytes stored in the memory region [p + 3 : p + 6].
94 :
95 : 3) The merged_store_group objects produced in phase 2) are processed
96 : to generate the sequence of wider stores that set the contiguous memory
97 : regions to the sequence of bytes that correspond to it. This may emit
98 : multiple stores per store group to handle contiguous stores that are not
99 : of a size that is a power of 2. For example it can try to emit a 40-bit
100 : store as a 32-bit store followed by an 8-bit store.
101 : We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 : or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 :
104 : Note on endianness and example:
105 : Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 : [p ] := 0x1234;
107 : [p + 2B] := 0x5678;
108 : [p + 4B] := 0xab;
109 : [p + 5B] := 0xcd;
110 :
111 : The memory layout for little-endian (LE) and big-endian (BE) must be:
112 : p |LE|BE|
113 : ---------
114 : 0 |34|12|
115 : 1 |12|34|
116 : 2 |78|56|
117 : 3 |56|78|
118 : 4 |ab|ab|
119 : 5 |cd|cd|
120 :
121 : To merge these into a single 48-bit merged value 'val' in phase 2)
122 : on little-endian we insert stores to higher (consecutive) bitpositions
123 : into the most significant bits of the merged value.
124 : The final merged value would be: 0xcdab56781234
125 :
126 : For big-endian we insert stores to higher bitpositions into the least
127 : significant bits of the merged value.
128 : The final merged value would be: 0x12345678abcd
129 :
130 : Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 : followed by a 16-bit store. Again, we must consider endianness when
132 : breaking down the 48-bit value 'val' computed above.
133 : For little endian we emit:
134 : [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 : [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136 :
137 : Whereas for big-endian we emit:
138 : [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 : [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140 :
141 : #include "config.h"
142 : #include "system.h"
143 : #include "coretypes.h"
144 : #include "backend.h"
145 : #include "tree.h"
146 : #include "gimple.h"
147 : #include "builtins.h"
148 : #include "fold-const.h"
149 : #include "tree-pass.h"
150 : #include "ssa.h"
151 : #include "gimple-pretty-print.h"
152 : #include "alias.h"
153 : #include "fold-const.h"
154 : #include "print-tree.h"
155 : #include "tree-hash-traits.h"
156 : #include "gimple-iterator.h"
157 : #include "gimplify.h"
158 : #include "gimple-fold.h"
159 : #include "stor-layout.h"
160 : #include "timevar.h"
161 : #include "cfganal.h"
162 : #include "cfgcleanup.h"
163 : #include "tree-cfg.h"
164 : #include "except.h"
165 : #include "tree-eh.h"
166 : #include "target.h"
167 : #include "gimplify-me.h"
168 : #include "rtl.h"
169 : #include "expr.h" /* For get_bit_range. */
170 : #include "optabs-tree.h"
171 : #include "dbgcnt.h"
172 : #include "selftest.h"
173 :
174 : /* The maximum size (in bits) of the stores this pass should generate. */
175 : #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 : #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 :
178 : /* Limit to bound the number of aliasing checks for loads with the same
179 : vuse as the corresponding store. */
180 : #define MAX_STORE_ALIAS_CHECKS 64
181 :
182 : namespace {
183 :
184 : struct bswap_stat
185 : {
186 : /* Number of hand-written 16-bit nop / bswaps found. */
187 : int found_16bit;
188 :
189 : /* Number of hand-written 32-bit nop / bswaps found. */
190 : int found_32bit;
191 :
192 : /* Number of hand-written 64-bit nop / bswaps found. */
193 : int found_64bit;
194 : } nop_stats, bswap_stats;
195 :
196 : /* A symbolic number structure is used to detect byte permutation and selection
197 : patterns of a source. To achieve that, its field N contains an artificial
198 : number consisting of BITS_PER_MARKER sized markers tracking where does each
199 : byte come from in the source:
200 :
201 : 0 - target byte has the value 0
202 : FF - target byte has an unknown value (eg. due to sign extension)
203 : 1..size - marker value is the byte index in the source (0 for lsb).
204 :
205 : To detect permutations on memory sources (arrays and structures), a symbolic
206 : number is also associated:
207 : - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 : - a range which gives the difference between the highest and lowest accessed
209 : memory location to make such a symbolic number;
210 : - the address SRC of the source element of lowest address as a convenience
211 : to easily get BASE_ADDR + offset + lowest bytepos;
212 : - number of expressions N_OPS bitwise ored together to represent
213 : approximate cost of the computation.
214 :
215 : Note 1: the range is different from size as size reflects the size of the
216 : type of the current expression. For instance, for an array char a[],
217 : (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 : (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 : time a range of 1.
220 :
221 : Note 2: for non-memory sources, range holds the same value as size.
222 :
223 : Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224 :
225 : struct symbolic_number {
226 : uint64_t n;
227 : tree type;
228 : tree base_addr;
229 : tree offset;
230 : poly_int64 bytepos;
231 : tree src;
232 : tree alias_set;
233 : tree vuse;
234 : unsigned HOST_WIDE_INT range;
235 : int n_ops;
236 : };
237 :
238 : #define BITS_PER_MARKER 8
239 : #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 : #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 : #define HEAD_MARKER(n, size) \
242 : ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 :
244 : /* The number which the find_bswap_or_nop_1 result should match in
245 : order to have a nop. The number is masked according to the size of
246 : the symbolic number before using it. */
247 : #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 : (uint64_t)0x08070605 << 32 | 0x04030201)
249 :
250 : /* The number which the find_bswap_or_nop_1 result should match in
251 : order to have a byte swap. The number is masked according to the
252 : size of the symbolic number before using it. */
253 : #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 : (uint64_t)0x01020304 << 32 | 0x05060708)
255 :
256 : /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 : number N. Return false if the requested operation is not permitted
258 : on a symbolic number. */
259 :
260 : inline bool
261 192510 : do_shift_rotate (enum tree_code code,
262 : struct symbolic_number *n,
263 : int count)
264 : {
265 192510 : int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 192510 : uint64_t head_marker;
267 :
268 192510 : if (count < 0
269 192510 : || count >= TYPE_PRECISION (n->type)
270 385020 : || count % BITS_PER_UNIT != 0)
271 : return false;
272 149932 : count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273 :
274 : /* Zero out the extra bits of N in order to avoid them being shifted
275 : into the significant bits. */
276 149932 : if (size < 64 / BITS_PER_MARKER)
277 43148 : n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278 :
279 149932 : switch (code)
280 : {
281 121137 : case LSHIFT_EXPR:
282 121137 : n->n <<= count;
283 121137 : break;
284 25939 : case RSHIFT_EXPR:
285 25939 : head_marker = HEAD_MARKER (n->n, size);
286 25939 : n->n >>= count;
287 : /* Arithmetic shift of signed type: result is dependent on the value. */
288 25939 : if (!TYPE_UNSIGNED (n->type) && head_marker)
289 2683 : for (i = 0; i < count / BITS_PER_MARKER; i++)
290 1744 : n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 1744 : << ((size - 1 - i) * BITS_PER_MARKER);
292 : break;
293 25 : case LROTATE_EXPR:
294 25 : n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 25 : break;
296 2831 : case RROTATE_EXPR:
297 2831 : n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 2831 : break;
299 : default:
300 : return false;
301 : }
302 : /* Zero unused bits for size. */
303 149932 : if (size < 64 / BITS_PER_MARKER)
304 43148 : n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 : return true;
306 : }
307 :
308 : /* Perform sanity checking for the symbolic number N and the gimple
309 : statement STMT. */
310 :
311 : inline bool
312 342434 : verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 : {
314 342434 : tree lhs_type;
315 :
316 342434 : lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
317 :
318 342434 : if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 342434 : && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 : return false;
321 :
322 329925 : if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 0 : return false;
324 :
325 : return true;
326 : }
327 :
328 : /* Initialize the symbolic number N for the bswap pass from the base element
329 : SRC manipulated by the bitwise OR expression. */
330 :
331 : bool
332 1132437 : init_symbolic_number (struct symbolic_number *n, tree src)
333 : {
334 1132437 : int size;
335 :
336 1132437 : if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337 : return false;
338 :
339 981379 : n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 981379 : n->src = src;
341 :
342 : /* Set up the symbolic number N by setting each byte to a value between 1 and
343 : the byte size of rhs1. The highest order byte is set to n->size and the
344 : lowest order byte to 1. */
345 981379 : n->type = TREE_TYPE (src);
346 981379 : size = TYPE_PRECISION (n->type);
347 981379 : if (size % BITS_PER_UNIT != 0)
348 : return false;
349 966838 : size /= BITS_PER_UNIT;
350 966838 : if (size > 64 / BITS_PER_MARKER)
351 : return false;
352 965894 : n->range = size;
353 965894 : n->n = CMPNOP;
354 965894 : n->n_ops = 1;
355 :
356 965894 : if (size < 64 / BITS_PER_MARKER)
357 446258 : n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358 :
359 : return true;
360 : }
361 :
362 : /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 : the answer. If so, REF is that memory source and the base of the memory area
364 : accessed and the offset of the access from that base are recorded in N. */
365 :
366 : static bool
367 5229117 : find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 : {
369 : /* Leaf node is an array or component ref. Memorize its base and
370 : offset from base to compare to other such leaf node. */
371 5229117 : poly_int64 bitsize, bitpos, bytepos;
372 5229117 : machine_mode mode;
373 5229117 : int unsignedp, reversep, volatilep;
374 5229117 : tree offset, base_addr;
375 :
376 : /* Not prepared to handle PDP endian. */
377 5229117 : if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 : return false;
379 :
380 6131540 : if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 : return false;
382 :
383 897195 : base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 : &unsignedp, &reversep, &volatilep);
385 :
386 897195 : if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 : /* Do not rewrite TARGET_MEM_REF. */
388 : return false;
389 849699 : else if (TREE_CODE (base_addr) == MEM_REF)
390 : {
391 470423 : poly_offset_int bit_offset = 0;
392 470423 : tree off = TREE_OPERAND (base_addr, 1);
393 :
394 470423 : if (!integer_zerop (off))
395 : {
396 103186 : poly_offset_int boff = mem_ref_offset (base_addr);
397 103186 : boff <<= LOG2_BITS_PER_UNIT;
398 103186 : bit_offset += boff;
399 : }
400 :
401 470423 : base_addr = TREE_OPERAND (base_addr, 0);
402 :
403 : /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 470423 : if (maybe_lt (bit_offset, 0))
405 : {
406 4290 : tree byte_offset = wide_int_to_tree
407 4290 : (sizetype, bits_to_bytes_round_down (bit_offset));
408 4290 : bit_offset = num_trailing_bits (bit_offset);
409 4290 : if (offset)
410 0 : offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 : else
412 4290 : offset = byte_offset;
413 : }
414 :
415 470423 : bitpos += bit_offset.force_shwi ();
416 : }
417 : else
418 379276 : base_addr = build_fold_addr_expr (base_addr);
419 :
420 5367463 : if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 : return false;
422 848732 : if (!multiple_p (bitsize, BITS_PER_UNIT))
423 : return false;
424 847456 : if (reversep)
425 : return false;
426 :
427 847447 : if (!init_symbolic_number (n, ref))
428 : return false;
429 710386 : n->base_addr = base_addr;
430 710386 : n->offset = offset;
431 710386 : n->bytepos = bytepos;
432 710386 : n->alias_set = reference_alias_ptr_type (ref);
433 710386 : n->vuse = gimple_vuse (stmt);
434 710386 : return true;
435 : }
436 :
437 : /* Compute the symbolic number N representing the result of a bitwise OR,
438 : bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 : are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
440 :
441 : gimple *
442 157079 : perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 : gimple *source_stmt2, struct symbolic_number *n2,
444 : struct symbolic_number *n, enum tree_code code)
445 : {
446 157079 : int i, size;
447 157079 : uint64_t mask;
448 157079 : gimple *source_stmt;
449 157079 : struct symbolic_number *n_start;
450 :
451 157079 : tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 157079 : if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 157079 : && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 385 : rhs1 = TREE_OPERAND (rhs1, 0);
455 157079 : tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 157079 : if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 157079 : && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 380 : rhs2 = TREE_OPERAND (rhs2, 0);
459 :
460 : /* Sources are different, cancel bswap if they are not memory location with
461 : the same base (array, structure, ...). */
462 157079 : if (rhs1 != rhs2)
463 : {
464 149456 : uint64_t inc;
465 149456 : HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 149456 : struct symbolic_number *toinc_n_ptr, *n_end;
467 149456 : basic_block bb1, bb2;
468 :
469 122370 : if (!n1->base_addr || !n2->base_addr
470 271813 : || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 84247 : return NULL;
472 :
473 65209 : if (!n1->offset != !n2->offset
474 65209 : || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 3432 : return NULL;
476 :
477 61777 : start1 = 0;
478 61777 : if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 : return NULL;
480 :
481 61777 : if (start1 < start2)
482 : {
483 : n_start = n1;
484 : start_sub = start2 - start1;
485 : }
486 : else
487 : {
488 14547 : n_start = n2;
489 14547 : start_sub = start1 - start2;
490 : }
491 :
492 61777 : bb1 = gimple_bb (source_stmt1);
493 61777 : bb2 = gimple_bb (source_stmt2);
494 61777 : if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 : source_stmt = source_stmt1;
496 : else
497 4630 : source_stmt = source_stmt2;
498 :
499 : /* Find the highest address at which a load is performed and
500 : compute related info. */
501 61777 : end1 = start1 + (n1->range - 1);
502 61777 : end2 = start2 + (n2->range - 1);
503 61777 : if (end1 < end2)
504 : {
505 61777 : end = end2;
506 : end_sub = end2 - end1;
507 : }
508 : else
509 : {
510 : end = end1;
511 : end_sub = end1 - end2;
512 : }
513 61777 : n_end = (end2 > end1) ? n2 : n1;
514 :
515 : /* Find symbolic number whose lsb is the most significant. */
516 61777 : if (BYTES_BIG_ENDIAN)
517 : toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 : else
519 61777 : toinc_n_ptr = (n_start == n1) ? n2 : n1;
520 :
521 61777 : n->range = end - MIN (start1, start2) + 1;
522 :
523 : /* Check that the range of memory covered can be represented by
524 : a symbolic number. */
525 61777 : if (n->range > 64 / BITS_PER_MARKER)
526 : return NULL;
527 :
528 : /* Reinterpret byte marks in symbolic number holding the value of
529 : bigger weight according to target endianness. */
530 47412 : inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 47412 : size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 372243 : for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 : {
534 324831 : unsigned marker
535 324831 : = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 324831 : if (marker && marker != MARKER_BYTE_UNKNOWN)
537 123360 : toinc_n_ptr->n += inc;
538 : }
539 : }
540 : else
541 : {
542 7623 : n->range = n1->range;
543 7623 : n_start = n1;
544 7623 : source_stmt = source_stmt1;
545 : }
546 :
547 55035 : if (!n1->alias_set
548 55035 : || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 47434 : n->alias_set = n1->alias_set;
550 : else
551 7601 : n->alias_set = ptr_type_node;
552 55035 : n->vuse = n_start->vuse;
553 55035 : n->base_addr = n_start->base_addr;
554 55035 : n->offset = n_start->offset;
555 55035 : n->src = n_start->src;
556 55035 : n->bytepos = n_start->bytepos;
557 55035 : n->type = n_start->type;
558 55035 : size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559 55035 : uint64_t res_n = n1->n | n2->n;
560 :
561 425631 : for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562 : {
563 371852 : uint64_t masked1, masked2;
564 :
565 371852 : masked1 = n1->n & mask;
566 371852 : masked2 = n2->n & mask;
567 : /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 371852 : if (masked1 && masked2)
569 : {
570 : /* + can carry into upper bits, just punt. */
571 6458 : if (code == PLUS_EXPR)
572 : return NULL;
573 : /* x | x is still x. */
574 5202 : if (code == BIT_IOR_EXPR && masked1 == masked2)
575 220 : continue;
576 4982 : if (code == BIT_XOR_EXPR)
577 : {
578 : /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 : unknown values and unknown ^ unknown is unknown. */
580 946 : if (masked1 == masked2
581 196 : && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582 134 : << i * BITS_PER_MARKER))
583 : {
584 62 : res_n &= ~mask;
585 62 : continue;
586 : }
587 : }
588 : /* Otherwise set the byte to unknown, it might still be
589 : later masked off. */
590 4920 : res_n |= mask;
591 : }
592 : }
593 53779 : n->n = res_n;
594 53779 : n->n_ops = n1->n_ops + n2->n_ops;
595 :
596 53779 : return source_stmt;
597 : }
598 :
599 : /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600 : the operation given by the rhs of STMT on the result. If the operation
601 : could successfully be executed the function returns a gimple stmt whose
602 : rhs's first tree is the expression of the source operand and NULL
603 : otherwise. */
604 :
605 : gimple *
606 6214587 : find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
607 : {
608 6214587 : enum tree_code code;
609 6214587 : tree rhs1, rhs2 = NULL;
610 6214587 : gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611 6214587 : enum gimple_rhs_class rhs_class;
612 :
613 6214587 : if (!limit
614 6162919 : || !is_gimple_assign (stmt)
615 11450725 : || stmt_can_throw_internal (cfun, stmt))
616 985470 : return NULL;
617 :
618 5229117 : rhs1 = gimple_assign_rhs1 (stmt);
619 :
620 5229117 : if (find_bswap_or_nop_load (stmt, rhs1, n))
621 : return stmt;
622 :
623 : /* Handle BIT_FIELD_REF. */
624 4518731 : if (TREE_CODE (rhs1) == BIT_FIELD_REF
625 4518731 : && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
626 : {
627 12120 : if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
628 12120 : || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
629 : return NULL;
630 :
631 12120 : unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
632 12120 : unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
633 12120 : if (bitpos % BITS_PER_UNIT == 0
634 12120 : && bitsize % BITS_PER_UNIT == 0
635 24240 : && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
636 : {
637 : /* Handle big-endian bit numbering in BIT_FIELD_REF. */
638 548 : if (BYTES_BIG_ENDIAN)
639 : bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
640 :
641 : /* Shift. */
642 548 : if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
643 : return NULL;
644 :
645 : /* Mask. */
646 : uint64_t mask = 0;
647 1367 : for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; i++)
648 819 : mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
649 548 : n->n &= mask;
650 :
651 : /* Convert. */
652 548 : n->type = TREE_TYPE (rhs1);
653 548 : if (!verify_symbolic_number_p (n, stmt))
654 : return NULL;
655 :
656 536 : if (!n->base_addr)
657 536 : n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
658 :
659 536 : return stmt;
660 : }
661 :
662 11572 : return NULL;
663 : }
664 :
665 4506611 : if (TREE_CODE (rhs1) != SSA_NAME)
666 : return NULL;
667 :
668 4203531 : code = gimple_assign_rhs_code (stmt);
669 4203531 : rhs_class = gimple_assign_rhs_class (stmt);
670 4203531 : rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
671 :
672 4203531 : if (rhs_class == GIMPLE_BINARY_RHS)
673 3890754 : rhs2 = gimple_assign_rhs2 (stmt);
674 :
675 : /* Handle unary rhs and binary rhs with integer constants as second
676 : operand. */
677 :
678 4203531 : if (rhs_class == GIMPLE_UNARY_RHS
679 3894195 : || (rhs_class == GIMPLE_BINARY_RHS
680 3890754 : && TREE_CODE (rhs2) == INTEGER_CST))
681 : {
682 1958555 : if (code != BIT_AND_EXPR
683 1958555 : && code != LSHIFT_EXPR
684 : && code != RSHIFT_EXPR
685 1881140 : && code != LROTATE_EXPR
686 1824442 : && code != RROTATE_EXPR
687 1818787 : && !CONVERT_EXPR_CODE_P (code))
688 : return NULL;
689 :
690 409096 : source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
691 :
692 : /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
693 : we have to initialize the symbolic number. */
694 409096 : if (!source_stmt1)
695 : {
696 272870 : if (gimple_assign_load_p (stmt)
697 272870 : || !init_symbolic_number (n, rhs1))
698 17910 : return NULL;
699 : source_stmt1 = stmt;
700 : }
701 :
702 391186 : switch (code)
703 : {
704 42207 : case BIT_AND_EXPR:
705 42207 : {
706 42207 : int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
707 42207 : uint64_t val = int_cst_value (rhs2), mask = 0;
708 42207 : uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
709 :
710 : /* Only constants masking full bytes are allowed. */
711 230071 : for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
712 209301 : if ((val & tmp) != 0 && (val & tmp) != tmp)
713 : return NULL;
714 187864 : else if (val & tmp)
715 96635 : mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
716 :
717 20770 : n->n &= mask;
718 : }
719 20770 : break;
720 97060 : case LSHIFT_EXPR:
721 97060 : case RSHIFT_EXPR:
722 97060 : case LROTATE_EXPR:
723 97060 : case RROTATE_EXPR:
724 97060 : if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
725 : return NULL;
726 : break;
727 251919 : CASE_CONVERT:
728 251919 : {
729 251919 : int i, type_size, old_type_size;
730 251919 : tree type;
731 :
732 251919 : type = TREE_TYPE (gimple_assign_lhs (stmt));
733 251919 : type_size = TYPE_PRECISION (type);
734 251919 : if (type_size % BITS_PER_UNIT != 0)
735 : return NULL;
736 249177 : type_size /= BITS_PER_UNIT;
737 249177 : if (type_size > 64 / BITS_PER_MARKER)
738 : return NULL;
739 :
740 : /* Sign extension: result is dependent on the value. */
741 248603 : old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
742 353281 : if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
743 286461 : && HEAD_MARKER (n->n, old_type_size))
744 181680 : for (i = 0; i < type_size - old_type_size; i++)
745 143955 : n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
746 143955 : << ((type_size - 1 - i) * BITS_PER_MARKER);
747 :
748 248603 : if (type_size < 64 / BITS_PER_MARKER)
749 : {
750 : /* If STMT casts to a smaller type mask out the bits not
751 : belonging to the target type. */
752 121068 : n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
753 : }
754 248603 : n->type = type;
755 248603 : if (!n->base_addr)
756 167852 : n->range = type_size;
757 : }
758 : break;
759 : default:
760 : return NULL;
761 323855 : };
762 323855 : return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
763 : }
764 :
765 : /* Handle binary rhs. */
766 :
767 2241535 : if (rhs_class == GIMPLE_BINARY_RHS)
768 : {
769 2241535 : struct symbolic_number n1, n2;
770 2241535 : gimple *source_stmt, *source_stmt2;
771 :
772 2241535 : if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
773 : return NULL;
774 :
775 2182652 : rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
776 :
777 2182652 : switch (code)
778 : {
779 1944031 : case BIT_IOR_EXPR:
780 1944031 : case BIT_XOR_EXPR:
781 1944031 : case PLUS_EXPR:
782 1944031 : source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
783 :
784 1944031 : if (!source_stmt1)
785 : return NULL;
786 :
787 265160 : source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
788 :
789 265160 : if (!source_stmt2)
790 : return NULL;
791 :
792 133935 : if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
793 : return NULL;
794 :
795 133935 : if (n1.vuse != n2.vuse)
796 : return NULL;
797 :
798 106498 : source_stmt
799 106498 : = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
800 : code);
801 :
802 106498 : if (!source_stmt)
803 : return NULL;
804 :
805 18031 : if (!verify_symbolic_number_p (n, stmt))
806 : return NULL;
807 :
808 : break;
809 : default:
810 : return NULL;
811 : }
812 : return source_stmt;
813 : }
814 : return NULL;
815 : }
816 :
817 : /* Like find_bswap_or_nop_1, but also handles CONSTRUCTOR and
818 : VEC_PACK_TRUNC_EXPR. */
819 :
820 : gimple *
821 2091151 : find_bswap_or_nop_2 (gimple *stmt, struct symbolic_number *n, int limit)
822 : {
823 2091151 : if (gimple *ret = find_bswap_or_nop_1 (stmt, n, limit))
824 : return ret;
825 :
826 2083010 : if (!limit
827 2083010 : || !is_gimple_assign (stmt)
828 2083010 : || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
829 4166008 : || stmt_can_throw_internal (cfun, stmt))
830 46 : return NULL;
831 :
832 2082964 : tree rhs1 = gimple_assign_rhs1 (stmt);
833 2082964 : if (gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR
834 2082964 : && TREE_CODE (rhs1) == SSA_NAME)
835 : {
836 1220 : if (BYTES_BIG_ENDIAN)
837 : return NULL; /* For now... */
838 1220 : gimple *rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
839 1220 : tree rhs2 = gimple_assign_rhs2 (stmt);
840 1220 : if (TREE_CODE (rhs2) != SSA_NAME)
841 : return NULL;
842 1220 : gimple *rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
843 1220 : struct symbolic_number n1, n2;
844 :
845 1220 : gimple *source_stmt1 = find_bswap_or_nop_2 (rhs1_stmt, &n1, limit - 1);
846 1220 : if (!source_stmt1)
847 : return NULL;
848 59 : gimple *source_stmt2 = find_bswap_or_nop_2 (rhs2_stmt, &n2, limit - 1);
849 59 : if (!source_stmt2)
850 : return NULL;
851 :
852 59 : if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
853 : return NULL;
854 59 : if (n1.vuse != n2.vuse)
855 : return NULL;
856 59 : auto nn2 = n2.n;
857 59 : n2.n = 0;
858 : /* We need to take into account number of elements of each vector,
859 : which perform_symbolic_merge doesn't know. So, handle it as
860 : two separate BIT_IOR_EXPR merges, each time with one operand
861 : with changed mastk to all 0s, and then merge here. */
862 59 : gimple *source_stmt
863 59 : = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
864 : BIT_IOR_EXPR);
865 59 : if (!source_stmt)
866 : return NULL;
867 59 : n2.n = nn2;
868 59 : auto nn1 = n->n;
869 59 : n1.n = 0;
870 59 : gimple *source_stmt3
871 59 : = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
872 : BIT_IOR_EXPR);
873 59 : gcc_assert (source_stmt == source_stmt3);
874 59 : nn2 = n->n;
875 59 : tree lhs = gimple_assign_lhs (stmt);
876 59 : int eltsize
877 59 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) / BITS_PER_UNIT;
878 59 : int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs)).to_constant ();
879 59 : int size = eltsize * nelts;
880 59 : int hsize = size / 2;
881 59 : n->n = 0;
882 59 : if (!BYTES_BIG_ENDIAN)
883 407 : for (int i = 0; i < size; i++)
884 348 : n->n |= ((((i < hsize ? nn1 : nn2)
885 348 : >> (((i % hsize) / eltsize * 2 * eltsize
886 348 : + (i % eltsize))) * BITS_PER_MARKER) & MARKER_MASK)
887 348 : << (i * BITS_PER_MARKER));
888 : else
889 : gcc_unreachable ();
890 : return source_stmt;
891 : }
892 :
893 2081744 : if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
894 : return NULL;
895 :
896 26485 : tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
897 26485 : unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
898 26485 : if (sz != 16 && sz != 32 && sz != 64)
899 : return NULL;
900 2100173 : if (CONSTRUCTOR_NELTS (rhs1) == 0)
901 : return NULL;
902 22129 : tree eltype = TREE_TYPE (TREE_TYPE (rhs1));
903 22129 : unsigned HOST_WIDE_INT eltsz = int_size_in_bytes (eltype) * BITS_PER_UNIT;
904 22129 : if (TYPE_PRECISION (eltype) != eltsz)
905 : return NULL;
906 21989 : constructor_elt *elt;
907 21989 : unsigned int i;
908 21989 : tree type = build_nonstandard_integer_type (sz, 1);
909 21989 : gimple *ins_stmt = NULL;
910 38649 : FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs1), i, elt)
911 : {
912 34903 : if (TREE_CODE (elt->value) != SSA_NAME
913 34903 : || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
914 18243 : return NULL;
915 34195 : struct symbolic_number n1;
916 34195 : gimple *source_stmt
917 34195 : = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1, limit - 1);
918 :
919 34195 : if (!source_stmt)
920 : return NULL;
921 :
922 23855 : n1.type = type;
923 23855 : if (!n1.base_addr)
924 3791 : n1.range = sz / BITS_PER_UNIT;
925 :
926 23855 : if (i == 0)
927 : {
928 11494 : ins_stmt = source_stmt;
929 11494 : *n = n1;
930 : }
931 : else
932 : {
933 12361 : if (n->vuse != n1.vuse)
934 7195 : return NULL;
935 :
936 7519 : struct symbolic_number n0 = *n;
937 :
938 7519 : if (!BYTES_BIG_ENDIAN)
939 : {
940 7519 : if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
941 : return NULL;
942 : }
943 : else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
944 : return NULL;
945 7519 : ins_stmt
946 7519 : = perform_symbolic_merge (ins_stmt, &n0, source_stmt,
947 : &n1, n, BIT_IOR_EXPR);
948 :
949 7519 : if (!ins_stmt)
950 : return NULL;
951 : }
952 : }
953 : return ins_stmt;
954 : }
955 :
956 : /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
957 : *CMPXCHG, *CMPNOP and adjust *N. */
958 :
959 : void
960 35711 : find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
961 : uint64_t *cmpnop, bool *cast64_to_32)
962 : {
963 35711 : unsigned rsize;
964 35711 : uint64_t tmpn, mask;
965 :
966 : /* The number which the find_bswap_or_nop_1 result should match in order
967 : to have a full byte swap. The number is shifted to the right
968 : according to the size of the symbolic number before using it. */
969 35711 : *cmpxchg = CMPXCHG;
970 35711 : *cmpnop = CMPNOP;
971 35711 : *cast64_to_32 = false;
972 :
973 : /* Find real size of result (highest non-zero byte). */
974 35711 : if (n->base_addr)
975 253502 : for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
976 : else
977 1835 : rsize = n->range;
978 :
979 : /* Zero out the bits corresponding to untouched bytes in original gimple
980 : expression. */
981 35711 : if (n->range < (int) sizeof (int64_t))
982 : {
983 11544 : mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
984 11544 : if (n->base_addr == NULL
985 993 : && n->range == 4
986 12252 : && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
987 : {
988 : /* If all bytes in n->n are either 0 or in [5..8] range, this
989 : might be a candidate for (unsigned) __builtin_bswap64 (src).
990 : It is not worth it for (unsigned short) __builtin_bswap64 (src)
991 : or (unsigned short) __builtin_bswap32 (src). */
992 129 : *cast64_to_32 = true;
993 269 : for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
994 232 : if ((tmpn & MARKER_MASK)
995 232 : && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
996 : {
997 92 : *cast64_to_32 = false;
998 92 : break;
999 : }
1000 : }
1001 11544 : if (*cast64_to_32)
1002 37 : *cmpxchg &= mask;
1003 : else
1004 11507 : *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
1005 11544 : *cmpnop &= mask;
1006 : }
1007 :
1008 : /* Zero out the bits corresponding to unused bytes in the result of the
1009 : gimple expression. */
1010 35711 : if (rsize < n->range)
1011 : {
1012 2562 : if (BYTES_BIG_ENDIAN)
1013 : {
1014 : mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
1015 : *cmpxchg &= mask;
1016 : if (n->range - rsize == sizeof (int64_t))
1017 : *cmpnop = 0;
1018 : else
1019 : *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
1020 : }
1021 : else
1022 : {
1023 2562 : mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
1024 2562 : if (n->range - rsize == sizeof (int64_t))
1025 6 : *cmpxchg = 0;
1026 : else
1027 2556 : *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
1028 2562 : *cmpnop &= mask;
1029 : }
1030 2562 : n->range = rsize;
1031 : }
1032 :
1033 35711 : if (*cast64_to_32)
1034 37 : n->range = 8;
1035 35711 : n->range *= BITS_PER_UNIT;
1036 35711 : }
1037 :
1038 : /* Helper function for find_bswap_or_nop,
1039 : Return true if N is a swap or nop with MASK. */
1040 : static bool
1041 13178 : is_bswap_or_nop_p (uint64_t n, uint64_t cmpxchg,
1042 : uint64_t cmpnop, uint64_t* mask,
1043 : bool* bswap)
1044 : {
1045 13178 : *mask = ~(uint64_t) 0;
1046 13178 : if (n == cmpnop)
1047 5216 : *bswap = false;
1048 7962 : else if (n == cmpxchg)
1049 2276 : *bswap = true;
1050 : else
1051 : {
1052 : int set = 0;
1053 12922 : for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1054 12348 : if ((n & msk) == 0)
1055 4732 : *mask &= ~msk;
1056 7616 : else if ((n & msk) == (cmpxchg & msk))
1057 2504 : set++;
1058 : else
1059 : return false;
1060 :
1061 574 : if (set < 2)
1062 : return false;
1063 573 : *bswap = true;
1064 : }
1065 : return true;
1066 : }
1067 :
1068 :
1069 : /* Check if STMT completes a bswap implementation or a read in a given
1070 : endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
1071 : accordingly. It also sets N to represent the kind of operations
1072 : performed: size of the resulting expression and whether it works on
1073 : a memory source, and if so alias-set and vuse. At last, the
1074 : function returns a stmt whose rhs's first tree is the source
1075 : expression. */
1076 :
1077 : gimple *
1078 2089872 : find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
1079 : bool *cast64_to_32, uint64_t *mask, uint64_t* l_rotate)
1080 : {
1081 2089872 : tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
1082 2089872 : if (!tree_fits_uhwi_p (type_size))
1083 : return NULL;
1084 :
1085 : /* The last parameter determines the depth search limit. It usually
1086 : correlates directly to the number n of bytes to be touched. We
1087 : increase that number by 2 * (log2(n) + 1) here in order to also
1088 : cover signed -> unsigned conversions of the src operand as can be seen
1089 : in libgcc, and for initial shift/and operation of the src operand. */
1090 2089872 : int limit = tree_to_uhwi (type_size);
1091 2089872 : limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
1092 2089872 : gimple *ins_stmt = find_bswap_or_nop_2 (stmt, n, limit);
1093 2089872 : if (!ins_stmt)
1094 : return NULL;
1095 :
1096 11828 : uint64_t cmpxchg, cmpnop;
1097 11828 : uint64_t orig_range = n->range * BITS_PER_UNIT;
1098 11828 : find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
1099 :
1100 : /* A complete byte swap should make the symbolic number to start with
1101 : the largest digit in the highest order byte. Unchanged symbolic
1102 : number indicates a read with same endianness as target architecture. */
1103 11828 : *l_rotate = 0;
1104 11828 : uint64_t tmp_n = n->n;
1105 11828 : if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
1106 : {
1107 : /* Try bswap + lrotate. */
1108 : /* TODO, handle cast64_to_32 and big/litte_endian memory
1109 : source when rsize < range. */
1110 3885 : if (n->range == orig_range
1111 : /* There're case like 0x300000200 for uint32->uint64 cast,
1112 : Don't handle this. */
1113 2976 : && n->range == TYPE_PRECISION (n->type)
1114 1696 : && ((orig_range == 32
1115 576 : && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
1116 1120 : || (orig_range == 64
1117 1101 : && optab_handler (rotl_optab, DImode) != CODE_FOR_nothing))
1118 5562 : && (tmp_n & MARKER_MASK) < orig_range / BITS_PER_UNIT)
1119 : {
1120 1354 : uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER;
1121 1354 : uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER;
1122 : /* .i.e. handle 0x203040506070800 when lower byte is zero. */
1123 1354 : if (!count)
1124 : {
1125 60 : for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++)
1126 : {
1127 60 : count = (tmp_n >> i * BITS_PER_MARKER) & MARKER_MASK;
1128 60 : if (count)
1129 : {
1130 : /* Count should be meaningful not 0xff. */
1131 31 : if (count <= range / BITS_PER_MARKER)
1132 : {
1133 31 : count = (count + i) * BITS_PER_MARKER % range;
1134 31 : if (!count)
1135 : return NULL;
1136 : break;
1137 : }
1138 : else
1139 : return NULL;
1140 : }
1141 : }
1142 : }
1143 1350 : tmp_n = tmp_n >> count | tmp_n << (range - count);
1144 1350 : if (orig_range == 32)
1145 366 : tmp_n &= (1ULL << 32) - 1;
1146 1350 : if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
1147 : return NULL;
1148 122 : *l_rotate = count / BITS_PER_MARKER * BITS_PER_UNIT;
1149 122 : gcc_assert (*bswap);
1150 : }
1151 : else
1152 2531 : return NULL;
1153 : }
1154 :
1155 : /* Useless bit manipulation performed by code. */
1156 8065 : if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1157 : return NULL;
1158 :
1159 : return ins_stmt;
1160 : }
1161 :
1162 : const pass_data pass_data_optimize_bswap =
1163 : {
1164 : GIMPLE_PASS, /* type */
1165 : "bswap", /* name */
1166 : OPTGROUP_NONE, /* optinfo_flags */
1167 : TV_NONE, /* tv_id */
1168 : PROP_ssa, /* properties_required */
1169 : 0, /* properties_provided */
1170 : 0, /* properties_destroyed */
1171 : 0, /* todo_flags_start */
1172 : 0, /* todo_flags_finish */
1173 : };
1174 :
1175 : class pass_optimize_bswap : public gimple_opt_pass
1176 : {
1177 : public:
1178 298828 : pass_optimize_bswap (gcc::context *ctxt)
1179 597656 : : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1180 : {}
1181 :
1182 : /* opt_pass methods: */
1183 1039819 : bool gate (function *) final override
1184 : {
1185 1039819 : return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1186 : }
1187 :
1188 : unsigned int execute (function *) final override;
1189 :
1190 : }; // class pass_optimize_bswap
1191 :
1192 : /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1193 : VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1194 : first. */
1195 :
1196 : static tree
1197 2239 : bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1198 : bool before)
1199 : {
1200 2239 : gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1201 : || POINTER_TYPE_P (TREE_TYPE (val)));
1202 2239 : if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1203 : {
1204 1 : HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1205 1 : if (POINTER_TYPE_P (TREE_TYPE (val)))
1206 : {
1207 0 : gimple *g
1208 0 : = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1209 : NOP_EXPR, val);
1210 0 : if (before)
1211 0 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
1212 : else
1213 0 : gsi_insert_after (gsi, g, GSI_NEW_STMT);
1214 0 : val = gimple_assign_lhs (g);
1215 : }
1216 1 : tree itype = build_nonstandard_integer_type (prec, 1);
1217 1 : gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1218 1 : if (before)
1219 0 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
1220 : else
1221 1 : gsi_insert_after (gsi, g, GSI_NEW_STMT);
1222 1 : val = gimple_assign_lhs (g);
1223 : }
1224 2239 : return build1 (VIEW_CONVERT_EXPR, type, val);
1225 : }
1226 :
1227 : /* Perform the bswap optimization: replace the expression computed in the rhs
1228 : of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1229 : bswap, load or load + bswap expression.
1230 : Which of these alternatives replace the rhs is given by N->base_addr (non
1231 : null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1232 : load to perform are also given in N while the builtin bswap invoke is given
1233 : in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1234 : load statements involved to construct the rhs in gsi_stmt (GSI) and
1235 : N->range gives the size of the rhs expression for maintaining some
1236 : statistics.
1237 :
1238 : Note that if the replacement involve a load and if gsi_stmt (GSI) is
1239 : non-NULL, that stmt is moved just after INS_STMT to do the load with the
1240 : same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
1241 :
1242 : tree
1243 6572 : bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1244 : tree bswap_type, tree load_type, struct symbolic_number *n,
1245 : bool bswap, uint64_t mask, uint64_t l_rotate)
1246 : {
1247 6572 : tree src, tmp, tgt = NULL_TREE;
1248 6572 : gimple *bswap_stmt, *mask_stmt = NULL, *rotl_stmt = NULL;
1249 6572 : tree_code conv_code = NOP_EXPR;
1250 :
1251 6572 : gimple *cur_stmt = gsi_stmt (gsi);
1252 6572 : src = n->src;
1253 6572 : if (cur_stmt)
1254 : {
1255 5738 : tgt = gimple_assign_lhs (cur_stmt);
1256 5738 : if ((gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1257 3530 : || gimple_assign_rhs_code (cur_stmt) == VEC_PACK_TRUNC_EXPR)
1258 2239 : && tgt
1259 7977 : && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1260 : conv_code = VIEW_CONVERT_EXPR;
1261 : }
1262 :
1263 : /* Need to load the value from memory first. */
1264 6572 : if (n->base_addr)
1265 : {
1266 5869 : gimple_stmt_iterator gsi_ins = gsi;
1267 5869 : if (ins_stmt)
1268 5796 : gsi_ins = gsi_for_stmt (ins_stmt);
1269 5869 : tree addr_expr, addr_tmp, val_expr, val_tmp;
1270 5869 : tree load_offset_ptr, aligned_load_type;
1271 5869 : gimple *load_stmt;
1272 5869 : unsigned align = get_object_alignment (src);
1273 5869 : poly_int64 load_offset = 0;
1274 :
1275 5869 : if (cur_stmt)
1276 : {
1277 5448 : basic_block ins_bb = gimple_bb (ins_stmt);
1278 5448 : basic_block cur_bb = gimple_bb (cur_stmt);
1279 5448 : if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1280 4114 : return NULL_TREE;
1281 :
1282 : /* Move cur_stmt just before one of the load of the original
1283 : to ensure it has the same VUSE. See PR61517 for what could
1284 : go wrong. */
1285 5448 : if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1286 1017 : reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1287 5448 : gsi_move_before (&gsi, &gsi_ins);
1288 5448 : gsi = gsi_for_stmt (cur_stmt);
1289 : }
1290 : else
1291 421 : gsi = gsi_ins;
1292 :
1293 : /* Compute address to load from and cast according to the size
1294 : of the load. */
1295 5869 : addr_expr = build_fold_addr_expr (src);
1296 5869 : if (is_gimple_mem_ref_addr (addr_expr))
1297 460 : addr_tmp = unshare_expr (addr_expr);
1298 : else
1299 : {
1300 5409 : addr_tmp = unshare_expr (n->base_addr);
1301 5409 : if (!is_gimple_mem_ref_addr (addr_tmp))
1302 0 : addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1303 : is_gimple_mem_ref_addr,
1304 : NULL_TREE, true,
1305 : GSI_SAME_STMT);
1306 5409 : load_offset = n->bytepos;
1307 5409 : if (n->offset)
1308 : {
1309 0 : tree off
1310 0 : = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1311 : true, NULL_TREE, true,
1312 : GSI_SAME_STMT);
1313 0 : gimple *stmt
1314 0 : = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1315 : POINTER_PLUS_EXPR, addr_tmp, off);
1316 0 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1317 0 : addr_tmp = gimple_assign_lhs (stmt);
1318 : }
1319 : }
1320 :
1321 : /* Perform the load. */
1322 5869 : aligned_load_type = load_type;
1323 5869 : if (align < TYPE_ALIGN (load_type))
1324 5128 : aligned_load_type = build_aligned_type (load_type, align);
1325 5869 : load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1326 5869 : val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1327 : load_offset_ptr);
1328 :
1329 5869 : if (!bswap)
1330 : {
1331 4114 : if (n->range == 16)
1332 1184 : nop_stats.found_16bit++;
1333 2930 : else if (n->range == 32)
1334 628 : nop_stats.found_32bit++;
1335 : else
1336 : {
1337 2302 : gcc_assert (n->range == 64);
1338 2302 : nop_stats.found_64bit++;
1339 : }
1340 :
1341 : /* Convert the result of load if necessary. */
1342 4114 : if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1343 : {
1344 3583 : val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1345 : "load_dst");
1346 3583 : load_stmt = gimple_build_assign (val_tmp, val_expr);
1347 3583 : gimple_set_vuse (load_stmt, n->vuse);
1348 3583 : gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1349 3583 : if (conv_code == VIEW_CONVERT_EXPR)
1350 2105 : val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1351 : true);
1352 3583 : gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1353 3583 : update_stmt (cur_stmt);
1354 : }
1355 531 : else if (cur_stmt)
1356 : {
1357 444 : gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1358 444 : gimple_set_vuse (cur_stmt, n->vuse);
1359 444 : update_stmt (cur_stmt);
1360 : }
1361 : else
1362 : {
1363 87 : tgt = make_ssa_name (load_type);
1364 87 : cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1365 87 : gimple_set_vuse (cur_stmt, n->vuse);
1366 87 : gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1367 : }
1368 :
1369 4114 : if (dump_file)
1370 : {
1371 31 : fprintf (dump_file,
1372 : "%d bit load in target endianness found at: ",
1373 31 : (int) n->range);
1374 31 : print_gimple_stmt (dump_file, cur_stmt, 0);
1375 : }
1376 4114 : return tgt;
1377 : }
1378 : else
1379 : {
1380 1755 : val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1381 1755 : load_stmt = gimple_build_assign (val_tmp, val_expr);
1382 1755 : gimple_set_vuse (load_stmt, n->vuse);
1383 1755 : gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1384 : }
1385 1755 : src = val_tmp;
1386 : }
1387 703 : else if (!bswap)
1388 : {
1389 249 : gimple *g = NULL;
1390 249 : if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1391 : {
1392 2 : if (!is_gimple_val (src))
1393 : return NULL_TREE;
1394 2 : if (conv_code == VIEW_CONVERT_EXPR)
1395 2 : src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1396 2 : g = gimple_build_assign (tgt, conv_code, src);
1397 : }
1398 247 : else if (cur_stmt)
1399 7 : g = gimple_build_assign (tgt, src);
1400 : else
1401 : tgt = src;
1402 249 : if (n->range == 16)
1403 67 : nop_stats.found_16bit++;
1404 182 : else if (n->range == 32)
1405 100 : nop_stats.found_32bit++;
1406 : else
1407 : {
1408 82 : gcc_assert (n->range == 64);
1409 82 : nop_stats.found_64bit++;
1410 : }
1411 249 : if (dump_file)
1412 : {
1413 1 : fprintf (dump_file,
1414 : "%d bit reshuffle in target endianness found at: ",
1415 : (int) n->range);
1416 1 : if (cur_stmt)
1417 0 : print_gimple_stmt (dump_file, cur_stmt, 0);
1418 : else
1419 : {
1420 1 : print_generic_expr (dump_file, tgt, TDF_NONE);
1421 1 : fprintf (dump_file, "\n");
1422 : }
1423 : }
1424 249 : if (cur_stmt)
1425 9 : gsi_replace (&gsi, g, true);
1426 249 : return tgt;
1427 : }
1428 454 : else if (TREE_CODE (src) == BIT_FIELD_REF)
1429 0 : src = TREE_OPERAND (src, 0);
1430 :
1431 2209 : if (n->range == 16)
1432 993 : bswap_stats.found_16bit++;
1433 1216 : else if (n->range == 32)
1434 805 : bswap_stats.found_32bit++;
1435 : else
1436 : {
1437 411 : gcc_assert (n->range == 64);
1438 411 : bswap_stats.found_64bit++;
1439 : }
1440 :
1441 2209 : tmp = src;
1442 :
1443 : /* Convert the src expression if necessary. */
1444 2209 : if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1445 : {
1446 166 : gimple *convert_stmt;
1447 :
1448 166 : tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1449 166 : convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1450 166 : gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1451 : }
1452 :
1453 : /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1454 : are considered as rotation of 2N bit values by N bits is generally not
1455 : equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1456 : gives 0x03040102 while a bswap for that value is 0x04030201. */
1457 2209 : if (bswap && n->range == 16)
1458 : {
1459 993 : tree count = build_int_cst (NULL, BITS_PER_UNIT);
1460 993 : src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1461 993 : bswap_stmt = gimple_build_assign (NULL, src);
1462 993 : }
1463 : else
1464 1216 : bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1465 :
1466 2209 : if (tgt == NULL_TREE)
1467 507 : tgt = make_ssa_name (bswap_type);
1468 2209 : tmp = tgt;
1469 :
1470 2209 : if (mask != ~(uint64_t) 0)
1471 : {
1472 507 : tree m = build_int_cst (bswap_type, mask);
1473 507 : tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1474 507 : gimple_set_lhs (bswap_stmt, tmp);
1475 507 : mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1476 507 : tmp = tgt;
1477 : }
1478 :
1479 2209 : if (l_rotate)
1480 : {
1481 122 : tree m = build_int_cst (bswap_type, l_rotate);
1482 177 : tmp = make_temp_ssa_name (bswap_type, NULL,
1483 : mask_stmt ? "bswapmaskdst" : "bswapdst");
1484 177 : gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1485 122 : rotl_stmt = gimple_build_assign (tgt, LROTATE_EXPR, tmp, m);
1486 122 : tmp = tgt;
1487 : }
1488 :
1489 : /* Convert the result if necessary. */
1490 2209 : if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1491 : {
1492 750 : tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1493 750 : tree atmp = tmp;
1494 750 : gimple_stmt_iterator gsi2 = gsi;
1495 750 : if (conv_code == VIEW_CONVERT_EXPR)
1496 132 : atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1497 750 : gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1498 750 : gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1499 : }
1500 :
1501 4296 : gimple_set_lhs (rotl_stmt ? rotl_stmt
1502 2087 : : mask_stmt ? mask_stmt : bswap_stmt, tmp);
1503 :
1504 2209 : if (dump_file)
1505 : {
1506 37 : fprintf (dump_file, "%d bit bswap implementation found at: ",
1507 37 : (int) n->range);
1508 37 : if (cur_stmt)
1509 28 : print_gimple_stmt (dump_file, cur_stmt, 0);
1510 : else
1511 : {
1512 9 : print_generic_expr (dump_file, tgt, TDF_NONE);
1513 9 : fprintf (dump_file, "\n");
1514 : }
1515 : }
1516 :
1517 2209 : if (cur_stmt)
1518 : {
1519 1702 : if (rotl_stmt)
1520 122 : gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
1521 1702 : if (mask_stmt)
1522 507 : gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1523 1702 : gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1524 1702 : gsi_remove (&gsi, true);
1525 : }
1526 : else
1527 : {
1528 507 : gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1529 507 : if (mask_stmt)
1530 0 : gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1531 507 : if (rotl_stmt)
1532 0 : gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
1533 : }
1534 : return tgt;
1535 : }
1536 :
1537 : /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR/VEC_PACK_TRUNC_EXPR
1538 : on the rhs using bswap optimizations. CDI_DOMINATORS need to be
1539 : computed on entry. Return true if it has been optimized and
1540 : TODO_update_ssa is needed. */
1541 :
1542 : static bool
1543 1026334 : maybe_optimize_vector_constructor (gimple *cur_stmt)
1544 : {
1545 1026334 : tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1546 1026334 : struct symbolic_number n;
1547 1026334 : bool bswap;
1548 :
1549 1026334 : gcc_assert (is_gimple_assign (cur_stmt));
1550 1026334 : switch (gimple_assign_rhs_code (cur_stmt))
1551 : {
1552 1026334 : case CONSTRUCTOR:
1553 1026334 : case VEC_PACK_TRUNC_EXPR:
1554 1026334 : break;
1555 0 : default:
1556 0 : gcc_unreachable ();
1557 : }
1558 :
1559 1026334 : tree rhs = gimple_assign_rhs1 (cur_stmt);
1560 1026334 : if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1561 88643 : || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1562 1111862 : || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1563 : return false;
1564 :
1565 85528 : HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1566 85528 : switch (sz)
1567 : {
1568 936 : case 16:
1569 936 : load_type = bswap_type = uint16_type_node;
1570 936 : break;
1571 710 : case 32:
1572 710 : if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1573 710 : && can_open_code_p (bswap_optab, SImode))
1574 : {
1575 702 : load_type = uint32_type_node;
1576 702 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1577 702 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1578 : }
1579 : else
1580 8 : return false;
1581 702 : break;
1582 21922 : case 64:
1583 21922 : if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1584 21922 : && can_open_code_p (bswap_optab, DImode))
1585 : {
1586 21032 : load_type = uint64_type_node;
1587 21032 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1588 21032 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1589 : }
1590 : else
1591 890 : return false;
1592 21032 : break;
1593 : default:
1594 : return false;
1595 : }
1596 :
1597 22670 : bool cast64_to_32;
1598 22670 : uint64_t mask, l_rotate;
1599 22670 : gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1600 : &cast64_to_32, &mask, &l_rotate);
1601 22670 : if (!ins_stmt
1602 2202 : || n.range != (unsigned HOST_WIDE_INT) sz
1603 2202 : || cast64_to_32
1604 2202 : || mask != ~(uint64_t) 0)
1605 : return false;
1606 :
1607 2202 : if (bswap && !fndecl && n.range != 16)
1608 : return false;
1609 :
1610 2202 : memset (&nop_stats, 0, sizeof (nop_stats));
1611 2202 : memset (&bswap_stats, 0, sizeof (bswap_stats));
1612 2202 : return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1613 : bswap_type, load_type, &n, bswap, mask,
1614 2202 : l_rotate) != NULL_TREE;
1615 : }
1616 :
1617 : /* Find manual byte swap implementations as well as load in a given
1618 : endianness. Byte swaps are turned into a bswap builtin invocation
1619 : while endian loads are converted to bswap builtin invocation or
1620 : simple load according to the target endianness. */
1621 :
1622 : unsigned int
1623 961473 : pass_optimize_bswap::execute (function *fun)
1624 : {
1625 961473 : basic_block bb;
1626 961473 : bool bswap32_p, bswap64_p;
1627 961473 : bool changed = false;
1628 961473 : tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1629 :
1630 961473 : bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1631 961473 : && can_open_code_p (bswap_optab, SImode));
1632 961473 : bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1633 961473 : && can_open_code_p (bswap_optab, DImode));
1634 :
1635 : /* Determine the argument type of the builtins. The code later on
1636 : assumes that the return and argument type are the same. */
1637 961473 : if (bswap32_p)
1638 : {
1639 861877 : tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1640 861877 : bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1641 : }
1642 :
1643 961473 : if (bswap64_p)
1644 : {
1645 861877 : tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1646 861877 : bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1647 : }
1648 :
1649 961473 : memset (&nop_stats, 0, sizeof (nop_stats));
1650 961473 : memset (&bswap_stats, 0, sizeof (bswap_stats));
1651 961473 : calculate_dominance_info (CDI_DOMINATORS);
1652 :
1653 9716204 : FOR_EACH_BB_FN (bb, fun)
1654 : {
1655 8754731 : gimple_stmt_iterator gsi;
1656 :
1657 : /* We do a reverse scan for bswap patterns to make sure we get the
1658 : widest match. As bswap pattern matching doesn't handle previously
1659 : inserted smaller bswap replacements as sub-patterns, the wider
1660 : variant wouldn't be detected. */
1661 96022890 : for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1662 : {
1663 78513428 : gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1664 78513428 : tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1665 78513428 : enum tree_code code;
1666 78513428 : struct symbolic_number n;
1667 78513428 : bool bswap, cast64_to_32;
1668 78513428 : uint64_t mask, l_rotate;
1669 :
1670 : /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1671 : might be moved to a different basic block by bswap_replace and gsi
1672 : must not points to it if that's the case. Moving the gsi_prev
1673 : there make sure that gsi points to the statement previous to
1674 : cur_stmt while still making sure that all statements are
1675 : considered in this basic block. */
1676 78513428 : gsi_prev (&gsi);
1677 :
1678 78513428 : if (!is_gimple_assign (cur_stmt))
1679 78509892 : continue;
1680 :
1681 20244260 : code = gimple_assign_rhs_code (cur_stmt);
1682 20244260 : switch (code)
1683 : {
1684 6600 : case LROTATE_EXPR:
1685 6600 : case RROTATE_EXPR:
1686 6600 : if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1687 6600 : || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1688 4064 : % BITS_PER_UNIT)
1689 5555 : continue;
1690 : /* Fall through. */
1691 : case BIT_IOR_EXPR:
1692 : case BIT_XOR_EXPR:
1693 : case PLUS_EXPR:
1694 : break;
1695 1257825 : case CONSTRUCTOR:
1696 1257825 : case VEC_PACK_TRUNC_EXPR:
1697 1257825 : {
1698 1257825 : tree rhs = gimple_assign_rhs1 (cur_stmt);
1699 1257825 : if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1700 1257825 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1701 : break;
1702 : }
1703 1253261 : continue;
1704 16918242 : default:
1705 16918242 : continue;
1706 18171503 : }
1707 :
1708 2067202 : ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1709 : &cast64_to_32, &mask, &l_rotate);
1710 :
1711 2067202 : if (!ins_stmt)
1712 2061339 : continue;
1713 :
1714 5863 : switch (n.range)
1715 : {
1716 2286 : case 16:
1717 : /* Already in canonical form, nothing to do. */
1718 2286 : if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1719 625 : continue;
1720 1661 : load_type = bswap_type = uint16_type_node;
1721 1661 : break;
1722 1280 : case 32:
1723 1280 : load_type = uint32_type_node;
1724 1280 : if (bswap32_p)
1725 : {
1726 1280 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1727 1280 : bswap_type = bswap32_type;
1728 : }
1729 : break;
1730 595 : case 64:
1731 595 : load_type = uint64_type_node;
1732 595 : if (bswap64_p)
1733 : {
1734 595 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1735 595 : bswap_type = bswap64_type;
1736 : }
1737 : break;
1738 1702 : default:
1739 1702 : continue;
1740 : }
1741 :
1742 3536 : if (bswap && !fndecl && n.range != 16)
1743 0 : continue;
1744 :
1745 3536 : if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1746 : bswap_type, load_type, &n, bswap, mask,
1747 : l_rotate))
1748 3536 : changed = true;
1749 : }
1750 : }
1751 :
1752 961473 : statistics_counter_event (fun, "16-bit nop implementations found",
1753 : nop_stats.found_16bit);
1754 961473 : statistics_counter_event (fun, "32-bit nop implementations found",
1755 : nop_stats.found_32bit);
1756 961473 : statistics_counter_event (fun, "64-bit nop implementations found",
1757 : nop_stats.found_64bit);
1758 961473 : statistics_counter_event (fun, "16-bit bswap implementations found",
1759 : bswap_stats.found_16bit);
1760 961473 : statistics_counter_event (fun, "32-bit bswap implementations found",
1761 : bswap_stats.found_32bit);
1762 961473 : statistics_counter_event (fun, "64-bit bswap implementations found",
1763 : bswap_stats.found_64bit);
1764 :
1765 961473 : return (changed ? TODO_update_ssa : 0);
1766 : }
1767 :
1768 : } // anon namespace
1769 :
1770 : gimple_opt_pass *
1771 298828 : make_pass_optimize_bswap (gcc::context *ctxt)
1772 : {
1773 298828 : return new pass_optimize_bswap (ctxt);
1774 : }
1775 :
1776 : namespace {
1777 :
1778 : /* Struct recording one operand for the store, which is either a constant,
1779 : then VAL represents the constant and all the other fields are zero, or
1780 : a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1781 : and the other fields also reflect the memory load, or an SSA name, then
1782 : VAL represents the SSA name and all the other fields are zero. */
1783 :
1784 : class store_operand_info
1785 : {
1786 : public:
1787 : tree val;
1788 : tree base_addr;
1789 : poly_uint64 bitsize;
1790 : poly_uint64 bitpos;
1791 : poly_uint64 bitregion_start;
1792 : poly_uint64 bitregion_end;
1793 : gimple *stmt;
1794 : bool bit_not_p;
1795 : store_operand_info ();
1796 : };
1797 :
1798 10110894 : store_operand_info::store_operand_info ()
1799 10110894 : : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1800 10110894 : bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1801 : {
1802 0 : }
1803 :
1804 : /* Struct recording the information about a single store of an immediate
1805 : to memory. These are created in the first phase and coalesced into
1806 : merged_store_group objects in the second phase. */
1807 :
1808 : class store_immediate_info
1809 : {
1810 : public:
1811 : unsigned HOST_WIDE_INT bitsize;
1812 : unsigned HOST_WIDE_INT bitpos;
1813 : unsigned HOST_WIDE_INT bitregion_start;
1814 : /* This is one past the last bit of the bit region. */
1815 : unsigned HOST_WIDE_INT bitregion_end;
1816 : gimple *stmt;
1817 : unsigned int order;
1818 : /* INTEGER_CST for constant store, STRING_CST for string store,
1819 : MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1820 : BIT_INSERT_EXPR for bit insertion.
1821 : LROTATE_EXPR if it can be only bswap optimized and
1822 : ops are not really meaningful.
1823 : NOP_EXPR if bswap optimization detected identity, ops
1824 : are not meaningful. */
1825 : enum tree_code rhs_code;
1826 : /* Two fields for bswap optimization purposes. */
1827 : struct symbolic_number n;
1828 : gimple *ins_stmt;
1829 : /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1830 : bool bit_not_p;
1831 : /* True if ops have been swapped and thus ops[1] represents
1832 : rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1833 : bool ops_swapped_p;
1834 : /* The index number of the landing pad, or 0 if there is none. */
1835 : int lp_nr;
1836 : /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1837 : just the first one. */
1838 : store_operand_info ops[2];
1839 : store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1840 : unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1841 : gimple *, unsigned int, enum tree_code,
1842 : struct symbolic_number &, gimple *, bool, int,
1843 : const store_operand_info &,
1844 : const store_operand_info &);
1845 : };
1846 :
1847 3038027 : store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1848 : unsigned HOST_WIDE_INT bp,
1849 : unsigned HOST_WIDE_INT brs,
1850 : unsigned HOST_WIDE_INT bre,
1851 : gimple *st,
1852 : unsigned int ord,
1853 : enum tree_code rhscode,
1854 : struct symbolic_number &nr,
1855 : gimple *ins_stmtp,
1856 : bool bitnotp,
1857 : int nr2,
1858 : const store_operand_info &op0r,
1859 3038027 : const store_operand_info &op1r)
1860 3038027 : : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1861 3038027 : stmt (st), order (ord), rhs_code (rhscode), n (nr),
1862 3038027 : ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1863 3038027 : lp_nr (nr2), ops { op0r, op1r }
1864 : {
1865 0 : }
1866 :
1867 : /* Struct representing a group of stores to contiguous memory locations.
1868 : These are produced by the second phase (coalescing) and consumed in the
1869 : third phase that outputs the widened stores. */
1870 :
1871 : class merged_store_group
1872 : {
1873 : public:
1874 : unsigned HOST_WIDE_INT start;
1875 : unsigned HOST_WIDE_INT width;
1876 : unsigned HOST_WIDE_INT bitregion_start;
1877 : unsigned HOST_WIDE_INT bitregion_end;
1878 : /* The size of the allocated memory for val and mask. */
1879 : unsigned HOST_WIDE_INT buf_size;
1880 : unsigned HOST_WIDE_INT align_base;
1881 : poly_uint64 load_align_base[2];
1882 :
1883 : unsigned int align;
1884 : unsigned int load_align[2];
1885 : unsigned int first_order;
1886 : unsigned int last_order;
1887 : bool bit_insertion;
1888 : bool string_concatenation;
1889 : bool only_constants;
1890 : bool consecutive;
1891 : unsigned int first_nonmergeable_order;
1892 : int lp_nr;
1893 :
1894 : auto_vec<store_immediate_info *> stores;
1895 : /* We record the first and last original statements in the sequence because
1896 : we'll need their vuse/vdef and replacement position. It's easier to keep
1897 : track of them separately as 'stores' is reordered by apply_stores. */
1898 : gimple *last_stmt;
1899 : gimple *first_stmt;
1900 : unsigned char *val;
1901 : unsigned char *mask;
1902 :
1903 : merged_store_group (store_immediate_info *);
1904 : ~merged_store_group ();
1905 : bool can_be_merged_into (store_immediate_info *);
1906 : void merge_into (store_immediate_info *);
1907 : void merge_overlapping (store_immediate_info *);
1908 : bool apply_stores ();
1909 : private:
1910 : void do_merge (store_immediate_info *);
1911 : };
1912 :
1913 : /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1914 :
1915 : static void
1916 444 : dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1917 : {
1918 444 : if (!fd)
1919 : return;
1920 :
1921 22044 : for (unsigned int i = 0; i < len; i++)
1922 21600 : fprintf (fd, "%02x ", ptr[i]);
1923 444 : fprintf (fd, "\n");
1924 : }
1925 :
1926 : /* Clear out LEN bits starting from bit START in the byte array
1927 : PTR. This clears the bits to the *right* from START.
1928 : START must be within [0, BITS_PER_UNIT) and counts starting from
1929 : the least significant bit. */
1930 :
1931 : static void
1932 12 : clear_bit_region_be (unsigned char *ptr, unsigned int start,
1933 : unsigned int len)
1934 : {
1935 20 : if (len == 0)
1936 : return;
1937 : /* Clear len bits to the right of start. */
1938 20 : else if (len <= start + 1)
1939 : {
1940 8 : unsigned char mask = (~(~0U << len));
1941 8 : mask = mask << (start + 1U - len);
1942 8 : ptr[0] &= ~mask;
1943 : }
1944 12 : else if (start != BITS_PER_UNIT - 1)
1945 : {
1946 4 : clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1947 4 : clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1948 4 : len - (start % BITS_PER_UNIT) - 1);
1949 : }
1950 8 : else if (start == BITS_PER_UNIT - 1
1951 : && len > BITS_PER_UNIT)
1952 : {
1953 8 : unsigned int nbytes = len / BITS_PER_UNIT;
1954 8 : memset (ptr, 0, nbytes);
1955 8 : if (len % BITS_PER_UNIT != 0)
1956 4 : clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1957 : len % BITS_PER_UNIT);
1958 : }
1959 : else
1960 0 : gcc_unreachable ();
1961 : }
1962 :
1963 : /* In the byte array PTR clear the bit region starting at bit
1964 : START and is LEN bits wide.
1965 : For regions spanning multiple bytes do this recursively until we reach
1966 : zero LEN or a region contained within a single byte. */
1967 :
1968 : static void
1969 1276304 : clear_bit_region (unsigned char *ptr, unsigned int start,
1970 : unsigned int len)
1971 : {
1972 : /* Degenerate base case. */
1973 1368530 : if (len == 0)
1974 : return;
1975 1368530 : else if (start >= BITS_PER_UNIT)
1976 46196 : clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1977 : /* Second base case. */
1978 1322334 : else if ((start + len) <= BITS_PER_UNIT)
1979 : {
1980 198750 : unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1981 198750 : mask >>= BITS_PER_UNIT - (start + len);
1982 :
1983 198750 : ptr[0] &= ~mask;
1984 :
1985 198750 : return;
1986 : }
1987 : /* Clear most significant bits in a byte and proceed with the next byte. */
1988 1123584 : else if (start != 0)
1989 : {
1990 43214 : clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1991 43214 : clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1992 : }
1993 : /* Whole bytes need to be cleared. */
1994 1080370 : else if (start == 0 && len > BITS_PER_UNIT)
1995 : {
1996 1080370 : unsigned int nbytes = len / BITS_PER_UNIT;
1997 : /* We could recurse on each byte but we clear whole bytes, so a simple
1998 : memset will do. */
1999 1080370 : memset (ptr, '\0', nbytes);
2000 : /* Clear the remaining sub-byte region if there is one. */
2001 1080370 : if (len % BITS_PER_UNIT != 0)
2002 2816 : clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
2003 : }
2004 : else
2005 0 : gcc_unreachable ();
2006 : }
2007 :
2008 : /* Write BITLEN bits of EXPR to the byte array PTR at
2009 : bit position BITPOS. PTR should contain TOTAL_BYTES elements.
2010 : Return true if the operation succeeded. */
2011 :
2012 : static bool
2013 999294 : encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
2014 : unsigned int total_bytes)
2015 : {
2016 999294 : unsigned int first_byte = bitpos / BITS_PER_UNIT;
2017 999294 : bool empty_ctor_p
2018 999294 : = (TREE_CODE (expr) == CONSTRUCTOR
2019 216949 : && CONSTRUCTOR_NELTS (expr) == 0
2020 216949 : && TYPE_SIZE_UNIT (TREE_TYPE (expr))
2021 1216243 : && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
2022 999294 : bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
2023 974635 : || (bitpos % BITS_PER_UNIT)
2024 1973389 : || (!int_mode_for_size (bitlen, 0).exists ()
2025 61175 : && !empty_ctor_p));
2026 :
2027 973933 : if (!sub_byte_op_p)
2028 : {
2029 973933 : if (first_byte >= total_bytes)
2030 : return false;
2031 973933 : total_bytes -= first_byte;
2032 973933 : if (empty_ctor_p)
2033 : {
2034 216949 : unsigned HOST_WIDE_INT rhs_bytes
2035 216949 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
2036 216949 : if (rhs_bytes > total_bytes)
2037 : return false;
2038 216949 : memset (ptr + first_byte, '\0', rhs_bytes);
2039 216949 : return true;
2040 : }
2041 756984 : return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
2042 : }
2043 :
2044 : /* LITTLE-ENDIAN
2045 : We are writing a non byte-sized quantity or at a position that is not
2046 : at a byte boundary.
2047 : |--------|--------|--------| ptr + first_byte
2048 : ^ ^
2049 : xxx xxxxxxxx xxx< bp>
2050 : |______EXPR____|
2051 :
2052 : First native_encode_expr EXPR into a temporary buffer and shift each
2053 : byte in the buffer by 'bp' (carrying the bits over as necessary).
2054 : |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
2055 : <------bitlen---->< bp>
2056 : Then we clear the destination bits:
2057 : |---00000|00000000|000-----| ptr + first_byte
2058 : <-------bitlen--->< bp>
2059 :
2060 : Finally we ORR the bytes of the shifted EXPR into the cleared region:
2061 : |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
2062 :
2063 : BIG-ENDIAN
2064 : We are writing a non byte-sized quantity or at a position that is not
2065 : at a byte boundary.
2066 : ptr + first_byte |--------|--------|--------|
2067 : ^ ^
2068 : <bp >xxx xxxxxxxx xxx
2069 : |_____EXPR_____|
2070 :
2071 : First native_encode_expr EXPR into a temporary buffer and shift each
2072 : byte in the buffer to the right by (carrying the bits over as necessary).
2073 : We shift by as much as needed to align the most significant bit of EXPR
2074 : with bitpos:
2075 : |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
2076 : <---bitlen----> <bp ><-----bitlen----->
2077 : Then we clear the destination bits:
2078 : ptr + first_byte |-----000||00000000||00000---|
2079 : <bp ><-------bitlen----->
2080 :
2081 : Finally we ORR the bytes of the shifted EXPR into the cleared region:
2082 : ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
2083 : The awkwardness comes from the fact that bitpos is counted from the
2084 : most significant bit of a byte. */
2085 :
2086 : /* We must be dealing with fixed-size data at this point, since the
2087 : total size is also fixed. */
2088 25361 : unsigned int byte_size;
2089 25361 : if (empty_ctor_p)
2090 : {
2091 0 : unsigned HOST_WIDE_INT rhs_bytes
2092 0 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
2093 0 : if (rhs_bytes > total_bytes)
2094 : return false;
2095 0 : byte_size = rhs_bytes;
2096 : }
2097 : else
2098 : {
2099 25361 : fixed_size_mode mode
2100 25361 : = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
2101 25361 : byte_size
2102 25361 : = mode == BLKmode
2103 68 : ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
2104 25293 : : GET_MODE_SIZE (mode);
2105 : }
2106 : /* Allocate an extra byte so that we have space to shift into. */
2107 25361 : byte_size++;
2108 25361 : unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
2109 25361 : memset (tmpbuf, '\0', byte_size);
2110 : /* The store detection code should only have allowed constants that are
2111 : accepted by native_encode_expr or empty ctors. */
2112 25361 : if (!empty_ctor_p
2113 25361 : && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
2114 0 : gcc_unreachable ();
2115 :
2116 : /* The native_encode_expr machinery uses TYPE_MODE to determine how many
2117 : bytes to write. This means it can write more than
2118 : ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
2119 : write 8 bytes for a bitlen of 40). Skip the bytes that are not within
2120 : bitlen and zero out the bits that are not relevant as well (that may
2121 : contain a sign bit due to sign-extension). */
2122 25361 : unsigned int padding
2123 25361 : = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
2124 : /* On big-endian the padding is at the 'front' so just skip the initial
2125 : bytes. */
2126 25361 : if (BYTES_BIG_ENDIAN)
2127 : tmpbuf += padding;
2128 :
2129 25361 : byte_size -= padding;
2130 :
2131 25361 : if (bitlen % BITS_PER_UNIT != 0)
2132 : {
2133 24659 : if (BYTES_BIG_ENDIAN)
2134 : clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
2135 : BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
2136 : else
2137 24659 : clear_bit_region (tmpbuf, bitlen,
2138 24659 : byte_size * BITS_PER_UNIT - bitlen);
2139 : }
2140 : /* Left shifting relies on the last byte being clear if bitlen is
2141 : a multiple of BITS_PER_UNIT, which might not be clear if
2142 : there are padding bytes. */
2143 702 : else if (!BYTES_BIG_ENDIAN)
2144 702 : tmpbuf[byte_size - 1] = '\0';
2145 :
2146 : /* Clear the bit region in PTR where the bits from TMPBUF will be
2147 : inserted into. */
2148 25361 : if (BYTES_BIG_ENDIAN)
2149 : clear_bit_region_be (ptr + first_byte,
2150 : BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
2151 : else
2152 25361 : clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
2153 :
2154 25361 : int shift_amnt;
2155 25361 : int bitlen_mod = bitlen % BITS_PER_UNIT;
2156 25361 : int bitpos_mod = bitpos % BITS_PER_UNIT;
2157 :
2158 25361 : bool skip_byte = false;
2159 25361 : if (BYTES_BIG_ENDIAN)
2160 : {
2161 : /* BITPOS and BITLEN are exactly aligned and no shifting
2162 : is necessary. */
2163 : if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
2164 : || (bitpos_mod == 0 && bitlen_mod == 0))
2165 : shift_amnt = 0;
2166 : /* |. . . . . . . .|
2167 : <bp > <blen >.
2168 : We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2169 : of the value until it aligns with 'bp' in the next byte over. */
2170 : else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2171 : {
2172 : shift_amnt = bitlen_mod + bitpos_mod;
2173 : skip_byte = bitlen_mod != 0;
2174 : }
2175 : /* |. . . . . . . .|
2176 : <----bp--->
2177 : <---blen---->.
2178 : Shift the value right within the same byte so it aligns with 'bp'. */
2179 : else
2180 : shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2181 : }
2182 : else
2183 25361 : shift_amnt = bitpos % BITS_PER_UNIT;
2184 :
2185 : /* Create the shifted version of EXPR. */
2186 25361 : if (!BYTES_BIG_ENDIAN)
2187 : {
2188 25361 : shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2189 25361 : if (shift_amnt == 0)
2190 11407 : byte_size--;
2191 : }
2192 : else
2193 : {
2194 : gcc_assert (BYTES_BIG_ENDIAN);
2195 : shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2196 : /* If shifting right forced us to move into the next byte skip the now
2197 : empty byte. */
2198 : if (skip_byte)
2199 : {
2200 : tmpbuf++;
2201 : byte_size--;
2202 : }
2203 : }
2204 :
2205 : /* Insert the bits from TMPBUF. */
2206 111651 : for (unsigned int i = 0; i < byte_size; i++)
2207 86290 : ptr[first_byte + i] |= tmpbuf[i];
2208 :
2209 : return true;
2210 : }
2211 :
2212 : /* Sorting function for store_immediate_info objects.
2213 : Sorts them by bitposition. */
2214 :
2215 : static int
2216 17575872 : sort_by_bitpos (const void *x, const void *y)
2217 : {
2218 17575872 : store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2219 17575872 : store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2220 :
2221 17575872 : if ((*tmp)->bitpos < (*tmp2)->bitpos)
2222 : return -1;
2223 9003939 : else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2224 : return 1;
2225 : else
2226 : /* If they are the same let's use the order which is guaranteed to
2227 : be different. */
2228 739721 : return (*tmp)->order - (*tmp2)->order;
2229 : }
2230 :
2231 : /* Sorting function for store_immediate_info objects.
2232 : Sorts them by the order field. */
2233 :
2234 : static int
2235 6737758 : sort_by_order (const void *x, const void *y)
2236 : {
2237 6737758 : store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2238 6737758 : store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2239 :
2240 6737758 : if ((*tmp)->order < (*tmp2)->order)
2241 : return -1;
2242 3257998 : else if ((*tmp)->order > (*tmp2)->order)
2243 : return 1;
2244 :
2245 0 : gcc_unreachable ();
2246 : }
2247 :
2248 : /* Initialize a merged_store_group object from a store_immediate_info
2249 : object. */
2250 :
2251 833711 : merged_store_group::merged_store_group (store_immediate_info *info)
2252 : {
2253 833711 : start = info->bitpos;
2254 833711 : width = info->bitsize;
2255 833711 : bitregion_start = info->bitregion_start;
2256 833711 : bitregion_end = info->bitregion_end;
2257 : /* VAL has memory allocated for it in apply_stores once the group
2258 : width has been finalized. */
2259 833711 : val = NULL;
2260 833711 : mask = NULL;
2261 833711 : bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2262 833711 : string_concatenation = info->rhs_code == STRING_CST;
2263 833711 : only_constants = info->rhs_code == INTEGER_CST;
2264 833711 : consecutive = true;
2265 833711 : first_nonmergeable_order = ~0U;
2266 833711 : lp_nr = info->lp_nr;
2267 833711 : unsigned HOST_WIDE_INT align_bitpos = 0;
2268 833711 : get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2269 : &align, &align_bitpos);
2270 833711 : align_base = start - align_bitpos;
2271 2501133 : for (int i = 0; i < 2; ++i)
2272 : {
2273 1667422 : store_operand_info &op = info->ops[i];
2274 1667422 : if (op.base_addr == NULL_TREE)
2275 : {
2276 1478330 : load_align[i] = 0;
2277 1478330 : load_align_base[i] = 0;
2278 : }
2279 : else
2280 : {
2281 189092 : get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2282 189092 : load_align_base[i] = op.bitpos - align_bitpos;
2283 : }
2284 : }
2285 833711 : stores.create (1);
2286 833711 : stores.safe_push (info);
2287 833711 : last_stmt = info->stmt;
2288 833711 : last_order = info->order;
2289 833711 : first_stmt = last_stmt;
2290 833711 : first_order = last_order;
2291 833711 : buf_size = 0;
2292 833711 : }
2293 :
2294 833711 : merged_store_group::~merged_store_group ()
2295 : {
2296 833711 : if (val)
2297 396452 : XDELETEVEC (val);
2298 833711 : }
2299 :
2300 : /* Return true if the store described by INFO can be merged into the group. */
2301 :
2302 : bool
2303 690124 : merged_store_group::can_be_merged_into (store_immediate_info *info)
2304 : {
2305 : /* Do not merge bswap patterns. */
2306 690124 : if (info->rhs_code == LROTATE_EXPR)
2307 : return false;
2308 :
2309 676298 : if (info->lp_nr != lp_nr)
2310 : return false;
2311 :
2312 : /* The canonical case. */
2313 676298 : if (info->rhs_code == stores[0]->rhs_code)
2314 : return true;
2315 :
2316 : /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
2317 54678 : if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2318 2026 : return !string_concatenation;
2319 :
2320 52652 : if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2321 3182 : return !string_concatenation;
2322 :
2323 : /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2324 : only for small regions since this can generate a lot of instructions. */
2325 49470 : if (info->rhs_code == MEM_REF
2326 21596 : && (stores[0]->rhs_code == INTEGER_CST
2327 2773 : || stores[0]->rhs_code == BIT_INSERT_EXPR)
2328 19300 : && info->bitregion_start == stores[0]->bitregion_start
2329 768 : && info->bitregion_end == stores[0]->bitregion_end
2330 51006 : && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2331 766 : return !string_concatenation;
2332 :
2333 48704 : if (stores[0]->rhs_code == MEM_REF
2334 22305 : && (info->rhs_code == INTEGER_CST
2335 22305 : || info->rhs_code == BIT_INSERT_EXPR)
2336 22026 : && info->bitregion_start == stores[0]->bitregion_start
2337 4 : && info->bitregion_end == stores[0]->bitregion_end
2338 48708 : && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2339 2 : return !string_concatenation;
2340 :
2341 : /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2342 48702 : if (info->rhs_code == STRING_CST
2343 77 : && stores[0]->rhs_code == INTEGER_CST
2344 48779 : && stores[0]->bitsize == CHAR_BIT)
2345 66 : return !bit_insertion;
2346 :
2347 48636 : if (stores[0]->rhs_code == STRING_CST
2348 27 : && info->rhs_code == INTEGER_CST
2349 48663 : && info->bitsize == CHAR_BIT)
2350 0 : return !bit_insertion;
2351 :
2352 : return false;
2353 : }
2354 :
2355 : /* Helper method for merge_into and merge_overlapping to do
2356 : the common part. */
2357 :
2358 : void
2359 788791 : merged_store_group::do_merge (store_immediate_info *info)
2360 : {
2361 788791 : bitregion_start = MIN (bitregion_start, info->bitregion_start);
2362 788791 : bitregion_end = MAX (bitregion_end, info->bitregion_end);
2363 :
2364 788791 : unsigned int this_align;
2365 788791 : unsigned HOST_WIDE_INT align_bitpos = 0;
2366 788791 : get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2367 : &this_align, &align_bitpos);
2368 788791 : if (this_align > align)
2369 : {
2370 1116 : align = this_align;
2371 1116 : align_base = info->bitpos - align_bitpos;
2372 : }
2373 2366373 : for (int i = 0; i < 2; ++i)
2374 : {
2375 1577582 : store_operand_info &op = info->ops[i];
2376 1577582 : if (!op.base_addr)
2377 1475256 : continue;
2378 :
2379 102326 : get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2380 102326 : if (this_align > load_align[i])
2381 : {
2382 132 : load_align[i] = this_align;
2383 132 : load_align_base[i] = op.bitpos - align_bitpos;
2384 : }
2385 : }
2386 :
2387 788791 : gimple *stmt = info->stmt;
2388 788791 : stores.safe_push (info);
2389 788791 : if (info->order > last_order)
2390 : {
2391 527282 : last_order = info->order;
2392 527282 : last_stmt = stmt;
2393 : }
2394 261509 : else if (info->order < first_order)
2395 : {
2396 110598 : first_order = info->order;
2397 110598 : first_stmt = stmt;
2398 : }
2399 :
2400 788791 : if (info->bitpos != start + width)
2401 184835 : consecutive = false;
2402 :
2403 : /* We need to use extraction if there is any bit-field. */
2404 788791 : if (info->rhs_code == BIT_INSERT_EXPR)
2405 : {
2406 6654 : bit_insertion = true;
2407 6654 : gcc_assert (!string_concatenation);
2408 : }
2409 :
2410 : /* We want to use concatenation if there is any string. */
2411 788791 : if (info->rhs_code == STRING_CST)
2412 : {
2413 369 : string_concatenation = true;
2414 369 : gcc_assert (!bit_insertion);
2415 : }
2416 :
2417 : /* But we cannot use it if we don't have consecutive stores. */
2418 788791 : if (!consecutive)
2419 297210 : string_concatenation = false;
2420 :
2421 788791 : if (info->rhs_code != INTEGER_CST)
2422 110202 : only_constants = false;
2423 788791 : }
2424 :
2425 : /* Merge a store recorded by INFO into this merged store.
2426 : The store is not overlapping with the existing recorded
2427 : stores. */
2428 :
2429 : void
2430 113627 : merged_store_group::merge_into (store_immediate_info *info)
2431 : {
2432 113627 : do_merge (info);
2433 :
2434 : /* Make sure we're inserting in the position we think we're inserting. */
2435 113627 : gcc_assert (info->bitpos >= start + width
2436 : && info->bitregion_start <= bitregion_end);
2437 :
2438 113627 : width = info->bitpos + info->bitsize - start;
2439 113627 : }
2440 :
2441 : /* Merge a store described by INFO into this merged store.
2442 : INFO overlaps in some way with the current store (i.e. it's not contiguous
2443 : which is handled by merged_store_group::merge_into). */
2444 :
2445 : void
2446 675164 : merged_store_group::merge_overlapping (store_immediate_info *info)
2447 : {
2448 675164 : do_merge (info);
2449 :
2450 : /* If the store extends the size of the group, extend the width. */
2451 675164 : if (info->bitpos + info->bitsize > start + width)
2452 494501 : width = info->bitpos + info->bitsize - start;
2453 675164 : }
2454 :
2455 : /* Go through all the recorded stores in this group in program order and
2456 : apply their values to the VAL byte array to create the final merged
2457 : value. Return true if the operation succeeded. */
2458 :
2459 : bool
2460 832877 : merged_store_group::apply_stores ()
2461 : {
2462 832877 : store_immediate_info *info;
2463 832877 : unsigned int i;
2464 :
2465 : /* Make sure we have more than one store in the group, otherwise we cannot
2466 : merge anything. */
2467 832877 : if (bitregion_start % BITS_PER_UNIT != 0
2468 832877 : || bitregion_end % BITS_PER_UNIT != 0
2469 1665754 : || stores.length () == 1)
2470 : return false;
2471 :
2472 396452 : buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2473 :
2474 : /* Really do string concatenation for large strings only. */
2475 396452 : if (buf_size <= MOVE_MAX)
2476 194071 : string_concatenation = false;
2477 :
2478 : /* String concatenation only works for byte aligned start and end. */
2479 396452 : if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2480 3179 : string_concatenation = false;
2481 :
2482 : /* Create a power-of-2-sized buffer for native_encode_expr. */
2483 396452 : if (!string_concatenation)
2484 792832 : buf_size = 1 << ceil_log2 (buf_size);
2485 :
2486 396452 : val = XNEWVEC (unsigned char, 2 * buf_size);
2487 396452 : mask = val + buf_size;
2488 396452 : memset (val, 0, buf_size);
2489 396452 : memset (mask, ~0U, buf_size);
2490 :
2491 396452 : stores.qsort (sort_by_order);
2492 :
2493 1579485 : FOR_EACH_VEC_ELT (stores, i, info)
2494 : {
2495 1183033 : unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2496 1183033 : tree cst;
2497 1183033 : if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2498 : cst = info->ops[0].val;
2499 174825 : else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2500 : cst = info->ops[1].val;
2501 : else
2502 : cst = NULL_TREE;
2503 1008523 : bool ret = true;
2504 1008523 : if (cst && info->rhs_code != BIT_INSERT_EXPR)
2505 999294 : ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2506 999294 : buf_size);
2507 1183033 : unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2508 1183033 : if (BYTES_BIG_ENDIAN)
2509 : clear_bit_region_be (m, (BITS_PER_UNIT - 1
2510 : - (pos_in_buffer % BITS_PER_UNIT)),
2511 : info->bitsize);
2512 : else
2513 1183033 : clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2514 1183033 : if (cst && dump_file && (dump_flags & TDF_DETAILS))
2515 : {
2516 222 : if (ret)
2517 : {
2518 222 : fputs ("After writing ", dump_file);
2519 222 : print_generic_expr (dump_file, cst, TDF_NONE);
2520 222 : fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2521 : " at position %d\n", info->bitsize, pos_in_buffer);
2522 222 : fputs (" the merged value contains ", dump_file);
2523 222 : dump_char_array (dump_file, val, buf_size);
2524 222 : fputs (" the merged mask contains ", dump_file);
2525 222 : dump_char_array (dump_file, mask, buf_size);
2526 222 : if (bit_insertion)
2527 0 : fputs (" bit insertion is required\n", dump_file);
2528 222 : if (string_concatenation)
2529 0 : fputs (" string concatenation is required\n", dump_file);
2530 : }
2531 : else
2532 0 : fprintf (dump_file, "Failed to merge stores\n");
2533 : }
2534 1183033 : if (!ret)
2535 : return false;
2536 : }
2537 396452 : stores.qsort (sort_by_bitpos);
2538 : return true;
2539 : }
2540 :
2541 : /* Structure describing the store chain. */
2542 :
2543 : class imm_store_chain_info
2544 : {
2545 : public:
2546 : /* Doubly-linked list that imposes an order on chain processing.
2547 : PNXP (prev's next pointer) points to the head of a list, or to
2548 : the next field in the previous chain in the list.
2549 : See pass_store_merging::m_stores_head for more rationale. */
2550 : imm_store_chain_info *next, **pnxp;
2551 : tree base_addr;
2552 : auto_vec<store_immediate_info *> m_store_info;
2553 : auto_vec<merged_store_group *> m_merged_store_groups;
2554 :
2555 1907401 : imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2556 1907401 : : next (inspt), pnxp (&inspt), base_addr (b_a)
2557 : {
2558 1907401 : inspt = this;
2559 1907401 : if (next)
2560 : {
2561 641517 : gcc_checking_assert (pnxp == next->pnxp);
2562 641517 : next->pnxp = &next;
2563 : }
2564 1907401 : }
2565 1907401 : ~imm_store_chain_info ()
2566 : {
2567 1907401 : *pnxp = next;
2568 1907401 : if (next)
2569 : {
2570 616748 : gcc_checking_assert (&next == next->pnxp);
2571 616748 : next->pnxp = pnxp;
2572 : }
2573 1907401 : }
2574 : bool terminate_and_process_chain ();
2575 : bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2576 : unsigned int);
2577 : bool coalesce_immediate_stores ();
2578 : bool output_merged_store (merged_store_group *);
2579 : bool output_merged_stores ();
2580 : };
2581 :
2582 : const pass_data pass_data_tree_store_merging = {
2583 : GIMPLE_PASS, /* type */
2584 : "store-merging", /* name */
2585 : OPTGROUP_NONE, /* optinfo_flags */
2586 : TV_GIMPLE_STORE_MERGING, /* tv_id */
2587 : PROP_ssa, /* properties_required */
2588 : 0, /* properties_provided */
2589 : 0, /* properties_destroyed */
2590 : 0, /* todo_flags_start */
2591 : TODO_update_ssa, /* todo_flags_finish */
2592 : };
2593 :
2594 : class pass_store_merging : public gimple_opt_pass
2595 : {
2596 : public:
2597 298828 : pass_store_merging (gcc::context *ctxt)
2598 597656 : : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2599 298828 : m_n_chains (0), m_n_stores (0)
2600 : {
2601 298828 : }
2602 :
2603 : /* Pass not supported for PDP-endian, nor for insane hosts or
2604 : target character sizes where native_{encode,interpret}_expr
2605 : doesn't work properly. */
2606 : bool
2607 1039819 : gate (function *) final override
2608 : {
2609 1039819 : return flag_store_merging
2610 : && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2611 : && CHAR_BIT == 8
2612 1039819 : && BITS_PER_UNIT == 8;
2613 : }
2614 :
2615 : unsigned int execute (function *) final override;
2616 :
2617 : private:
2618 : hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2619 :
2620 : /* Form a doubly-linked stack of the elements of m_stores, so that
2621 : we can iterate over them in a predictable way. Using this order
2622 : avoids extraneous differences in the compiler output just because
2623 : of tree pointer variations (e.g. different chains end up in
2624 : different positions of m_stores, so they are handled in different
2625 : orders, so they allocate or release SSA names in different
2626 : orders, and when they get reused, subsequent passes end up
2627 : getting different SSA names, which may ultimately change
2628 : decisions when going out of SSA). */
2629 : imm_store_chain_info *m_stores_head;
2630 :
2631 : /* The number of store chains currently tracked. */
2632 : unsigned m_n_chains;
2633 : /* The number of stores currently tracked. */
2634 : unsigned m_n_stores;
2635 :
2636 : bool process_store (gimple *);
2637 : bool terminate_and_process_chain (imm_store_chain_info *);
2638 : bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2639 : bool terminate_and_process_all_chains ();
2640 : }; // class pass_store_merging
2641 :
2642 : /* Terminate and process all recorded chains. Return true if any changes
2643 : were made. */
2644 :
2645 : bool
2646 1153447 : pass_store_merging::terminate_and_process_all_chains ()
2647 : {
2648 1153447 : bool ret = false;
2649 2054448 : while (m_stores_head)
2650 901001 : ret |= terminate_and_process_chain (m_stores_head);
2651 1153447 : gcc_assert (m_stores.is_empty ());
2652 1153447 : return ret;
2653 : }
2654 :
2655 : /* Terminate all chains that are affected by the statement STMT.
2656 : CHAIN_INFO is the chain we should ignore from the checks if
2657 : non-NULL. Return true if any changes were made. */
2658 :
2659 : bool
2660 9799855 : pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2661 : **chain_info,
2662 : gimple *stmt)
2663 : {
2664 9799855 : bool ret = false;
2665 :
2666 : /* If the statement doesn't touch memory it can't alias. */
2667 19142268 : if (!gimple_vuse (stmt))
2668 : return false;
2669 :
2670 7832720 : tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2671 7832720 : ao_ref store_lhs_ref;
2672 7832720 : ao_ref_init (&store_lhs_ref, store_lhs);
2673 7832720 : for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2674 : {
2675 8244281 : next = cur->next;
2676 :
2677 : /* We already checked all the stores in chain_info and terminated the
2678 : chain if necessary. Skip it here. */
2679 8244281 : if (chain_info && *chain_info == cur)
2680 1130626 : continue;
2681 :
2682 : store_immediate_info *info;
2683 : unsigned int i;
2684 33738416 : FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2685 : {
2686 11551684 : tree lhs = gimple_assign_lhs (info->stmt);
2687 11551684 : ao_ref lhs_ref;
2688 11551684 : ao_ref_init (&lhs_ref, lhs);
2689 11551684 : if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2690 10717305 : || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2691 22133183 : || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2692 : &lhs_ref, false)))
2693 : {
2694 1003924 : if (dump_file && (dump_flags & TDF_DETAILS))
2695 : {
2696 24 : fprintf (dump_file, "stmt causes chain termination:\n");
2697 24 : print_gimple_stmt (dump_file, stmt, 0);
2698 : }
2699 1003924 : ret |= terminate_and_process_chain (cur);
2700 1003924 : break;
2701 : }
2702 : }
2703 : }
2704 :
2705 : return ret;
2706 : }
2707 :
2708 : /* Helper function. Terminate the recorded chain storing to base object
2709 : BASE. Return true if the merging and output was successful. The m_stores
2710 : entry is removed after the processing in any case. */
2711 :
2712 : bool
2713 1907401 : pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2714 : {
2715 1907401 : m_n_stores -= chain_info->m_store_info.length ();
2716 1907401 : m_n_chains--;
2717 1907401 : bool ret = chain_info->terminate_and_process_chain ();
2718 1907401 : m_stores.remove (chain_info->base_addr);
2719 1907401 : delete chain_info;
2720 1907401 : return ret;
2721 : }
2722 :
2723 : /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2724 : may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2725 : be able to sink load of REF across stores between FIRST and LAST, up
2726 : to right before LAST. */
2727 :
2728 : bool
2729 32265 : stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2730 : {
2731 32265 : ao_ref r;
2732 32265 : ao_ref_init (&r, ref);
2733 32265 : unsigned int count = 0;
2734 32265 : tree vop = gimple_vdef (last);
2735 32265 : gimple *stmt;
2736 :
2737 : /* Return true conservatively if the basic blocks are different. */
2738 32265 : if (gimple_bb (first) != gimple_bb (last))
2739 : return true;
2740 :
2741 83273 : do
2742 : {
2743 83273 : stmt = SSA_NAME_DEF_STMT (vop);
2744 83273 : if (stmt_may_clobber_ref_p_1 (stmt, &r))
2745 : return true;
2746 82740 : if (gimple_store_p (stmt)
2747 82740 : && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2748 : return true;
2749 : /* Avoid quadratic compile time by bounding the number of checks
2750 : we perform. */
2751 82475 : if (++count > MAX_STORE_ALIAS_CHECKS)
2752 : return true;
2753 82475 : vop = gimple_vuse (stmt);
2754 : }
2755 82475 : while (stmt != first);
2756 :
2757 : return false;
2758 : }
2759 :
2760 : /* Return true if INFO->ops[IDX] is mergeable with the
2761 : corresponding loads already in MERGED_STORE group.
2762 : BASE_ADDR is the base address of the whole store group. */
2763 :
2764 : bool
2765 124172 : compatible_load_p (merged_store_group *merged_store,
2766 : store_immediate_info *info,
2767 : tree base_addr, int idx)
2768 : {
2769 124172 : store_immediate_info *infof = merged_store->stores[0];
2770 124172 : if (!info->ops[idx].base_addr
2771 124172 : || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2772 124172 : info->bitpos - infof->bitpos)
2773 231784 : || !operand_equal_p (info->ops[idx].base_addr,
2774 107612 : infof->ops[idx].base_addr, 0))
2775 18439 : return false;
2776 :
2777 105733 : store_immediate_info *infol = merged_store->stores.last ();
2778 105733 : tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2779 : /* In this case all vuses should be the same, e.g.
2780 : _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2781 : or
2782 : _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2783 : and we can emit the coalesced load next to any of those loads. */
2784 105733 : if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2785 201247 : && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2786 : return true;
2787 :
2788 : /* Otherwise, at least for now require that the load has the same
2789 : vuse as the store. See following examples. */
2790 20438 : if (gimple_vuse (info->stmt) != load_vuse)
2791 : return false;
2792 :
2793 16314 : if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2794 8157 : || (infof != infol
2795 9996 : && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2796 : return false;
2797 :
2798 : /* If the load is from the same location as the store, already
2799 : the construction of the immediate chain info guarantees no intervening
2800 : stores, so no further checks are needed. Example:
2801 : _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2802 7632 : if (known_eq (info->ops[idx].bitpos, info->bitpos)
2803 7632 : && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2804 : return true;
2805 :
2806 : /* Otherwise, we need to punt if any of the loads can be clobbered by any
2807 : of the stores in the group, or any other stores in between those.
2808 : Previous calls to compatible_load_p ensured that for all the
2809 : merged_store->stores IDX loads, no stmts starting with
2810 : merged_store->first_stmt and ending right before merged_store->last_stmt
2811 : clobbers those loads. */
2812 7516 : gimple *first = merged_store->first_stmt;
2813 7516 : gimple *last = merged_store->last_stmt;
2814 : /* The stores are sorted by increasing store bitpos, so if info->stmt store
2815 : comes before the so far first load, we'll be changing
2816 : merged_store->first_stmt. In that case we need to give up if
2817 : any of the earlier processed loads clobber with the stmts in the new
2818 : range. */
2819 7516 : if (info->order < merged_store->first_order)
2820 : {
2821 1151 : for (store_immediate_info *infoc : merged_store->stores)
2822 325 : if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2823 : return false;
2824 176 : first = info->stmt;
2825 : }
2826 : /* Similarly, we could change merged_store->last_stmt, so ensure
2827 : in that case no stmts in the new range clobber any of the earlier
2828 : processed loads. */
2829 7191 : else if (info->order > merged_store->last_order)
2830 : {
2831 45966 : for (store_immediate_info *infoc : merged_store->stores)
2832 25027 : if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2833 : return false;
2834 6557 : last = info->stmt;
2835 : }
2836 : /* And finally, we'd be adding a new load to the set, ensure it isn't
2837 : clobbered in the new range. */
2838 6733 : if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2839 : return false;
2840 :
2841 : /* Otherwise, we are looking for:
2842 : _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2843 : or
2844 : _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2845 : return true;
2846 : }
2847 :
2848 : /* Add all refs loaded to compute VAL to REFS vector. */
2849 :
2850 : void
2851 231 : gather_bswap_load_refs (vec<tree> *refs, tree val)
2852 : {
2853 244 : if (TREE_CODE (val) != SSA_NAME)
2854 : return;
2855 :
2856 242 : gimple *stmt = SSA_NAME_DEF_STMT (val);
2857 242 : if (!is_gimple_assign (stmt))
2858 : return;
2859 :
2860 242 : if (gimple_assign_load_p (stmt))
2861 : {
2862 229 : refs->safe_push (gimple_assign_rhs1 (stmt));
2863 229 : return;
2864 : }
2865 :
2866 13 : switch (gimple_assign_rhs_class (stmt))
2867 : {
2868 2 : case GIMPLE_BINARY_RHS:
2869 2 : gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2870 : /* FALLTHRU */
2871 13 : case GIMPLE_UNARY_RHS:
2872 13 : gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2873 13 : break;
2874 0 : default:
2875 0 : gcc_unreachable ();
2876 : }
2877 : }
2878 :
2879 : /* Check if there are any stores in M_STORE_INFO after index I
2880 : (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2881 : a potential group ending with END that have their order
2882 : smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2883 : all the stores already merged and the one under consideration
2884 : have rhs_code of INTEGER_CST. Return true if there are no such stores.
2885 : Consider:
2886 : MEM[(long long int *)p_28] = 0;
2887 : MEM[(long long int *)p_28 + 8B] = 0;
2888 : MEM[(long long int *)p_28 + 16B] = 0;
2889 : MEM[(long long int *)p_28 + 24B] = 0;
2890 : _129 = (int) _130;
2891 : MEM[(int *)p_28 + 8B] = _129;
2892 : MEM[(int *)p_28].a = -1;
2893 : We already have
2894 : MEM[(long long int *)p_28] = 0;
2895 : MEM[(int *)p_28].a = -1;
2896 : stmts in the current group and need to consider if it is safe to
2897 : add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2898 : There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2899 : store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2900 : into the group and merging of those 3 stores is successful, merged
2901 : stmts will be emitted at the latest store from that group, i.e.
2902 : LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2903 : The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2904 : the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2905 : so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2906 : into the group. That way it will be its own store group and will
2907 : not be touched. If ALL_INTEGER_CST_P and there are overlapping
2908 : INTEGER_CST stores, those are mergeable using merge_overlapping,
2909 : so don't return false for those.
2910 :
2911 : Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2912 : (exclusive), whether they don't overlap the bitrange START to END
2913 : and have order in between FIRST_ORDER and LAST_ORDER. This is to
2914 : prevent merging in cases like:
2915 : MEM <char[12]> [&b + 8B] = {};
2916 : MEM[(short *) &b] = 5;
2917 : _5 = *x_4(D);
2918 : MEM <long long unsigned int> [&b + 2B] = _5;
2919 : MEM[(char *)&b + 16B] = 88;
2920 : MEM[(int *)&b + 20B] = 1;
2921 : The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2922 : be merged with it, because the = _5 store overlaps these and is in between
2923 : them in sort_by_order ordering. If it was merged, the merged store would
2924 : go after the = _5 store and thus change behavior. */
2925 :
2926 : static bool
2927 798975 : check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2928 : unsigned int i,
2929 : bool all_integer_cst_p, unsigned int first_order,
2930 : unsigned int last_order, unsigned HOST_WIDE_INT start,
2931 : unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2932 : unsigned end_earlier)
2933 : {
2934 798975 : unsigned int len = m_store_info.length ();
2935 810833 : for (unsigned int j = first_earlier; j < end_earlier; j++)
2936 : {
2937 11867 : store_immediate_info *info = m_store_info[j];
2938 11867 : if (info->order > first_order
2939 39 : && info->order < last_order
2940 11 : && info->bitpos + info->bitsize > start)
2941 : return false;
2942 : }
2943 883161 : for (++i; i < len; ++i)
2944 : {
2945 463854 : store_immediate_info *info = m_store_info[i];
2946 463854 : if (info->bitpos >= end)
2947 : break;
2948 84214 : if (info->order < last_order
2949 37186 : && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2950 : return false;
2951 : }
2952 : return true;
2953 : }
2954 :
2955 : /* Return true if m_store_info[first] and at least one following store
2956 : form a group which store try_size bitsize value which is byte swapped
2957 : from a memory load or some value, or identity from some value.
2958 : This uses the bswap pass APIs. */
2959 :
2960 : bool
2961 232533 : imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2962 : unsigned int first,
2963 : unsigned int try_size,
2964 : unsigned int first_earlier)
2965 : {
2966 232533 : unsigned int len = m_store_info.length (), last = first;
2967 232533 : unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2968 232533 : if (width >= try_size)
2969 : return false;
2970 85372 : for (unsigned int i = first + 1; i < len; ++i)
2971 : {
2972 77951 : if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2973 77233 : || m_store_info[i]->lp_nr != merged_store->lp_nr
2974 155184 : || m_store_info[i]->ins_stmt == NULL)
2975 : return false;
2976 76196 : width += m_store_info[i]->bitsize;
2977 76196 : if (width >= try_size)
2978 : {
2979 : last = i;
2980 : break;
2981 : }
2982 : }
2983 48367 : if (width != try_size)
2984 : return false;
2985 :
2986 40401 : bool allow_unaligned
2987 40401 : = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2988 : /* Punt if the combined store would not be aligned and we need alignment. */
2989 40401 : if (!allow_unaligned)
2990 : {
2991 0 : unsigned int align = merged_store->align;
2992 0 : unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2993 0 : for (unsigned int i = first + 1; i <= last; ++i)
2994 : {
2995 0 : unsigned int this_align;
2996 0 : unsigned HOST_WIDE_INT align_bitpos = 0;
2997 0 : get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2998 : &this_align, &align_bitpos);
2999 0 : if (this_align > align)
3000 : {
3001 0 : align = this_align;
3002 0 : align_base = m_store_info[i]->bitpos - align_bitpos;
3003 : }
3004 : }
3005 0 : unsigned HOST_WIDE_INT align_bitpos
3006 0 : = (m_store_info[first]->bitpos - align_base) & (align - 1);
3007 0 : if (align_bitpos)
3008 0 : align = least_bit_hwi (align_bitpos);
3009 0 : if (align < try_size)
3010 : return false;
3011 : }
3012 :
3013 40401 : tree type;
3014 40401 : switch (try_size)
3015 : {
3016 6185 : case 16: type = uint16_type_node; break;
3017 5087 : case 32: type = uint32_type_node; break;
3018 29129 : case 64: type = uint64_type_node; break;
3019 0 : default: gcc_unreachable ();
3020 : }
3021 40401 : struct symbolic_number n;
3022 40401 : gimple *ins_stmt = NULL;
3023 40401 : int vuse_store = -1;
3024 40401 : unsigned int first_order = merged_store->first_order;
3025 40401 : unsigned int last_order = merged_store->last_order;
3026 40401 : gimple *first_stmt = merged_store->first_stmt;
3027 40401 : gimple *last_stmt = merged_store->last_stmt;
3028 40401 : unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
3029 40401 : store_immediate_info *infof = m_store_info[first];
3030 :
3031 111266 : for (unsigned int i = first; i <= last; ++i)
3032 : {
3033 87383 : store_immediate_info *info = m_store_info[i];
3034 87383 : struct symbolic_number this_n = info->n;
3035 87383 : this_n.type = type;
3036 87383 : if (!this_n.base_addr)
3037 12062 : this_n.range = try_size / BITS_PER_UNIT;
3038 : else
3039 : /* Update vuse in case it has changed by output_merged_stores. */
3040 150642 : this_n.vuse = gimple_vuse (info->ins_stmt);
3041 87383 : unsigned int bitpos = info->bitpos - infof->bitpos;
3042 87383 : if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
3043 : BYTES_BIG_ENDIAN
3044 : ? try_size - info->bitsize - bitpos
3045 : : bitpos))
3046 16518 : return false;
3047 87383 : if (this_n.base_addr && vuse_store)
3048 : {
3049 : unsigned int j;
3050 118858 : for (j = first; j <= last; ++j)
3051 184550 : if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
3052 : break;
3053 45482 : if (j > last)
3054 : {
3055 26583 : if (vuse_store == 1)
3056 : return false;
3057 : vuse_store = 0;
3058 : }
3059 : }
3060 87383 : if (i == first)
3061 : {
3062 40401 : n = this_n;
3063 40401 : ins_stmt = info->ins_stmt;
3064 : }
3065 : else
3066 : {
3067 46982 : if (n.base_addr && n.vuse != this_n.vuse)
3068 : {
3069 7153 : if (vuse_store == 0)
3070 : return false;
3071 : vuse_store = 1;
3072 : }
3073 42944 : if (info->order > last_order)
3074 : {
3075 40358 : last_order = info->order;
3076 40358 : last_stmt = info->stmt;
3077 : }
3078 2586 : else if (info->order < first_order)
3079 : {
3080 2563 : first_order = info->order;
3081 2563 : first_stmt = info->stmt;
3082 : }
3083 42944 : end = MAX (end, info->bitpos + info->bitsize);
3084 :
3085 42944 : ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
3086 : &this_n, &n, BIT_IOR_EXPR);
3087 42944 : if (ins_stmt == NULL)
3088 : return false;
3089 : }
3090 : }
3091 :
3092 23883 : uint64_t cmpxchg, cmpnop;
3093 23883 : bool cast64_to_32;
3094 23883 : find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
3095 :
3096 : /* A complete byte swap should make the symbolic number to start with
3097 : the largest digit in the highest order byte. Unchanged symbolic
3098 : number indicates a read with same endianness as target architecture. */
3099 23883 : if (n.n != cmpnop && n.n != cmpxchg)
3100 : return false;
3101 :
3102 : /* For now. */
3103 21200 : if (cast64_to_32)
3104 : return false;
3105 :
3106 21200 : if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
3107 : return false;
3108 :
3109 21200 : if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
3110 : merged_store->start, end, first_earlier, first))
3111 : return false;
3112 :
3113 : /* Don't handle memory copy this way if normal non-bswap processing
3114 : would handle it too. */
3115 21198 : if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
3116 : {
3117 : unsigned int i;
3118 61451 : for (i = first; i <= last; ++i)
3119 41206 : if (m_store_info[i]->rhs_code != MEM_REF)
3120 : break;
3121 20572 : if (i == last + 1)
3122 : return false;
3123 : }
3124 :
3125 953 : if (n.n == cmpxchg)
3126 626 : switch (try_size)
3127 : {
3128 : case 16:
3129 : /* Will emit LROTATE_EXPR. */
3130 : break;
3131 182 : case 32:
3132 182 : if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
3133 182 : && can_open_code_p (bswap_optab, SImode))
3134 : break;
3135 58 : return false;
3136 114 : case 64:
3137 114 : if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
3138 114 : && can_open_code_p (bswap_optab, DImode))
3139 : break;
3140 42 : return false;
3141 0 : default:
3142 0 : gcc_unreachable ();
3143 : }
3144 :
3145 853 : if (!allow_unaligned && n.base_addr)
3146 : {
3147 0 : unsigned int align = get_object_alignment (n.src);
3148 0 : if (align < try_size)
3149 : return false;
3150 : }
3151 :
3152 : /* If each load has vuse of the corresponding store, need to verify
3153 : the loads can be sunk right before the last store. */
3154 853 : if (vuse_store == 1)
3155 : {
3156 92 : auto_vec<tree, 64> refs;
3157 321 : for (unsigned int i = first; i <= last; ++i)
3158 229 : gather_bswap_load_refs (&refs,
3159 229 : gimple_assign_rhs1 (m_store_info[i]->stmt));
3160 :
3161 437 : for (tree ref : refs)
3162 180 : if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
3163 19 : return false;
3164 73 : n.vuse = NULL_TREE;
3165 92 : }
3166 :
3167 834 : infof->n = n;
3168 834 : infof->ins_stmt = ins_stmt;
3169 3878 : for (unsigned int i = first; i <= last; ++i)
3170 : {
3171 4426 : m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3172 3044 : m_store_info[i]->ops[0].base_addr = NULL_TREE;
3173 3044 : m_store_info[i]->ops[1].base_addr = NULL_TREE;
3174 3044 : if (i != first)
3175 2210 : merged_store->merge_into (m_store_info[i]);
3176 : }
3177 :
3178 : return true;
3179 : }
3180 :
3181 : /* Go through the candidate stores recorded in m_store_info and merge them
3182 : into merged_store_group objects recorded into m_merged_store_groups
3183 : representing the widened stores. Return true if coalescing was successful
3184 : and the number of widened stores is fewer than the original number
3185 : of stores. */
3186 :
3187 : bool
3188 491934 : imm_store_chain_info::coalesce_immediate_stores ()
3189 : {
3190 : /* Anything less can't be processed. */
3191 618724 : if (m_store_info.length () < 2)
3192 : return false;
3193 :
3194 491934 : if (dump_file && (dump_flags & TDF_DETAILS))
3195 25 : fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3196 : m_store_info.length ());
3197 :
3198 491934 : store_immediate_info *info;
3199 491934 : unsigned int i, ignore = 0;
3200 491934 : unsigned int first_earlier = 0;
3201 491934 : unsigned int end_earlier = 0;
3202 :
3203 : /* Order the stores by the bitposition they write to. */
3204 491934 : m_store_info.qsort (sort_by_bitpos);
3205 :
3206 491934 : info = m_store_info[0];
3207 491934 : merged_store_group *merged_store = new merged_store_group (info);
3208 491934 : if (dump_file && (dump_flags & TDF_DETAILS))
3209 25 : fputs ("New store group\n", dump_file);
3210 :
3211 2114494 : FOR_EACH_VEC_ELT (m_store_info, i, info)
3212 : {
3213 1622560 : unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3214 :
3215 1622560 : if (i <= ignore)
3216 574968 : goto done;
3217 :
3218 : while (first_earlier < end_earlier
3219 1302271 : && (m_store_info[first_earlier]->bitpos
3220 265461 : + m_store_info[first_earlier]->bitsize
3221 265461 : <= merged_store->start))
3222 254679 : first_earlier++;
3223 :
3224 : /* First try to handle group of stores like:
3225 : p[0] = data >> 24;
3226 : p[1] = data >> 16;
3227 : p[2] = data >> 8;
3228 : p[3] = data;
3229 : using the bswap framework. */
3230 1047592 : if (info->bitpos == merged_store->start + merged_store->width
3231 686723 : && merged_store->stores.length () == 1
3232 371298 : && merged_store->stores[0]->ins_stmt != NULL
3233 116423 : && info->lp_nr == merged_store->lp_nr
3234 1164015 : && info->ins_stmt != NULL)
3235 : {
3236 : unsigned int try_size;
3237 309413 : for (try_size = 64; try_size >= 16; try_size >>= 1)
3238 232533 : if (try_coalesce_bswap (merged_store, i - 1, try_size,
3239 : first_earlier))
3240 : break;
3241 :
3242 77714 : if (try_size >= 16)
3243 : {
3244 834 : ignore = i + merged_store->stores.length () - 1;
3245 834 : m_merged_store_groups.safe_push (merged_store);
3246 834 : if (ignore < m_store_info.length ())
3247 : {
3248 324 : merged_store = new merged_store_group (m_store_info[ignore]);
3249 324 : end_earlier = ignore;
3250 : }
3251 : else
3252 510 : merged_store = NULL;
3253 834 : goto done;
3254 : }
3255 : }
3256 :
3257 1046758 : new_bitregion_start
3258 1046758 : = MIN (merged_store->bitregion_start, info->bitregion_start);
3259 1046758 : new_bitregion_end
3260 1046758 : = MAX (merged_store->bitregion_end, info->bitregion_end);
3261 :
3262 1046758 : if (info->order >= merged_store->first_nonmergeable_order
3263 1044921 : || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3264 1044921 : > (unsigned) param_store_merging_max_size))
3265 : ;
3266 :
3267 : /* |---store 1---|
3268 : |---store 2---|
3269 : Overlapping stores. */
3270 1044831 : else if (IN_RANGE (info->bitpos, merged_store->start,
3271 : merged_store->start + merged_store->width - 1)
3272 : /* |---store 1---||---store 2---|
3273 : Handle also the consecutive INTEGER_CST stores case here,
3274 : as we have here the code to deal with overlaps. */
3275 1044831 : || (info->bitregion_start <= merged_store->bitregion_end
3276 690124 : && info->rhs_code == INTEGER_CST
3277 524997 : && merged_store->only_constants
3278 494403 : && merged_store->can_be_merged_into (info)))
3279 : {
3280 : /* Only allow overlapping stores of constants. */
3281 601339 : if (info->rhs_code == INTEGER_CST
3282 594002 : && merged_store->only_constants
3283 593927 : && info->lp_nr == merged_store->lp_nr)
3284 : {
3285 593915 : unsigned int first_order
3286 593915 : = MIN (merged_store->first_order, info->order);
3287 593915 : unsigned int last_order
3288 593915 : = MAX (merged_store->last_order, info->order);
3289 593915 : unsigned HOST_WIDE_INT end
3290 593915 : = MAX (merged_store->start + merged_store->width,
3291 : info->bitpos + info->bitsize);
3292 593915 : if (check_no_overlap (m_store_info, i, true, first_order,
3293 : last_order, merged_store->start, end,
3294 : first_earlier, end_earlier))
3295 : {
3296 : /* check_no_overlap call above made sure there are no
3297 : overlapping stores with non-INTEGER_CST rhs_code
3298 : in between the first and last of the stores we've
3299 : just merged. If there are any INTEGER_CST rhs_code
3300 : stores in between, we need to merge_overlapping them
3301 : even if in the sort_by_bitpos order there are other
3302 : overlapping stores in between. Keep those stores as is.
3303 : Example:
3304 : MEM[(int *)p_28] = 0;
3305 : MEM[(char *)p_28 + 3B] = 1;
3306 : MEM[(char *)p_28 + 1B] = 2;
3307 : MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3308 : We can't merge the zero store with the store of two and
3309 : not merge anything else, because the store of one is
3310 : in the original order in between those two, but in
3311 : store_by_bitpos order it comes after the last store that
3312 : we can't merge with them. We can merge the first 3 stores
3313 : and keep the last store as is though. */
3314 593891 : unsigned int len = m_store_info.length ();
3315 593891 : unsigned int try_order = last_order;
3316 593891 : unsigned int first_nonmergeable_order;
3317 593891 : unsigned int k;
3318 593891 : bool last_iter = false;
3319 593891 : int attempts = 0;
3320 617031 : do
3321 : {
3322 617031 : unsigned int max_order = 0;
3323 617031 : unsigned int min_order = first_order;
3324 617031 : unsigned first_nonmergeable_int_order = ~0U;
3325 617031 : unsigned HOST_WIDE_INT this_end = end;
3326 617031 : unsigned HOST_WIDE_INT this_bitregion_start
3327 : = new_bitregion_start;
3328 617031 : unsigned HOST_WIDE_INT this_bitregion_end
3329 : = new_bitregion_end;
3330 617031 : k = i;
3331 617031 : first_nonmergeable_order = ~0U;
3332 747528 : for (unsigned int j = i + 1; j < len; ++j)
3333 : {
3334 460374 : store_immediate_info *info2 = m_store_info[j];
3335 460374 : if (info2->bitpos >= this_end)
3336 : break;
3337 130500 : if (info2->order < try_order)
3338 : {
3339 82071 : if (info2->rhs_code != INTEGER_CST
3340 82068 : || info2->lp_nr != merged_store->lp_nr)
3341 : {
3342 : /* Normally check_no_overlap makes sure this
3343 : doesn't happen, but if end grows below,
3344 : then we need to process more stores than
3345 : check_no_overlap verified. Example:
3346 : MEM[(int *)p_5] = 0;
3347 : MEM[(short *)p_5 + 3B] = 1;
3348 : MEM[(char *)p_5 + 4B] = _9;
3349 : MEM[(char *)p_5 + 2B] = 2; */
3350 : k = 0;
3351 : break;
3352 : }
3353 82068 : if (info2->bitregion_start
3354 : < this_bitregion_start)
3355 : this_bitregion_start = info2->bitregion_start;
3356 82068 : if (info2->bitregion_end
3357 : > this_bitregion_end)
3358 : this_bitregion_end = info2->bitregion_end;
3359 82068 : if (((this_bitregion_end - this_bitregion_start
3360 82068 : + 1) / BITS_PER_UNIT)
3361 : > (unsigned) param_store_merging_max_size)
3362 : {
3363 : k = 0;
3364 : break;
3365 : }
3366 82068 : k = j;
3367 82068 : min_order = MIN (min_order, info2->order);
3368 82068 : this_end = MAX (this_end,
3369 : info2->bitpos + info2->bitsize);
3370 : }
3371 48429 : else if (info2->rhs_code == INTEGER_CST
3372 45596 : && info2->lp_nr == merged_store->lp_nr
3373 45596 : && !last_iter)
3374 : {
3375 45596 : max_order = MAX (max_order, info2->order + 1);
3376 45596 : first_nonmergeable_int_order
3377 45596 : = MIN (first_nonmergeable_int_order,
3378 : info2->order);
3379 : }
3380 : else
3381 2833 : first_nonmergeable_order
3382 2833 : = MIN (first_nonmergeable_order, info2->order);
3383 : }
3384 617031 : if (k > i
3385 617031 : && !check_no_overlap (m_store_info, len - 1, true,
3386 : min_order, try_order,
3387 : merged_store->start, this_end,
3388 : first_earlier, end_earlier))
3389 : k = 0;
3390 617031 : if (k == 0)
3391 : {
3392 3 : if (last_order == try_order)
3393 : break;
3394 : /* If this failed, but only because we grew
3395 : try_order, retry with the last working one,
3396 : so that we merge at least something. */
3397 0 : try_order = last_order;
3398 0 : last_iter = true;
3399 0 : continue;
3400 : }
3401 617028 : last_order = try_order;
3402 : /* Retry with a larger try_order to see if we could
3403 : merge some further INTEGER_CST stores. */
3404 617028 : if (max_order
3405 617028 : && (first_nonmergeable_int_order
3406 617028 : < first_nonmergeable_order))
3407 : {
3408 23141 : try_order = MIN (max_order,
3409 : first_nonmergeable_order);
3410 23141 : try_order
3411 23141 : = MIN (try_order,
3412 : merged_store->first_nonmergeable_order);
3413 23141 : if (try_order > last_order && ++attempts < 16)
3414 23140 : continue;
3415 : }
3416 593888 : first_nonmergeable_order
3417 593888 : = MIN (first_nonmergeable_order,
3418 : first_nonmergeable_int_order);
3419 : end = this_end;
3420 : break;
3421 : }
3422 : while (1);
3423 :
3424 593891 : if (k != 0)
3425 : {
3426 593888 : merged_store->merge_overlapping (info);
3427 :
3428 593888 : merged_store->first_nonmergeable_order
3429 593888 : = MIN (merged_store->first_nonmergeable_order,
3430 : first_nonmergeable_order);
3431 :
3432 675222 : for (unsigned int j = i + 1; j <= k; j++)
3433 : {
3434 81334 : store_immediate_info *info2 = m_store_info[j];
3435 81334 : gcc_assert (info2->bitpos < end);
3436 81334 : if (info2->order < last_order)
3437 : {
3438 81276 : gcc_assert (info2->rhs_code == INTEGER_CST);
3439 81276 : if (info != info2)
3440 81276 : merged_store->merge_overlapping (info2);
3441 : }
3442 : /* Other stores are kept and not merged in any
3443 : way. */
3444 : }
3445 593888 : ignore = k;
3446 593888 : goto done;
3447 : }
3448 : }
3449 : }
3450 : }
3451 : /* |---store 1---||---store 2---|
3452 : This store is consecutive to the previous one.
3453 : Merge it into the current store group. There can be gaps in between
3454 : the stores, but there can't be gaps in between bitregions. */
3455 443492 : else if (info->bitregion_start <= merged_store->bitregion_end
3456 443492 : && merged_store->can_be_merged_into (info))
3457 : {
3458 133259 : store_immediate_info *infof = merged_store->stores[0];
3459 :
3460 : /* All the rhs_code ops that take 2 operands are commutative,
3461 : swap the operands if it could make the operands compatible. */
3462 133259 : if (infof->ops[0].base_addr
3463 122813 : && infof->ops[1].base_addr
3464 1363 : && info->ops[0].base_addr
3465 1363 : && info->ops[1].base_addr
3466 1363 : && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3467 : info->bitpos - infof->bitpos)
3468 134604 : && operand_equal_p (info->ops[1].base_addr,
3469 : infof->ops[0].base_addr, 0))
3470 : {
3471 1139 : std::swap (info->ops[0], info->ops[1]);
3472 1139 : info->ops_swapped_p = true;
3473 : }
3474 133259 : if (check_no_overlap (m_store_info, i, false,
3475 133259 : MIN (merged_store->first_order, info->order),
3476 133259 : MAX (merged_store->last_order, info->order),
3477 : merged_store->start,
3478 133259 : MAX (merged_store->start + merged_store->width,
3479 : info->bitpos + info->bitsize),
3480 : first_earlier, end_earlier))
3481 : {
3482 : /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3483 133257 : if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3484 : {
3485 766 : info->rhs_code = BIT_INSERT_EXPR;
3486 766 : info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3487 766 : info->ops[0].base_addr = NULL_TREE;
3488 : }
3489 132491 : else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3490 : {
3491 8 : for (store_immediate_info *infoj : merged_store->stores)
3492 : {
3493 2 : infoj->rhs_code = BIT_INSERT_EXPR;
3494 2 : infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3495 2 : infoj->ops[0].base_addr = NULL_TREE;
3496 : }
3497 2 : merged_store->bit_insertion = true;
3498 : }
3499 133257 : if ((infof->ops[0].base_addr
3500 122809 : ? compatible_load_p (merged_store, info, base_addr, 0)
3501 10448 : : !info->ops[0].base_addr)
3502 267877 : && (infof->ops[1].base_addr
3503 1363 : ? compatible_load_p (merged_store, info, base_addr, 1)
3504 110061 : : !info->ops[1].base_addr))
3505 : {
3506 111417 : merged_store->merge_into (info);
3507 111417 : goto done;
3508 : }
3509 : }
3510 : }
3511 :
3512 : /* |---store 1---| <gap> |---store 2---|.
3513 : Gap between stores or the rhs not compatible. Start a new group. */
3514 :
3515 : /* Try to apply all the stores recorded for the group to determine
3516 : the bitpattern they write and discard it if that fails.
3517 : This will also reject single-store groups. */
3518 341453 : if (merged_store->apply_stores ())
3519 57820 : m_merged_store_groups.safe_push (merged_store);
3520 : else
3521 283633 : delete merged_store;
3522 :
3523 341453 : merged_store = new merged_store_group (info);
3524 341453 : end_earlier = i;
3525 341453 : if (dump_file && (dump_flags & TDF_DETAILS))
3526 1 : fputs ("New store group\n", dump_file);
3527 :
3528 1622560 : done:
3529 1622560 : if (dump_file && (dump_flags & TDF_DETAILS))
3530 : {
3531 227 : fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3532 : " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3533 : i, info->bitsize, info->bitpos);
3534 227 : print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3535 227 : fputc ('\n', dump_file);
3536 : }
3537 : }
3538 :
3539 : /* Record or discard the last store group. */
3540 491934 : if (merged_store)
3541 : {
3542 491424 : if (merged_store->apply_stores ())
3543 338632 : m_merged_store_groups.safe_push (merged_store);
3544 : else
3545 152792 : delete merged_store;
3546 : }
3547 :
3548 1349012 : gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3549 :
3550 491934 : bool success
3551 491934 : = !m_merged_store_groups.is_empty ()
3552 365144 : && m_merged_store_groups.length () < m_store_info.length ();
3553 :
3554 365144 : if (success && dump_file)
3555 94 : fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3556 : m_merged_store_groups.length ());
3557 :
3558 : return success;
3559 : }
3560 :
3561 : /* Return the type to use for the merged stores or loads described by STMTS.
3562 : This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3563 : otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3564 : of the MEM_REFs if any. */
3565 :
3566 : static tree
3567 93194 : get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3568 : unsigned short *cliquep, unsigned short *basep)
3569 : {
3570 93194 : gimple *stmt;
3571 93194 : unsigned int i;
3572 93194 : tree type = NULL_TREE;
3573 93194 : tree ret = NULL_TREE;
3574 93194 : *cliquep = 0;
3575 93194 : *basep = 0;
3576 :
3577 318971 : FOR_EACH_VEC_ELT (stmts, i, stmt)
3578 : {
3579 225777 : tree ref = is_load ? gimple_assign_rhs1 (stmt)
3580 218929 : : gimple_assign_lhs (stmt);
3581 225777 : tree type1 = reference_alias_ptr_type (ref);
3582 225777 : tree base = get_base_address (ref);
3583 :
3584 225777 : if (i == 0)
3585 : {
3586 93194 : if (TREE_CODE (base) == MEM_REF)
3587 : {
3588 13726 : *cliquep = MR_DEPENDENCE_CLIQUE (base);
3589 13726 : *basep = MR_DEPENDENCE_BASE (base);
3590 : }
3591 93194 : ret = type = type1;
3592 93194 : continue;
3593 : }
3594 132583 : if (!alias_ptr_types_compatible_p (type, type1))
3595 84150 : ret = ptr_type_node;
3596 132583 : if (TREE_CODE (base) != MEM_REF
3597 19703 : || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3598 150842 : || *basep != MR_DEPENDENCE_BASE (base))
3599 : {
3600 114324 : *cliquep = 0;
3601 114324 : *basep = 0;
3602 : }
3603 : }
3604 93194 : return ret;
3605 : }
3606 :
3607 : /* Return the location_t information we can find among the statements
3608 : in STMTS. */
3609 :
3610 : static location_t
3611 93231 : get_location_for_stmts (vec<gimple *> &stmts)
3612 : {
3613 282666 : for (gimple *stmt : stmts)
3614 94971 : if (gimple_has_location (stmt))
3615 91998 : return gimple_location (stmt);
3616 :
3617 : return UNKNOWN_LOCATION;
3618 : }
3619 :
3620 : /* Used to describe a store resulting from splitting a wide store in smaller
3621 : regularly-sized stores in split_group. */
3622 :
3623 908999 : class split_store
3624 : {
3625 : public:
3626 : unsigned HOST_WIDE_INT bytepos;
3627 : unsigned HOST_WIDE_INT size;
3628 : unsigned HOST_WIDE_INT align;
3629 : auto_vec<store_immediate_info *> orig_stores;
3630 : /* True if there is a single orig stmt covering the whole split store. */
3631 : bool orig;
3632 : split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3633 : unsigned HOST_WIDE_INT);
3634 : };
3635 :
3636 : /* Simple constructor. */
3637 :
3638 908999 : split_store::split_store (unsigned HOST_WIDE_INT bp,
3639 : unsigned HOST_WIDE_INT sz,
3640 908999 : unsigned HOST_WIDE_INT al)
3641 908999 : : bytepos (bp), size (sz), align (al), orig (false)
3642 : {
3643 908999 : orig_stores.create (0);
3644 0 : }
3645 :
3646 : /* Record all stores in GROUP that write to the region starting at BITPOS and
3647 : is of size BITSIZE. Record infos for such statements in STORES if
3648 : non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3649 : if there is exactly one original store in the range (in that case ignore
3650 : clobber stmts, unless there are only clobber stmts). */
3651 :
3652 : static store_immediate_info *
3653 7213038 : find_constituent_stores (class merged_store_group *group,
3654 : vec<store_immediate_info *> *stores,
3655 : unsigned int *first,
3656 : unsigned HOST_WIDE_INT bitpos,
3657 : unsigned HOST_WIDE_INT bitsize)
3658 : {
3659 7213038 : store_immediate_info *info, *ret = NULL;
3660 7213038 : unsigned int i;
3661 7213038 : bool second = false;
3662 7213038 : bool update_first = true;
3663 7213038 : unsigned HOST_WIDE_INT end = bitpos + bitsize;
3664 18922530 : for (i = *first; group->stores.iterate (i, &info); ++i)
3665 : {
3666 15879814 : unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3667 15879814 : unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3668 15879814 : if (stmt_end <= bitpos)
3669 : {
3670 : /* BITPOS passed to this function never decreases from within the
3671 : same split_group call, so optimize and don't scan info records
3672 : which are known to end before or at BITPOS next time.
3673 : Only do it if all stores before this one also pass this. */
3674 4463538 : if (update_first)
3675 2061323 : *first = i + 1;
3676 4463538 : continue;
3677 : }
3678 : else
3679 11416276 : update_first = false;
3680 :
3681 : /* The stores in GROUP are ordered by bitposition so if we're past
3682 : the region for this group return early. */
3683 11416276 : if (stmt_start >= end)
3684 : return ret;
3685 :
3686 7620663 : if (gimple_clobber_p (info->stmt))
3687 : {
3688 702718 : if (stores)
3689 35305 : stores->safe_push (info);
3690 702718 : if (ret == NULL)
3691 680785 : ret = info;
3692 702718 : continue;
3693 : }
3694 6917945 : if (stores)
3695 : {
3696 1040558 : stores->safe_push (info);
3697 1040558 : if (ret && !gimple_clobber_p (ret->stmt))
3698 : {
3699 : ret = NULL;
3700 : second = true;
3701 : }
3702 : }
3703 5877387 : else if (ret && !gimple_clobber_p (ret->stmt))
3704 : return NULL;
3705 6445700 : if (!second)
3706 6409847 : ret = info;
3707 : }
3708 : return ret;
3709 : }
3710 :
3711 : /* Return how many SSA_NAMEs used to compute value to store in the INFO
3712 : store have multiple uses. If any SSA_NAME has multiple uses, also
3713 : count statements needed to compute it. */
3714 :
3715 : static unsigned
3716 1135784 : count_multiple_uses (store_immediate_info *info)
3717 : {
3718 1135784 : gimple *stmt = info->stmt;
3719 1135784 : unsigned ret = 0;
3720 1135784 : switch (info->rhs_code)
3721 : {
3722 : case INTEGER_CST:
3723 : case STRING_CST:
3724 : return 0;
3725 5073 : case BIT_AND_EXPR:
3726 5073 : case BIT_IOR_EXPR:
3727 5073 : case BIT_XOR_EXPR:
3728 5073 : if (info->bit_not_p)
3729 : {
3730 65 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3731 : ret = 1; /* Fall through below to return
3732 : the BIT_NOT_EXPR stmt and then
3733 : BIT_{AND,IOR,XOR}_EXPR and anything it
3734 : uses. */
3735 : else
3736 : /* stmt is after this the BIT_NOT_EXPR. */
3737 65 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3738 : }
3739 5073 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3740 : {
3741 26 : ret += 1 + info->ops[0].bit_not_p;
3742 26 : if (info->ops[1].base_addr)
3743 26 : ret += 1 + info->ops[1].bit_not_p;
3744 26 : return ret + 1;
3745 : }
3746 5047 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3747 : /* stmt is now the BIT_*_EXPR. */
3748 5047 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3749 2228 : ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3750 2819 : else if (info->ops[info->ops_swapped_p].bit_not_p)
3751 : {
3752 171 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3753 171 : if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3754 0 : ++ret;
3755 : }
3756 5047 : if (info->ops[1].base_addr == NULL_TREE)
3757 : {
3758 329 : gcc_checking_assert (!info->ops_swapped_p);
3759 : return ret;
3760 : }
3761 4718 : if (!has_single_use (gimple_assign_rhs2 (stmt)))
3762 2138 : ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3763 2580 : else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3764 : {
3765 19 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3766 19 : if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3767 0 : ++ret;
3768 : }
3769 : return ret;
3770 263288 : case MEM_REF:
3771 263288 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3772 166385 : return 1 + info->ops[0].bit_not_p;
3773 96903 : else if (info->ops[0].bit_not_p)
3774 : {
3775 118 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3776 118 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3777 : return 1;
3778 : }
3779 : return 0;
3780 9295 : case BIT_INSERT_EXPR:
3781 9295 : return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3782 0 : default:
3783 0 : gcc_unreachable ();
3784 : }
3785 : }
3786 :
3787 : /* Split a merged store described by GROUP by populating the SPLIT_STORES
3788 : vector (if non-NULL) with split_store structs describing the byte offset
3789 : (from the base), the bit size and alignment of each store as well as the
3790 : original statements involved in each such split group.
3791 : This is to separate the splitting strategy from the statement
3792 : building/emission/linking done in output_merged_store.
3793 : Return number of new stores.
3794 : If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3795 : If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3796 : BZERO_FIRST may be true only when the first store covers the whole group
3797 : and clears it; if BZERO_FIRST is true, keep that first store in the set
3798 : unmodified and emit further stores for the overrides only.
3799 : If SPLIT_STORES is NULL, it is just a dry run to count number of
3800 : new stores. */
3801 :
3802 : static unsigned int
3803 1000558 : split_group (merged_store_group *group, bool allow_unaligned_store,
3804 : bool allow_unaligned_load, bool bzero_first,
3805 : vec<split_store *> *split_stores,
3806 : unsigned *total_orig,
3807 : unsigned *total_new)
3808 : {
3809 1000558 : unsigned HOST_WIDE_INT pos = group->bitregion_start;
3810 1000558 : unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3811 1000558 : unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3812 1000558 : unsigned HOST_WIDE_INT group_align = group->align;
3813 1000558 : unsigned HOST_WIDE_INT align_base = group->align_base;
3814 1000558 : unsigned HOST_WIDE_INT group_load_align = group_align;
3815 1000558 : bool any_orig = false;
3816 :
3817 1000558 : gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3818 :
3819 : /* For bswap framework using sets of stores, all the checking has been done
3820 : earlier in try_coalesce_bswap and the result always needs to be emitted
3821 : as a single store. Likewise for string concatenation. */
3822 1000558 : if (group->stores[0]->rhs_code == LROTATE_EXPR
3823 999037 : || group->stores[0]->rhs_code == NOP_EXPR
3824 1998614 : || group->string_concatenation)
3825 : {
3826 2610 : gcc_assert (!bzero_first);
3827 2610 : if (total_orig)
3828 : {
3829 : /* Avoid the old/new stmt count heuristics. It should be
3830 : always beneficial. */
3831 870 : total_new[0] = 1;
3832 870 : total_orig[0] = 2;
3833 : }
3834 :
3835 2610 : if (split_stores)
3836 : {
3837 870 : unsigned HOST_WIDE_INT align_bitpos
3838 870 : = (group->start - align_base) & (group_align - 1);
3839 870 : unsigned HOST_WIDE_INT align = group_align;
3840 870 : if (align_bitpos)
3841 120 : align = least_bit_hwi (align_bitpos);
3842 870 : bytepos = group->start / BITS_PER_UNIT;
3843 870 : split_store *store
3844 870 : = new split_store (bytepos, group->width, align);
3845 870 : unsigned int first = 0;
3846 870 : find_constituent_stores (group, &store->orig_stores,
3847 : &first, group->start, group->width);
3848 870 : split_stores->safe_push (store);
3849 : }
3850 :
3851 2610 : return 1;
3852 : }
3853 :
3854 997948 : unsigned int ret = 0, first = 0;
3855 997948 : unsigned HOST_WIDE_INT try_pos = bytepos;
3856 :
3857 997948 : if (total_orig)
3858 : {
3859 326116 : unsigned int i;
3860 326116 : store_immediate_info *info = group->stores[0];
3861 :
3862 326116 : total_new[0] = 0;
3863 326116 : total_orig[0] = 1; /* The orig store. */
3864 326116 : info = group->stores[0];
3865 326116 : if (info->ops[0].base_addr)
3866 73856 : total_orig[0]++;
3867 326116 : if (info->ops[1].base_addr)
3868 1199 : total_orig[0]++;
3869 326116 : switch (info->rhs_code)
3870 : {
3871 1300 : case BIT_AND_EXPR:
3872 1300 : case BIT_IOR_EXPR:
3873 1300 : case BIT_XOR_EXPR:
3874 1300 : total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3875 1300 : break;
3876 : default:
3877 : break;
3878 : }
3879 326116 : total_orig[0] *= group->stores.length ();
3880 :
3881 1368148 : FOR_EACH_VEC_ELT (group->stores, i, info)
3882 : {
3883 1042032 : total_new[0] += count_multiple_uses (info);
3884 1042032 : total_orig[0] += (info->bit_not_p
3885 1042032 : + info->ops[0].bit_not_p
3886 1042032 : + info->ops[1].bit_not_p);
3887 : }
3888 : }
3889 :
3890 997948 : if (!allow_unaligned_load)
3891 0 : for (int i = 0; i < 2; ++i)
3892 0 : if (group->load_align[i])
3893 0 : group_load_align = MIN (group_load_align, group->load_align[i]);
3894 :
3895 997948 : if (bzero_first)
3896 : {
3897 : store_immediate_info *gstore;
3898 22539 : FOR_EACH_VEC_ELT (group->stores, first, gstore)
3899 22539 : if (!gimple_clobber_p (gstore->stmt))
3900 : break;
3901 21426 : ++first;
3902 21426 : ret = 1;
3903 21426 : if (split_stores)
3904 : {
3905 1826 : split_store *store
3906 1826 : = new split_store (bytepos, gstore->bitsize, align_base);
3907 1826 : store->orig_stores.safe_push (gstore);
3908 1826 : store->orig = true;
3909 1826 : any_orig = true;
3910 1826 : split_stores->safe_push (store);
3911 : }
3912 : }
3913 :
3914 5129837 : while (size > 0)
3915 : {
3916 4131889 : if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3917 1813533 : && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3918 1797620 : || (bzero_first && group->val[try_pos - bytepos] == 0)))
3919 : {
3920 : /* Skip padding bytes. */
3921 627841 : ++try_pos;
3922 627841 : size -= BITS_PER_UNIT;
3923 627841 : continue;
3924 : }
3925 :
3926 3504048 : unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3927 3504048 : unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3928 3504048 : unsigned HOST_WIDE_INT align_bitpos
3929 3504048 : = (try_bitpos - align_base) & (group_align - 1);
3930 3504048 : unsigned HOST_WIDE_INT align = group_align;
3931 3504048 : bool found_orig = false;
3932 3504048 : if (align_bitpos)
3933 927818 : align = least_bit_hwi (align_bitpos);
3934 3504048 : if (!allow_unaligned_store)
3935 2364858 : try_size = MIN (try_size, align);
3936 3504048 : if (!allow_unaligned_load)
3937 : {
3938 : /* If we can't do or don't want to do unaligned stores
3939 : as well as loads, we need to take the loads into account
3940 : as well. */
3941 0 : unsigned HOST_WIDE_INT load_align = group_load_align;
3942 0 : align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3943 0 : if (align_bitpos)
3944 0 : load_align = least_bit_hwi (align_bitpos);
3945 0 : for (int i = 0; i < 2; ++i)
3946 0 : if (group->load_align[i])
3947 : {
3948 0 : align_bitpos
3949 0 : = known_alignment (try_bitpos
3950 0 : - group->stores[0]->bitpos
3951 0 : + group->stores[0]->ops[i].bitpos
3952 0 : - group->load_align_base[i]);
3953 0 : if (align_bitpos & (group_load_align - 1))
3954 : {
3955 0 : unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3956 0 : load_align = MIN (load_align, a);
3957 : }
3958 : }
3959 0 : try_size = MIN (try_size, load_align);
3960 : }
3961 3504048 : store_immediate_info *info
3962 3504048 : = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3963 3504048 : if (info && !gimple_clobber_p (info->stmt))
3964 : {
3965 : /* If there is just one original statement for the range, see if
3966 : we can just reuse the original store which could be even larger
3967 : than try_size. */
3968 2587143 : unsigned HOST_WIDE_INT stmt_end
3969 2587143 : = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3970 2587143 : info = find_constituent_stores (group, NULL, &first, try_bitpos,
3971 : stmt_end - try_bitpos);
3972 2587143 : if (info && info->bitpos >= try_bitpos)
3973 : {
3974 2395819 : store_immediate_info *info2 = NULL;
3975 2395819 : unsigned int first_copy = first;
3976 2395819 : if (info->bitpos > try_bitpos
3977 5955 : && stmt_end - try_bitpos <= try_size)
3978 : {
3979 5710 : info2 = find_constituent_stores (group, NULL, &first_copy,
3980 : try_bitpos,
3981 : info->bitpos - try_bitpos);
3982 5710 : gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3983 : }
3984 2395405 : if (info2 == NULL && stmt_end - try_bitpos < try_size)
3985 : {
3986 417928 : info2 = find_constituent_stores (group, NULL, &first_copy,
3987 : stmt_end,
3988 208964 : (try_bitpos + try_size)
3989 : - stmt_end);
3990 208964 : gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3991 : }
3992 : if (info2 == NULL)
3993 : {
3994 2375359 : try_size = stmt_end - try_bitpos;
3995 2375359 : found_orig = true;
3996 2375359 : goto found;
3997 : }
3998 : }
3999 : }
4000 :
4001 : /* Approximate store bitsize for the case when there are no padding
4002 : bits. */
4003 1208156 : while (try_size > size)
4004 79467 : try_size /= 2;
4005 : /* Now look for whole padding bytes at the end of that bitsize. */
4006 2184774 : for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
4007 2018948 : if (group->mask[try_pos - bytepos + nonmasked - 1]
4008 : != (unsigned char) ~0U
4009 1976657 : && (!bzero_first
4010 1021480 : || group->val[try_pos - bytepos + nonmasked - 1] != 0))
4011 : break;
4012 1128689 : if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
4013 : {
4014 : /* If entire try_size range is padding, skip it. */
4015 588680 : try_pos += try_size / BITS_PER_UNIT;
4016 588680 : size -= try_size;
4017 588680 : continue;
4018 : }
4019 : /* Otherwise try to decrease try_size if second half, last 3 quarters
4020 : etc. are padding. */
4021 540009 : nonmasked *= BITS_PER_UNIT;
4022 549270 : while (nonmasked <= try_size / 2)
4023 : try_size /= 2;
4024 540009 : if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
4025 : {
4026 : /* Now look for whole padding bytes at the start of that bitsize. */
4027 305655 : unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
4028 315396 : for (masked = 0; masked < try_bytesize; ++masked)
4029 315396 : if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
4030 313057 : && (!bzero_first
4031 11277 : || group->val[try_pos - bytepos + masked] != 0))
4032 : break;
4033 305655 : masked *= BITS_PER_UNIT;
4034 305655 : gcc_assert (masked < try_size);
4035 305655 : if (masked >= try_size / 2)
4036 : {
4037 5224 : while (masked >= try_size / 2)
4038 : {
4039 2716 : try_size /= 2;
4040 2716 : try_pos += try_size / BITS_PER_UNIT;
4041 2716 : size -= try_size;
4042 2716 : masked -= try_size;
4043 : }
4044 : /* Need to recompute the alignment, so just retry at the new
4045 : position. */
4046 2508 : continue;
4047 : }
4048 : }
4049 :
4050 234354 : found:
4051 2912860 : ++ret;
4052 :
4053 2912860 : if (split_stores)
4054 : {
4055 906303 : split_store *store
4056 906303 : = new split_store (try_pos, try_size, align);
4057 906303 : info = find_constituent_stores (group, &store->orig_stores,
4058 : &first, try_bitpos, try_size);
4059 906303 : if (info
4060 809639 : && !gimple_clobber_p (info->stmt)
4061 809633 : && info->bitpos >= try_bitpos
4062 801112 : && info->bitpos + info->bitsize <= try_bitpos + try_size
4063 1705714 : && (store->orig_stores.length () == 1
4064 31186 : || found_orig
4065 6726 : || (info->bitpos == try_bitpos
4066 6628 : && (info->bitpos + info->bitsize
4067 : == try_bitpos + try_size))))
4068 : {
4069 792685 : store->orig = true;
4070 792685 : any_orig = true;
4071 : }
4072 906303 : split_stores->safe_push (store);
4073 : }
4074 :
4075 2912860 : try_pos += try_size / BITS_PER_UNIT;
4076 2912860 : size -= try_size;
4077 : }
4078 :
4079 997948 : if (total_orig)
4080 : {
4081 326116 : unsigned int i;
4082 326116 : split_store *store;
4083 : /* If we are reusing some original stores and any of the
4084 : original SSA_NAMEs had multiple uses, we need to subtract
4085 : those now before we add the new ones. */
4086 326116 : if (total_new[0] && any_orig)
4087 : {
4088 137629 : FOR_EACH_VEC_ELT (*split_stores, i, store)
4089 94777 : if (store->orig)
4090 93752 : total_new[0] -= count_multiple_uses (store->orig_stores[0]);
4091 : }
4092 326116 : total_new[0] += ret; /* The new store. */
4093 326116 : store_immediate_info *info = group->stores[0];
4094 326116 : if (info->ops[0].base_addr)
4095 73856 : total_new[0] += ret;
4096 326116 : if (info->ops[1].base_addr)
4097 1199 : total_new[0] += ret;
4098 326116 : switch (info->rhs_code)
4099 : {
4100 1300 : case BIT_AND_EXPR:
4101 1300 : case BIT_IOR_EXPR:
4102 1300 : case BIT_XOR_EXPR:
4103 1300 : total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
4104 1300 : break;
4105 : default:
4106 : break;
4107 : }
4108 1234245 : FOR_EACH_VEC_ELT (*split_stores, i, store)
4109 : {
4110 908129 : unsigned int j;
4111 908129 : bool bit_not_p[3] = { false, false, false };
4112 : /* If all orig_stores have certain bit_not_p set, then
4113 : we'd use a BIT_NOT_EXPR stmt and need to account for it.
4114 : If some orig_stores have certain bit_not_p set, then
4115 : we'd use a BIT_XOR_EXPR with a mask and need to account for
4116 : it. */
4117 1982466 : FOR_EACH_VEC_ELT (store->orig_stores, j, info)
4118 : {
4119 1074337 : if (info->ops[0].bit_not_p)
4120 331 : bit_not_p[0] = true;
4121 1074337 : if (info->ops[1].bit_not_p)
4122 17 : bit_not_p[1] = true;
4123 1074337 : if (info->bit_not_p)
4124 65 : bit_not_p[2] = true;
4125 : }
4126 908129 : total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
4127 : }
4128 :
4129 : }
4130 :
4131 : return ret;
4132 : }
4133 :
4134 : /* Return the operation through which the operand IDX (if < 2) or
4135 : result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
4136 : is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
4137 : the bits should be xored with mask. */
4138 :
4139 : static enum tree_code
4140 3460 : invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
4141 : {
4142 3460 : unsigned int i;
4143 3460 : store_immediate_info *info;
4144 3460 : unsigned int cnt = 0;
4145 3460 : bool any_paddings = false;
4146 13500 : FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4147 : {
4148 10040 : bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4149 10040 : if (bit_not_p)
4150 : {
4151 65 : ++cnt;
4152 65 : tree lhs = gimple_assign_lhs (info->stmt);
4153 130 : if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4154 130 : && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
4155 : any_paddings = true;
4156 : }
4157 : }
4158 3460 : mask = NULL_TREE;
4159 3460 : if (cnt == 0)
4160 : return NOP_EXPR;
4161 42 : if (cnt == split_store->orig_stores.length () && !any_paddings)
4162 : return BIT_NOT_EXPR;
4163 :
4164 14 : unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
4165 14 : unsigned buf_size = split_store->size / BITS_PER_UNIT;
4166 14 : unsigned char *buf
4167 14 : = XALLOCAVEC (unsigned char, buf_size);
4168 14 : memset (buf, ~0U, buf_size);
4169 70 : FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4170 : {
4171 56 : bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4172 56 : if (!bit_not_p)
4173 27 : continue;
4174 : /* Clear regions with bit_not_p and invert afterwards, rather than
4175 : clear regions with !bit_not_p, so that gaps in between stores aren't
4176 : set in the mask. */
4177 29 : unsigned HOST_WIDE_INT bitsize = info->bitsize;
4178 29 : unsigned HOST_WIDE_INT prec = bitsize;
4179 29 : unsigned int pos_in_buffer = 0;
4180 29 : if (any_paddings)
4181 : {
4182 12 : tree lhs = gimple_assign_lhs (info->stmt);
4183 24 : if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4184 24 : && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4185 12 : prec = TYPE_PRECISION (TREE_TYPE (lhs));
4186 : }
4187 29 : if (info->bitpos < try_bitpos)
4188 : {
4189 0 : gcc_assert (info->bitpos + bitsize > try_bitpos);
4190 0 : if (!BYTES_BIG_ENDIAN)
4191 : {
4192 0 : if (prec <= try_bitpos - info->bitpos)
4193 0 : continue;
4194 0 : prec -= try_bitpos - info->bitpos;
4195 : }
4196 0 : bitsize -= try_bitpos - info->bitpos;
4197 0 : if (BYTES_BIG_ENDIAN && prec > bitsize)
4198 : prec = bitsize;
4199 : }
4200 : else
4201 29 : pos_in_buffer = info->bitpos - try_bitpos;
4202 29 : if (prec < bitsize)
4203 : {
4204 : /* If this is a bool inversion, invert just the least significant
4205 : prec bits rather than all bits of it. */
4206 : if (BYTES_BIG_ENDIAN)
4207 : {
4208 : pos_in_buffer += bitsize - prec;
4209 : if (pos_in_buffer >= split_store->size)
4210 : continue;
4211 : }
4212 : bitsize = prec;
4213 : }
4214 29 : if (pos_in_buffer + bitsize > split_store->size)
4215 0 : bitsize = split_store->size - pos_in_buffer;
4216 29 : unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4217 29 : if (BYTES_BIG_ENDIAN)
4218 : clear_bit_region_be (p, (BITS_PER_UNIT - 1
4219 : - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4220 : else
4221 29 : clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4222 : }
4223 74 : for (unsigned int i = 0; i < buf_size; ++i)
4224 60 : buf[i] = ~buf[i];
4225 14 : mask = native_interpret_expr (int_type, buf, buf_size);
4226 14 : return BIT_XOR_EXPR;
4227 : }
4228 :
4229 : /* Given a merged store group GROUP output the widened version of it.
4230 : The store chain is against the base object BASE.
4231 : Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4232 : unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4233 : Make sure that the number of statements output is less than the number of
4234 : original statements. If a better sequence is possible emit it and
4235 : return true. */
4236 :
4237 : bool
4238 397286 : imm_store_chain_info::output_merged_store (merged_store_group *group)
4239 : {
4240 397286 : const unsigned HOST_WIDE_INT start_byte_pos
4241 397286 : = group->bitregion_start / BITS_PER_UNIT;
4242 467586 : unsigned int orig_num_stmts = group->stores.length ();
4243 397286 : if (orig_num_stmts < 2)
4244 : return false;
4245 :
4246 397286 : bool allow_unaligned_store
4247 397286 : = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4248 397286 : bool allow_unaligned_load = allow_unaligned_store;
4249 397286 : bool bzero_first = false;
4250 397286 : store_immediate_info *store;
4251 397286 : unsigned int num_clobber_stmts = 0;
4252 397286 : if (group->stores[0]->rhs_code == INTEGER_CST)
4253 : {
4254 : unsigned int i;
4255 479672 : FOR_EACH_VEC_ELT (group->stores, i, store)
4256 409372 : if (gimple_clobber_p (store->stmt))
4257 159720 : num_clobber_stmts++;
4258 249652 : else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4259 14419 : && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4260 14419 : && group->start == store->bitpos
4261 13973 : && group->width == store->bitsize
4262 9800 : && (group->start % BITS_PER_UNIT) == 0
4263 259452 : && (group->width % BITS_PER_UNIT) == 0)
4264 : {
4265 : bzero_first = true;
4266 : break;
4267 : }
4268 : else
4269 : break;
4270 1157973 : FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4271 838021 : if (gimple_clobber_p (store->stmt))
4272 1852 : num_clobber_stmts++;
4273 319952 : if (num_clobber_stmts == orig_num_stmts)
4274 : return false;
4275 249652 : orig_num_stmts -= num_clobber_stmts;
4276 : }
4277 326986 : if (allow_unaligned_store || bzero_first)
4278 : {
4279 : /* If unaligned stores are allowed, see how many stores we'd emit
4280 : for unaligned and how many stores we'd emit for aligned stores.
4281 : Only use unaligned stores if it allows fewer stores than aligned.
4282 : Similarly, if there is a whole region clear first, prefer expanding
4283 : it together compared to expanding clear first followed by merged
4284 : further stores. */
4285 326986 : unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4286 326986 : int pass_min = 0;
4287 1634930 : for (int pass = 0; pass < 4; ++pass)
4288 : {
4289 1307944 : if (!allow_unaligned_store && (pass & 1) != 0)
4290 0 : continue;
4291 1307944 : if (!bzero_first && (pass & 2) != 0)
4292 634372 : continue;
4293 1347144 : cnt[pass] = split_group (group, (pass & 1) != 0,
4294 673572 : allow_unaligned_load, (pass & 2) != 0,
4295 : NULL, NULL, NULL);
4296 673572 : if (cnt[pass] < cnt[pass_min])
4297 1307944 : pass_min = pass;
4298 : }
4299 326986 : if ((pass_min & 1) == 0)
4300 320835 : allow_unaligned_store = false;
4301 326986 : if ((pass_min & 2) == 0)
4302 325160 : bzero_first = false;
4303 : }
4304 :
4305 326986 : auto_vec<class split_store *, 32> split_stores;
4306 326986 : split_store *split_store;
4307 326986 : unsigned total_orig, total_new, i;
4308 326986 : split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4309 : &split_stores, &total_orig, &total_new);
4310 :
4311 : /* Determine if there is a clobber covering the whole group at the start,
4312 : followed by proposed split stores that cover the whole group. In that
4313 : case, prefer the transformation even if
4314 : split_stores.length () == orig_num_stmts. */
4315 326986 : bool clobber_first = false;
4316 326986 : if (num_clobber_stmts
4317 19299 : && gimple_clobber_p (group->stores[0]->stmt)
4318 18822 : && group->start == group->stores[0]->bitpos
4319 18822 : && group->width == group->stores[0]->bitsize
4320 18629 : && (group->start % BITS_PER_UNIT) == 0
4321 345615 : && (group->width % BITS_PER_UNIT) == 0)
4322 : {
4323 18629 : clobber_first = true;
4324 18629 : unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4325 46659 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4326 33168 : if (split_store->bytepos != pos)
4327 : {
4328 : clobber_first = false;
4329 : break;
4330 : }
4331 : else
4332 28030 : pos += split_store->size / BITS_PER_UNIT;
4333 18629 : if (pos != (group->start + group->width) / BITS_PER_UNIT)
4334 321954 : clobber_first = false;
4335 : }
4336 :
4337 653972 : if (split_stores.length () >= orig_num_stmts + clobber_first)
4338 : {
4339 :
4340 : /* We didn't manage to reduce the number of statements. Bail out. */
4341 241260 : if (dump_file && (dump_flags & TDF_DETAILS))
4342 2 : fprintf (dump_file, "Exceeded original number of stmts (%u)."
4343 : " Not profitable to emit new sequence.\n",
4344 : orig_num_stmts);
4345 842839 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4346 1203158 : delete split_store;
4347 : return false;
4348 : }
4349 85726 : if (total_orig <= total_new)
4350 : {
4351 : /* If number of estimated new statements is above estimated original
4352 : statements, bail out too. */
4353 1496 : if (dump_file && (dump_flags & TDF_DETAILS))
4354 0 : fprintf (dump_file, "Estimated number of original stmts (%u)"
4355 : " not larger than estimated number of new"
4356 : " stmts (%u).\n",
4357 : total_orig, total_new);
4358 3219 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4359 3446 : delete split_store;
4360 : return false;
4361 : }
4362 84230 : if (group->stores[0]->rhs_code == INTEGER_CST)
4363 : {
4364 166216 : bool all_orig = true;
4365 166216 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4366 162402 : if (!split_store->orig)
4367 : {
4368 : all_orig = false;
4369 : break;
4370 : }
4371 79214 : if (all_orig)
4372 : {
4373 : unsigned int cnt = split_stores.length ();
4374 : store_immediate_info *store;
4375 15605 : FOR_EACH_VEC_ELT (group->stores, i, store)
4376 11791 : if (gimple_clobber_p (store->stmt))
4377 3821 : ++cnt;
4378 : /* Punt if we wouldn't make any real changes, i.e. keep all
4379 : orig stmts + all clobbers. */
4380 3814 : if (cnt == group->stores.length ())
4381 : {
4382 3794 : if (dump_file && (dump_flags & TDF_DETAILS))
4383 0 : fprintf (dump_file, "Exceeded original number of stmts (%u)."
4384 : " Not profitable to emit new sequence.\n",
4385 : orig_num_stmts);
4386 11703 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4387 15818 : delete split_store;
4388 246550 : return false;
4389 : }
4390 : }
4391 : }
4392 :
4393 80436 : gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4394 80436 : gimple_seq seq = NULL;
4395 80436 : tree last_vdef, new_vuse;
4396 80436 : last_vdef = gimple_vdef (group->last_stmt);
4397 80436 : new_vuse = gimple_vuse (group->last_stmt);
4398 80436 : tree bswap_res = NULL_TREE;
4399 :
4400 : /* Clobbers are not removed. */
4401 80436 : if (gimple_clobber_p (group->last_stmt))
4402 : {
4403 6 : new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4404 6 : gimple_set_vdef (group->last_stmt, new_vuse);
4405 : }
4406 :
4407 80436 : if (group->stores[0]->rhs_code == LROTATE_EXPR
4408 80436 : || group->stores[0]->rhs_code == NOP_EXPR)
4409 : {
4410 834 : tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4411 834 : gimple *ins_stmt = group->stores[0]->ins_stmt;
4412 834 : struct symbolic_number *n = &group->stores[0]->n;
4413 834 : bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4414 :
4415 834 : switch (n->range)
4416 : {
4417 420 : case 16:
4418 420 : load_type = bswap_type = uint16_type_node;
4419 420 : break;
4420 219 : case 32:
4421 219 : load_type = uint32_type_node;
4422 219 : if (bswap)
4423 : {
4424 109 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4425 109 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4426 : }
4427 : break;
4428 195 : case 64:
4429 195 : load_type = uint64_type_node;
4430 195 : if (bswap)
4431 : {
4432 72 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4433 72 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4434 : }
4435 : break;
4436 0 : default:
4437 0 : gcc_unreachable ();
4438 : }
4439 :
4440 : /* If the loads have each vuse of the corresponding store,
4441 : we've checked the aliasing already in try_coalesce_bswap and
4442 : we want to sink the need load into seq. So need to use new_vuse
4443 : on the load. */
4444 834 : if (n->base_addr)
4445 : {
4446 421 : if (n->vuse == NULL)
4447 : {
4448 73 : n->vuse = new_vuse;
4449 73 : ins_stmt = NULL;
4450 : }
4451 : else
4452 : /* Update vuse in case it has changed by output_merged_stores. */
4453 696 : n->vuse = gimple_vuse (ins_stmt);
4454 : }
4455 834 : bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4456 : bswap_type, load_type, n, bswap,
4457 : ~(uint64_t) 0, 0);
4458 834 : gcc_assert (bswap_res);
4459 : }
4460 :
4461 80436 : gimple *stmt = NULL;
4462 160872 : auto_vec<gimple *, 32> orig_stmts;
4463 80436 : gimple_seq this_seq;
4464 80436 : tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4465 : is_gimple_mem_ref_addr, NULL_TREE);
4466 80436 : gimple_seq_add_seq_without_update (&seq, this_seq);
4467 :
4468 80436 : tree load_addr[2] = { NULL_TREE, NULL_TREE };
4469 80436 : gimple_seq load_seq[2] = { NULL, NULL };
4470 80436 : gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4471 241308 : for (int j = 0; j < 2; ++j)
4472 : {
4473 160872 : store_operand_info &op = group->stores[0]->ops[j];
4474 160872 : if (op.base_addr == NULL_TREE)
4475 158661 : continue;
4476 :
4477 2211 : store_immediate_info *infol = group->stores.last ();
4478 6633 : if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4479 : {
4480 : /* We can't pick the location randomly; while we've verified
4481 : all the loads have the same vuse, they can be still in different
4482 : basic blocks and we need to pick the one from the last bb:
4483 : int x = q[0];
4484 : if (x == N) return;
4485 : int y = q[1];
4486 : p[0] = x;
4487 : p[1] = y;
4488 : otherwise if we put the wider load at the q[0] load, we might
4489 : segfault if q[1] is not mapped. */
4490 1300 : basic_block bb = gimple_bb (op.stmt);
4491 1300 : gimple *ostmt = op.stmt;
4492 1300 : store_immediate_info *info;
4493 9691 : FOR_EACH_VEC_ELT (group->stores, i, info)
4494 : {
4495 8391 : gimple *tstmt = info->ops[j].stmt;
4496 8391 : basic_block tbb = gimple_bb (tstmt);
4497 8391 : if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4498 : {
4499 8371 : ostmt = tstmt;
4500 8371 : bb = tbb;
4501 : }
4502 : }
4503 1300 : load_gsi[j] = gsi_for_stmt (ostmt);
4504 1300 : load_addr[j]
4505 1300 : = force_gimple_operand_1 (unshare_expr (op.base_addr),
4506 : &load_seq[j], is_gimple_mem_ref_addr,
4507 : NULL_TREE);
4508 : }
4509 911 : else if (operand_equal_p (base_addr, op.base_addr, 0))
4510 38 : load_addr[j] = addr;
4511 : else
4512 : {
4513 873 : load_addr[j]
4514 873 : = force_gimple_operand_1 (unshare_expr (op.base_addr),
4515 : &this_seq, is_gimple_mem_ref_addr,
4516 : NULL_TREE);
4517 873 : gimple_seq_add_seq_without_update (&seq, this_seq);
4518 : }
4519 : }
4520 :
4521 378224 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4522 : {
4523 297788 : const unsigned HOST_WIDE_INT try_size = split_store->size;
4524 297788 : const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4525 297788 : const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4526 297788 : const unsigned HOST_WIDE_INT try_align = split_store->align;
4527 297788 : const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4528 297788 : tree dest, src;
4529 297788 : location_t loc;
4530 :
4531 297788 : if (split_store->orig)
4532 : {
4533 : /* If there is just a single non-clobber constituent store
4534 : which covers the whole area, just reuse the lhs and rhs. */
4535 213130 : gimple *orig_stmt = NULL;
4536 : store_immediate_info *store;
4537 : unsigned int j;
4538 213130 : FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4539 213130 : if (!gimple_clobber_p (store->stmt))
4540 : {
4541 : orig_stmt = store->stmt;
4542 : break;
4543 : }
4544 207183 : dest = gimple_assign_lhs (orig_stmt);
4545 207183 : src = gimple_assign_rhs1 (orig_stmt);
4546 207183 : loc = gimple_location (orig_stmt);
4547 : }
4548 : else
4549 : {
4550 : store_immediate_info *info;
4551 : unsigned short clique, base;
4552 : unsigned int k;
4553 309534 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4554 218929 : orig_stmts.safe_push (info->stmt);
4555 90605 : tree offset_type
4556 90605 : = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4557 90605 : tree dest_type;
4558 90605 : loc = get_location_for_stmts (orig_stmts);
4559 90605 : orig_stmts.truncate (0);
4560 :
4561 90605 : if (group->string_concatenation)
4562 36 : dest_type
4563 36 : = build_array_type_nelts (char_type_node,
4564 36 : try_size / BITS_PER_UNIT);
4565 : else
4566 : {
4567 90569 : dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4568 90569 : dest_type = build_aligned_type (dest_type, try_align);
4569 : }
4570 90605 : dest = fold_build2 (MEM_REF, dest_type, addr,
4571 : build_int_cst (offset_type, try_pos));
4572 90605 : if (TREE_CODE (dest) == MEM_REF)
4573 : {
4574 90605 : MR_DEPENDENCE_CLIQUE (dest) = clique;
4575 90605 : MR_DEPENDENCE_BASE (dest) = base;
4576 : }
4577 :
4578 90605 : tree mask;
4579 90605 : if (bswap_res || group->string_concatenation)
4580 870 : mask = integer_zero_node;
4581 : else
4582 89735 : mask = native_interpret_expr (dest_type,
4583 89735 : group->mask + try_offset,
4584 89735 : group->buf_size);
4585 :
4586 90605 : tree ops[2];
4587 181247 : for (int j = 0;
4588 362383 : j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4589 : ++j)
4590 : {
4591 90642 : store_operand_info &op = split_store->orig_stores[0]->ops[j];
4592 90642 : if (bswap_res)
4593 834 : ops[j] = bswap_res;
4594 89808 : else if (group->string_concatenation)
4595 : {
4596 72 : ops[j] = build_string (try_size / BITS_PER_UNIT,
4597 36 : (const char *) group->val + try_offset);
4598 36 : TREE_TYPE (ops[j]) = dest_type;
4599 : }
4600 89772 : else if (op.base_addr)
4601 : {
4602 9437 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4603 6848 : orig_stmts.safe_push (info->ops[j].stmt);
4604 :
4605 2589 : offset_type = get_alias_type_for_stmts (orig_stmts, true,
4606 : &clique, &base);
4607 2589 : location_t load_loc = get_location_for_stmts (orig_stmts);
4608 2589 : orig_stmts.truncate (0);
4609 :
4610 2589 : unsigned HOST_WIDE_INT load_align = group->load_align[j];
4611 2589 : unsigned HOST_WIDE_INT align_bitpos
4612 2589 : = known_alignment (try_bitpos
4613 2589 : - split_store->orig_stores[0]->bitpos
4614 2589 : + op.bitpos);
4615 2589 : if (align_bitpos & (load_align - 1))
4616 664 : load_align = least_bit_hwi (align_bitpos);
4617 :
4618 2589 : tree load_int_type
4619 2589 : = build_nonstandard_integer_type (try_size, UNSIGNED);
4620 2589 : load_int_type
4621 2589 : = build_aligned_type (load_int_type, load_align);
4622 :
4623 2589 : poly_uint64 load_pos
4624 2589 : = exact_div (try_bitpos
4625 2589 : - split_store->orig_stores[0]->bitpos
4626 2589 : + op.bitpos,
4627 : BITS_PER_UNIT);
4628 2589 : ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4629 : build_int_cst (offset_type, load_pos));
4630 2589 : if (TREE_CODE (ops[j]) == MEM_REF)
4631 : {
4632 2589 : MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4633 2589 : MR_DEPENDENCE_BASE (ops[j]) = base;
4634 : }
4635 2589 : if (!integer_zerop (mask))
4636 : {
4637 : /* The load might load some bits (that will be masked
4638 : off later on) uninitialized, avoid -W*uninitialized
4639 : warnings in that case. */
4640 105 : suppress_warning (ops[j], OPT_Wuninitialized);
4641 : }
4642 :
4643 2589 : stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4644 2589 : gimple_set_location (stmt, load_loc);
4645 2589 : if (gsi_bb (load_gsi[j]))
4646 : {
4647 3108 : gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4648 1554 : gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4649 : }
4650 : else
4651 : {
4652 1035 : gimple_set_vuse (stmt, new_vuse);
4653 1035 : gimple_seq_add_stmt_without_update (&seq, stmt);
4654 : }
4655 2589 : ops[j] = gimple_assign_lhs (stmt);
4656 2589 : tree xor_mask;
4657 2589 : enum tree_code inv_op
4658 2589 : = invert_op (split_store, j, dest_type, xor_mask);
4659 2589 : if (inv_op != NOP_EXPR)
4660 : {
4661 17 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4662 : inv_op, ops[j], xor_mask);
4663 17 : gimple_set_location (stmt, load_loc);
4664 17 : ops[j] = gimple_assign_lhs (stmt);
4665 :
4666 17 : if (gsi_bb (load_gsi[j]))
4667 7 : gimple_seq_add_stmt_without_update (&load_seq[j],
4668 : stmt);
4669 : else
4670 10 : gimple_seq_add_stmt_without_update (&seq, stmt);
4671 : }
4672 : }
4673 : else
4674 87183 : ops[j] = native_interpret_expr (dest_type,
4675 87183 : group->val + try_offset,
4676 87183 : group->buf_size);
4677 : }
4678 :
4679 90605 : switch (split_store->orig_stores[0]->rhs_code)
4680 : {
4681 : case BIT_AND_EXPR:
4682 : case BIT_IOR_EXPR:
4683 : case BIT_XOR_EXPR:
4684 185 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4685 : {
4686 148 : tree rhs1 = gimple_assign_rhs1 (info->stmt);
4687 148 : orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4688 : }
4689 37 : location_t bit_loc;
4690 37 : bit_loc = get_location_for_stmts (orig_stmts);
4691 37 : orig_stmts.truncate (0);
4692 :
4693 37 : stmt
4694 37 : = gimple_build_assign (make_ssa_name (dest_type),
4695 37 : split_store->orig_stores[0]->rhs_code,
4696 : ops[0], ops[1]);
4697 37 : gimple_set_location (stmt, bit_loc);
4698 : /* If there is just one load and there is a separate
4699 : load_seq[0], emit the bitwise op right after it. */
4700 37 : if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4701 0 : gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4702 : /* Otherwise, if at least one load is in seq, we need to
4703 : emit the bitwise op right before the store. If there
4704 : are two loads and are emitted somewhere else, it would
4705 : be better to emit the bitwise op as early as possible;
4706 : we don't track where that would be possible right now
4707 : though. */
4708 : else
4709 37 : gimple_seq_add_stmt_without_update (&seq, stmt);
4710 37 : src = gimple_assign_lhs (stmt);
4711 37 : tree xor_mask;
4712 37 : enum tree_code inv_op;
4713 37 : inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4714 37 : if (inv_op != NOP_EXPR)
4715 : {
4716 2 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4717 : inv_op, src, xor_mask);
4718 2 : gimple_set_location (stmt, bit_loc);
4719 2 : if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4720 0 : gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4721 : else
4722 2 : gimple_seq_add_stmt_without_update (&seq, stmt);
4723 2 : src = gimple_assign_lhs (stmt);
4724 : }
4725 : break;
4726 834 : case LROTATE_EXPR:
4727 834 : case NOP_EXPR:
4728 834 : src = ops[0];
4729 834 : if (!is_gimple_val (src))
4730 : {
4731 0 : stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4732 : src);
4733 0 : gimple_seq_add_stmt_without_update (&seq, stmt);
4734 0 : src = gimple_assign_lhs (stmt);
4735 : }
4736 834 : if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4737 : {
4738 86 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4739 : NOP_EXPR, src);
4740 86 : gimple_seq_add_stmt_without_update (&seq, stmt);
4741 86 : src = gimple_assign_lhs (stmt);
4742 : }
4743 834 : inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4744 834 : if (inv_op != NOP_EXPR)
4745 : {
4746 2 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4747 : inv_op, src, xor_mask);
4748 2 : gimple_set_location (stmt, loc);
4749 2 : gimple_seq_add_stmt_without_update (&seq, stmt);
4750 2 : src = gimple_assign_lhs (stmt);
4751 : }
4752 : break;
4753 89734 : default:
4754 89734 : src = ops[0];
4755 89734 : break;
4756 : }
4757 :
4758 : /* If bit insertion is required, we use the source as an accumulator
4759 : into which the successive bit-field values are manually inserted.
4760 : FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4761 90605 : if (group->bit_insertion)
4762 17231 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4763 13466 : if (info->rhs_code == BIT_INSERT_EXPR
4764 7400 : && info->bitpos < try_bitpos + try_size
4765 7400 : && info->bitpos + info->bitsize > try_bitpos)
4766 : {
4767 : /* Mask, truncate, convert to final type, shift and ior into
4768 : the accumulator. Note that every step can be a no-op. */
4769 7400 : const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4770 7400 : const HOST_WIDE_INT end_gap
4771 7400 : = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4772 7400 : tree tem = info->ops[0].val;
4773 7400 : if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4774 : {
4775 0 : const unsigned HOST_WIDE_INT size
4776 0 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4777 0 : tree integer_type
4778 0 : = build_nonstandard_integer_type (size, UNSIGNED);
4779 0 : tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4780 : integer_type, tem);
4781 : }
4782 7400 : if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4783 : {
4784 4419 : tree bitfield_type
4785 4419 : = build_nonstandard_integer_type (info->bitsize,
4786 : UNSIGNED);
4787 4419 : tem = gimple_convert (&seq, loc, bitfield_type, tem);
4788 : }
4789 2981 : else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4790 : {
4791 1931 : wide_int imask
4792 : = wi::mask (info->bitsize, false,
4793 1931 : TYPE_PRECISION (TREE_TYPE (tem)));
4794 5793 : tem = gimple_build (&seq, loc,
4795 1931 : BIT_AND_EXPR, TREE_TYPE (tem), tem,
4796 1931 : wide_int_to_tree (TREE_TYPE (tem),
4797 3862 : imask));
4798 1931 : }
4799 7400 : const HOST_WIDE_INT shift
4800 : = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4801 7400 : if (shift < 0)
4802 4 : tem = gimple_build (&seq, loc,
4803 4 : RSHIFT_EXPR, TREE_TYPE (tem), tem,
4804 4 : build_int_cst (NULL_TREE, -shift));
4805 7400 : tem = gimple_convert (&seq, loc, dest_type, tem);
4806 7400 : if (shift > 0)
4807 5785 : tem = gimple_build (&seq, loc,
4808 : LSHIFT_EXPR, dest_type, tem,
4809 5785 : build_int_cst (NULL_TREE, shift));
4810 7400 : src = gimple_build (&seq, loc,
4811 : BIT_IOR_EXPR, dest_type, tem, src);
4812 : }
4813 :
4814 90605 : if (!integer_zerop (mask))
4815 : {
4816 2500 : tree tem = make_ssa_name (dest_type);
4817 2500 : tree load_src = unshare_expr (dest);
4818 : /* The load might load some or all bits uninitialized,
4819 : avoid -W*uninitialized warnings in that case.
4820 : As optimization, it would be nice if all the bits are
4821 : provably uninitialized (no stores at all yet or previous
4822 : store a CLOBBER) we'd optimize away the load and replace
4823 : it e.g. with 0. */
4824 2500 : suppress_warning (load_src, OPT_Wuninitialized);
4825 2500 : stmt = gimple_build_assign (tem, load_src);
4826 2500 : gimple_set_location (stmt, loc);
4827 2500 : gimple_set_vuse (stmt, new_vuse);
4828 2500 : gimple_seq_add_stmt_without_update (&seq, stmt);
4829 :
4830 : /* FIXME: If there is a single chunk of zero bits in mask,
4831 : perhaps use BIT_INSERT_EXPR instead? */
4832 2500 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4833 : BIT_AND_EXPR, tem, mask);
4834 2500 : gimple_set_location (stmt, loc);
4835 2500 : gimple_seq_add_stmt_without_update (&seq, stmt);
4836 2500 : tem = gimple_assign_lhs (stmt);
4837 :
4838 2500 : if (TREE_CODE (src) == INTEGER_CST)
4839 945 : src = wide_int_to_tree (dest_type,
4840 945 : wi::bit_and_not (wi::to_wide (src),
4841 1890 : wi::to_wide (mask)));
4842 : else
4843 : {
4844 1555 : tree nmask
4845 1555 : = wide_int_to_tree (dest_type,
4846 1555 : wi::bit_not (wi::to_wide (mask)));
4847 1555 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4848 : BIT_AND_EXPR, src, nmask);
4849 1555 : gimple_set_location (stmt, loc);
4850 1555 : gimple_seq_add_stmt_without_update (&seq, stmt);
4851 1555 : src = gimple_assign_lhs (stmt);
4852 : }
4853 2500 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4854 : BIT_IOR_EXPR, tem, src);
4855 2500 : gimple_set_location (stmt, loc);
4856 2500 : gimple_seq_add_stmt_without_update (&seq, stmt);
4857 2500 : src = gimple_assign_lhs (stmt);
4858 : }
4859 : }
4860 :
4861 297788 : stmt = gimple_build_assign (dest, src);
4862 297788 : gimple_set_location (stmt, loc);
4863 297788 : gimple_set_vuse (stmt, new_vuse);
4864 297788 : gimple_seq_add_stmt_without_update (&seq, stmt);
4865 :
4866 297788 : if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4867 53 : add_stmt_to_eh_lp (stmt, group->lp_nr);
4868 :
4869 297788 : tree new_vdef;
4870 595576 : if (i < split_stores.length () - 1)
4871 217352 : new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4872 : else
4873 : new_vdef = last_vdef;
4874 :
4875 297788 : gimple_set_vdef (stmt, new_vdef);
4876 297788 : SSA_NAME_DEF_STMT (new_vdef) = stmt;
4877 297788 : new_vuse = new_vdef;
4878 : }
4879 :
4880 378224 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4881 595576 : delete split_store;
4882 :
4883 80436 : gcc_assert (seq);
4884 80436 : if (dump_file)
4885 : {
4886 184 : fprintf (dump_file,
4887 : "New sequence of %u stores to replace old one of %u stores\n",
4888 : split_stores.length (), orig_num_stmts);
4889 92 : if (dump_flags & TDF_DETAILS)
4890 23 : print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4891 : }
4892 :
4893 80436 : if (gimple_clobber_p (group->last_stmt))
4894 6 : update_stmt (group->last_stmt);
4895 :
4896 80436 : if (group->lp_nr > 0)
4897 : {
4898 : /* We're going to insert a sequence of (potentially) throwing stores
4899 : into an active EH region. This means that we're going to create
4900 : new basic blocks with EH edges pointing to the post landing pad
4901 : and, therefore, to have to update its PHI nodes, if any. For the
4902 : virtual PHI node, we're going to use the VDEFs created above, but
4903 : for the other nodes, we need to record the original reaching defs. */
4904 46 : eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4905 46 : basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4906 46 : basic_block last_bb = gimple_bb (group->last_stmt);
4907 46 : edge last_edge = find_edge (last_bb, lp_bb);
4908 46 : auto_vec<tree, 16> last_defs;
4909 46 : gphi_iterator gpi;
4910 98 : for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4911 : {
4912 52 : gphi *phi = gpi.phi ();
4913 52 : tree last_def;
4914 104 : if (virtual_operand_p (gimple_phi_result (phi)))
4915 46 : last_def = NULL_TREE;
4916 : else
4917 6 : last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4918 52 : last_defs.safe_push (last_def);
4919 : }
4920 :
4921 : /* Do the insertion. Then, if new basic blocks have been created in the
4922 : process, rewind the chain of VDEFs create above to walk the new basic
4923 : blocks and update the corresponding arguments of the PHI nodes. */
4924 46 : update_modified_stmts (seq);
4925 46 : if (gimple_find_sub_bbs (seq, &last_gsi))
4926 99 : while (last_vdef != gimple_vuse (group->last_stmt))
4927 : {
4928 53 : gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4929 53 : if (stmt_could_throw_p (cfun, stmt))
4930 : {
4931 53 : edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4932 53 : unsigned int i;
4933 53 : for (gpi = gsi_start_phis (lp_bb), i = 0;
4934 112 : !gsi_end_p (gpi);
4935 59 : gsi_next (&gpi), i++)
4936 : {
4937 59 : gphi *phi = gpi.phi ();
4938 59 : tree new_def;
4939 118 : if (virtual_operand_p (gimple_phi_result (phi)))
4940 : new_def = last_vdef;
4941 : else
4942 6 : new_def = last_defs[i];
4943 59 : add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4944 : }
4945 : }
4946 152 : last_vdef = gimple_vuse (stmt);
4947 : }
4948 46 : }
4949 : else
4950 80390 : gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4951 :
4952 241308 : for (int j = 0; j < 2; ++j)
4953 160872 : if (load_seq[j])
4954 1300 : gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4955 :
4956 80436 : return true;
4957 326986 : }
4958 :
4959 : /* Process the merged_store_group objects created in the coalescing phase.
4960 : The stores are all against the base object BASE.
4961 : Try to output the widened stores and delete the original statements if
4962 : successful. Return true iff any changes were made. */
4963 :
4964 : bool
4965 365144 : imm_store_chain_info::output_merged_stores ()
4966 : {
4967 365144 : unsigned int i;
4968 365144 : merged_store_group *merged_store;
4969 365144 : bool ret = false;
4970 762430 : FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4971 : {
4972 397286 : if (dbg_cnt (store_merging)
4973 397286 : && output_merged_store (merged_store))
4974 : {
4975 : unsigned int j;
4976 : store_immediate_info *store;
4977 897617 : FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4978 : {
4979 419895 : gimple *stmt = store->stmt;
4980 419895 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4981 : /* Don't remove clobbers, they are still useful even if
4982 : everything is overwritten afterwards. */
4983 419895 : if (gimple_clobber_p (stmt))
4984 2768 : continue;
4985 417127 : gsi_remove (&gsi, true);
4986 417127 : if (store->lp_nr)
4987 165 : remove_stmt_from_eh_lp (stmt);
4988 417127 : if (stmt != merged_store->last_stmt)
4989 : {
4990 336697 : unlink_stmt_vdef (stmt);
4991 336697 : release_defs (stmt);
4992 : }
4993 : }
4994 : ret = true;
4995 : }
4996 : }
4997 365144 : if (ret && dump_file)
4998 92 : fprintf (dump_file, "Merging successful!\n");
4999 :
5000 365144 : return ret;
5001 : }
5002 :
5003 : /* Coalesce the store_immediate_info objects recorded against the base object
5004 : BASE in the first phase and output them.
5005 : Delete the allocated structures.
5006 : Return true if any changes were made. */
5007 :
5008 : bool
5009 1907401 : imm_store_chain_info::terminate_and_process_chain ()
5010 : {
5011 1907401 : if (dump_file && (dump_flags & TDF_DETAILS))
5012 98 : fprintf (dump_file, "Terminating chain with %u stores\n",
5013 : m_store_info.length ());
5014 : /* Process store chain. */
5015 1907401 : bool ret = false;
5016 1907401 : if (m_store_info.length () > 1)
5017 : {
5018 491934 : ret = coalesce_immediate_stores ();
5019 491934 : if (ret)
5020 365144 : ret = output_merged_stores ();
5021 : }
5022 :
5023 : /* Delete all the entries we allocated ourselves. */
5024 1907401 : store_immediate_info *info;
5025 1907401 : unsigned int i;
5026 4945428 : FOR_EACH_VEC_ELT (m_store_info, i, info)
5027 3038027 : delete info;
5028 :
5029 : merged_store_group *merged_info;
5030 2304687 : FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
5031 397286 : delete merged_info;
5032 :
5033 1907401 : return ret;
5034 : }
5035 :
5036 : /* Return true iff LHS is a destination potentially interesting for
5037 : store merging. In practice these are the codes that get_inner_reference
5038 : can process. */
5039 :
5040 : static bool
5041 9362223 : lhs_valid_for_store_merging_p (tree lhs)
5042 : {
5043 9362223 : if (DECL_P (lhs))
5044 : return true;
5045 :
5046 7189395 : switch (TREE_CODE (lhs))
5047 : {
5048 : case ARRAY_REF:
5049 : case ARRAY_RANGE_REF:
5050 : case BIT_FIELD_REF:
5051 : case COMPONENT_REF:
5052 : case MEM_REF:
5053 : case VIEW_CONVERT_EXPR:
5054 : return true;
5055 : default:
5056 : return false;
5057 : }
5058 : }
5059 :
5060 : /* Return true if the tree RHS is a constant we want to consider
5061 : during store merging. In practice accept all codes that
5062 : native_encode_expr accepts. */
5063 :
5064 : static bool
5065 5038743 : rhs_valid_for_store_merging_p (tree rhs)
5066 : {
5067 5038743 : unsigned HOST_WIDE_INT size;
5068 5038743 : if (TREE_CODE (rhs) == CONSTRUCTOR
5069 935919 : && CONSTRUCTOR_NELTS (rhs) == 0
5070 935919 : && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
5071 5974662 : && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
5072 : return true;
5073 8205648 : return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
5074 4102824 : && native_encode_expr (rhs, NULL, size) != 0);
5075 : }
5076 :
5077 : /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
5078 : and return true on success or false on failure. */
5079 :
5080 : static bool
5081 1065595 : adjust_bit_pos (poly_offset_int byte_off,
5082 : poly_int64 *pbitpos,
5083 : poly_uint64 *pbitregion_start,
5084 : poly_uint64 *pbitregion_end)
5085 : {
5086 1065595 : poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
5087 1065595 : bit_off += *pbitpos;
5088 :
5089 1065595 : if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
5090 : {
5091 1054736 : if (maybe_ne (*pbitregion_end, 0U))
5092 : {
5093 6824 : bit_off = byte_off << LOG2_BITS_PER_UNIT;
5094 6824 : bit_off += *pbitregion_start;
5095 6824 : if (bit_off.to_uhwi (pbitregion_start))
5096 : {
5097 6824 : bit_off = byte_off << LOG2_BITS_PER_UNIT;
5098 6824 : bit_off += *pbitregion_end;
5099 6824 : if (!bit_off.to_uhwi (pbitregion_end))
5100 0 : *pbitregion_end = 0;
5101 : }
5102 : else
5103 0 : *pbitregion_end = 0;
5104 : }
5105 1054736 : return true;
5106 : }
5107 : else
5108 10859 : return false;
5109 : }
5110 :
5111 : /* If MEM is a memory reference usable for store merging (either as
5112 : store destination or for loads), return the non-NULL base_addr
5113 : and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
5114 : Otherwise return NULL, *PBITPOS should be still valid even for that
5115 : case. */
5116 :
5117 : static tree
5118 5597512 : mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
5119 : poly_uint64 *pbitpos,
5120 : poly_uint64 *pbitregion_start,
5121 : poly_uint64 *pbitregion_end)
5122 : {
5123 5597512 : poly_int64 bitsize, bitpos;
5124 5597512 : poly_uint64 bitregion_start = 0, bitregion_end = 0;
5125 5597512 : machine_mode mode;
5126 5597512 : int unsignedp = 0, reversep = 0, volatilep = 0;
5127 5597512 : tree offset;
5128 5597512 : tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
5129 : &unsignedp, &reversep, &volatilep);
5130 5597512 : *pbitsize = bitsize;
5131 5597512 : if (known_le (bitsize, 0))
5132 : return NULL_TREE;
5133 :
5134 5596312 : if (TREE_CODE (mem) == COMPONENT_REF
5135 5596312 : && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
5136 : {
5137 45057 : get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
5138 45057 : if (maybe_ne (bitregion_end, 0U))
5139 45057 : bitregion_end += 1;
5140 : }
5141 :
5142 5596312 : if (reversep)
5143 : return NULL_TREE;
5144 :
5145 : /* We do not want to rewrite TARGET_MEM_REFs. */
5146 5595818 : if (TREE_CODE (base_addr) == TARGET_MEM_REF)
5147 : return NULL_TREE;
5148 : /* In some cases get_inner_reference may return a
5149 : MEM_REF [ptr + byteoffset]. For the purposes of this pass
5150 : canonicalize the base_addr to MEM_REF [ptr] and take
5151 : byteoffset into account in the bitpos. This occurs in
5152 : PR 23684 and this way we can catch more chains. */
5153 5546880 : else if (TREE_CODE (base_addr) == MEM_REF)
5154 : {
5155 1062161 : if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
5156 : &bitregion_start, &bitregion_end))
5157 : return NULL_TREE;
5158 1051302 : base_addr = TREE_OPERAND (base_addr, 0);
5159 : }
5160 : /* get_inner_reference returns the base object, get at its
5161 : address now. */
5162 : else
5163 : {
5164 4484719 : if (maybe_lt (bitpos, 0))
5165 : return NULL_TREE;
5166 4484591 : base_addr = build_fold_addr_expr (base_addr);
5167 : }
5168 :
5169 5535893 : if (offset)
5170 : {
5171 : /* If the access is variable offset then a base decl has to be
5172 : address-taken to be able to emit pointer-based stores to it.
5173 : ??? We might be able to get away with re-using the original
5174 : base up to the first variable part and then wrapping that inside
5175 : a BIT_FIELD_REF. */
5176 32222 : tree base = get_base_address (base_addr);
5177 32222 : if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
5178 : return NULL_TREE;
5179 :
5180 : /* Similarly to above for the base, remove constant from the offset. */
5181 32222 : if (TREE_CODE (offset) == PLUS_EXPR
5182 3490 : && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
5183 35712 : && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
5184 : &bitpos, &bitregion_start, &bitregion_end))
5185 3434 : offset = TREE_OPERAND (offset, 0);
5186 :
5187 32222 : base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5188 : base_addr, offset);
5189 : }
5190 :
5191 5535893 : if (known_eq (bitregion_end, 0U))
5192 : {
5193 5491261 : bitregion_start = round_down_to_byte_boundary (bitpos);
5194 5491261 : bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5195 : }
5196 :
5197 5535893 : *pbitsize = bitsize;
5198 5535893 : *pbitpos = bitpos;
5199 5535893 : *pbitregion_start = bitregion_start;
5200 5535893 : *pbitregion_end = bitregion_end;
5201 5535893 : return base_addr;
5202 : }
5203 :
5204 : /* Return true if STMT is a load that can be used for store merging.
5205 : In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5206 : BITREGION_END are properties of the corresponding store. */
5207 :
5208 : static bool
5209 1036668 : handled_load (gimple *stmt, store_operand_info *op,
5210 : poly_uint64 bitsize, poly_uint64 bitpos,
5211 : poly_uint64 bitregion_start, poly_uint64 bitregion_end)
5212 : {
5213 1036668 : if (!is_gimple_assign (stmt))
5214 : return false;
5215 1036564 : if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5216 : {
5217 1469 : tree rhs1 = gimple_assign_rhs1 (stmt);
5218 1469 : if (TREE_CODE (rhs1) == SSA_NAME
5219 1469 : && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5220 : bitregion_start, bitregion_end))
5221 : {
5222 : /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5223 : been optimized earlier, but if allowed here, would confuse the
5224 : multiple uses counting. */
5225 743 : if (op->bit_not_p)
5226 : return false;
5227 743 : op->bit_not_p = !op->bit_not_p;
5228 743 : return true;
5229 : }
5230 726 : return false;
5231 : }
5232 1035095 : if (gimple_vuse (stmt)
5233 544513 : && gimple_assign_load_p (stmt)
5234 544480 : && !stmt_can_throw_internal (cfun, stmt)
5235 1576266 : && !gimple_has_volatile_ops (stmt))
5236 : {
5237 540871 : tree mem = gimple_assign_rhs1 (stmt);
5238 540871 : op->base_addr
5239 540871 : = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5240 : &op->bitregion_start,
5241 : &op->bitregion_end);
5242 540871 : if (op->base_addr != NULL_TREE
5243 488502 : && known_eq (op->bitsize, bitsize)
5244 976977 : && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5245 488485 : && known_ge (op->bitpos - op->bitregion_start,
5246 : bitpos - bitregion_start)
5247 1029122 : && known_ge (op->bitregion_end - op->bitpos,
5248 : bitregion_end - bitpos))
5249 : {
5250 488196 : op->stmt = stmt;
5251 488196 : op->val = mem;
5252 488196 : op->bit_not_p = false;
5253 488196 : return true;
5254 : }
5255 : }
5256 : return false;
5257 : }
5258 :
5259 : /* Return the index number of the landing pad for STMT, if any. */
5260 :
5261 : static int
5262 3038027 : lp_nr_for_store (gimple *stmt)
5263 : {
5264 3038027 : if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5265 : return 0;
5266 :
5267 800640 : if (!stmt_could_throw_p (cfun, stmt))
5268 : return 0;
5269 :
5270 78080 : return lookup_stmt_eh_lp (stmt);
5271 : }
5272 :
5273 : /* Record the store STMT for store merging optimization if it can be
5274 : optimized. Return true if any changes were made. */
5275 :
5276 : bool
5277 5056641 : pass_store_merging::process_store (gimple *stmt)
5278 : {
5279 5056641 : tree lhs = gimple_assign_lhs (stmt);
5280 5056641 : tree rhs = gimple_assign_rhs1 (stmt);
5281 5056641 : poly_uint64 bitsize, bitpos = 0;
5282 5056641 : poly_uint64 bitregion_start = 0, bitregion_end = 0;
5283 5056641 : tree base_addr
5284 5056641 : = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5285 5056641 : &bitregion_start, &bitregion_end);
5286 5056641 : if (known_eq (bitsize, 0U))
5287 : return false;
5288 :
5289 5055447 : bool invalid = (base_addr == NULL_TREE
5290 5055447 : || (maybe_gt (bitsize,
5291 : (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5292 157050 : && TREE_CODE (rhs) != INTEGER_CST
5293 157050 : && (TREE_CODE (rhs) != CONSTRUCTOR
5294 143804 : || CONSTRUCTOR_NELTS (rhs) != 0)));
5295 5055447 : enum tree_code rhs_code = ERROR_MARK;
5296 5055447 : bool bit_not_p = false;
5297 5055447 : struct symbolic_number n;
5298 5055447 : gimple *ins_stmt = NULL;
5299 15166341 : store_operand_info ops[2];
5300 5055447 : if (invalid)
5301 : ;
5302 5034145 : else if (TREE_CODE (rhs) == STRING_CST)
5303 : {
5304 3681 : rhs_code = STRING_CST;
5305 3681 : ops[0].val = rhs;
5306 : }
5307 5030464 : else if (rhs_valid_for_store_merging_p (rhs))
5308 : {
5309 2476799 : rhs_code = INTEGER_CST;
5310 2476799 : ops[0].val = rhs;
5311 : }
5312 2553665 : else if (TREE_CODE (rhs) == SSA_NAME)
5313 : {
5314 1654301 : gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5315 1654301 : if (!is_gimple_assign (def_stmt))
5316 : invalid = true;
5317 1016127 : else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5318 : bitregion_start, bitregion_end))
5319 : rhs_code = MEM_REF;
5320 539137 : else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5321 : {
5322 606 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
5323 606 : if (TREE_CODE (rhs1) == SSA_NAME
5324 606 : && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5325 : {
5326 : bit_not_p = true;
5327 : def_stmt = SSA_NAME_DEF_STMT (rhs1);
5328 : }
5329 : }
5330 :
5331 1654301 : if (rhs_code == ERROR_MARK && !invalid)
5332 539137 : switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5333 : {
5334 15995 : case BIT_AND_EXPR:
5335 15995 : case BIT_IOR_EXPR:
5336 15995 : case BIT_XOR_EXPR:
5337 15995 : tree rhs1, rhs2;
5338 15995 : rhs1 = gimple_assign_rhs1 (def_stmt);
5339 15995 : rhs2 = gimple_assign_rhs2 (def_stmt);
5340 15995 : invalid = true;
5341 15995 : if (TREE_CODE (rhs1) != SSA_NAME)
5342 : break;
5343 15995 : def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5344 15995 : if (!is_gimple_assign (def_stmt1)
5345 15995 : || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5346 : bitregion_start, bitregion_end))
5347 : break;
5348 8279 : if (rhs_valid_for_store_merging_p (rhs2))
5349 4140 : ops[1].val = rhs2;
5350 4139 : else if (TREE_CODE (rhs2) != SSA_NAME)
5351 : break;
5352 : else
5353 : {
5354 4139 : def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5355 4139 : if (!is_gimple_assign (def_stmt2))
5356 : break;
5357 4036 : else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5358 : bitregion_start, bitregion_end))
5359 : break;
5360 : }
5361 : invalid = false;
5362 : break;
5363 : default:
5364 : invalid = true;
5365 : break;
5366 : }
5367 :
5368 1654301 : unsigned HOST_WIDE_INT const_bitsize;
5369 1654301 : if (bitsize.is_constant (&const_bitsize)
5370 1654301 : && (const_bitsize % BITS_PER_UNIT) == 0
5371 1641607 : && const_bitsize <= 64
5372 1471222 : && multiple_p (bitpos, BITS_PER_UNIT))
5373 : {
5374 1470954 : ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5375 1470954 : if (ins_stmt)
5376 : {
5377 472994 : uint64_t nn = n.n;
5378 472994 : for (unsigned HOST_WIDE_INT i = 0;
5379 3348298 : i < const_bitsize;
5380 2875304 : i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5381 2887073 : if ((nn & MARKER_MASK) == 0
5382 2887073 : || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5383 : {
5384 : ins_stmt = NULL;
5385 : break;
5386 : }
5387 472994 : if (ins_stmt)
5388 : {
5389 461225 : if (invalid)
5390 : {
5391 62105 : rhs_code = LROTATE_EXPR;
5392 62105 : ops[0].base_addr = NULL_TREE;
5393 62105 : ops[1].base_addr = NULL_TREE;
5394 : }
5395 : invalid = false;
5396 : }
5397 : }
5398 : }
5399 :
5400 1193076 : if (invalid
5401 1108139 : && bitsize.is_constant (&const_bitsize)
5402 1108139 : && ((const_bitsize % BITS_PER_UNIT) != 0
5403 1096708 : || !multiple_p (bitpos, BITS_PER_UNIT))
5404 1216472 : && const_bitsize <= MAX_FIXED_MODE_SIZE)
5405 : {
5406 : /* Bypass a conversion to the bit-field type. */
5407 11698 : if (!bit_not_p
5408 11659 : && is_gimple_assign (def_stmt)
5409 18821 : && CONVERT_EXPR_CODE_P (rhs_code))
5410 : {
5411 5221 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
5412 5221 : if (TREE_CODE (rhs1) == SSA_NAME
5413 5221 : && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5414 : rhs = rhs1;
5415 : }
5416 11698 : rhs_code = BIT_INSERT_EXPR;
5417 11698 : bit_not_p = false;
5418 11698 : ops[0].val = rhs;
5419 11698 : ops[0].base_addr = NULL_TREE;
5420 11698 : ops[1].base_addr = NULL_TREE;
5421 11698 : invalid = false;
5422 : }
5423 : }
5424 : else
5425 : invalid = true;
5426 :
5427 4134781 : unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5428 4134781 : unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5429 11698 : if (invalid
5430 3038340 : || !bitsize.is_constant (&const_bitsize)
5431 3038340 : || !bitpos.is_constant (&const_bitpos)
5432 3038340 : || !bitregion_start.is_constant (&const_bitregion_start)
5433 3038340 : || !bitregion_end.is_constant (&const_bitregion_end)
5434 4123083 : || ((const_bitregion_end - const_bitregion_start + 1) / BITS_PER_UNIT
5435 3038340 : > (unsigned) param_store_merging_max_size))
5436 2017420 : return terminate_all_aliasing_chains (NULL, stmt);
5437 :
5438 3038027 : if (!ins_stmt)
5439 2576811 : memset (&n, 0, sizeof (n));
5440 :
5441 3038027 : class imm_store_chain_info **chain_info = NULL;
5442 3038027 : bool ret = false;
5443 3038027 : if (base_addr)
5444 3038027 : chain_info = m_stores.get (base_addr);
5445 :
5446 3038027 : store_immediate_info *info;
5447 3038027 : if (chain_info)
5448 : {
5449 1130626 : unsigned int ord = (*chain_info)->m_store_info.length ();
5450 2261252 : info = new store_immediate_info (const_bitsize, const_bitpos,
5451 : const_bitregion_start,
5452 : const_bitregion_end,
5453 : stmt, ord, rhs_code, n, ins_stmt,
5454 : bit_not_p, lp_nr_for_store (stmt),
5455 1130626 : ops[0], ops[1]);
5456 1130626 : if (dump_file && (dump_flags & TDF_DETAILS))
5457 : {
5458 202 : fprintf (dump_file, "Recording immediate store from stmt:\n");
5459 202 : print_gimple_stmt (dump_file, stmt, 0);
5460 : }
5461 1130626 : (*chain_info)->m_store_info.safe_push (info);
5462 1130626 : m_n_stores++;
5463 1130626 : ret |= terminate_all_aliasing_chains (chain_info, stmt);
5464 : /* If we reach the limit of stores to merge in a chain terminate and
5465 : process the chain now. */
5466 1130626 : if ((*chain_info)->m_store_info.length ()
5467 1130626 : == (unsigned int) param_max_stores_to_merge)
5468 : {
5469 460 : if (dump_file && (dump_flags & TDF_DETAILS))
5470 0 : fprintf (dump_file,
5471 : "Reached maximum number of statements to merge:\n");
5472 460 : ret |= terminate_and_process_chain (*chain_info);
5473 : }
5474 : }
5475 : else
5476 : {
5477 : /* Store aliases any existing chain? */
5478 1907401 : ret |= terminate_all_aliasing_chains (NULL, stmt);
5479 :
5480 : /* Start a new chain. */
5481 1907401 : class imm_store_chain_info *new_chain
5482 1907401 : = new imm_store_chain_info (m_stores_head, base_addr);
5483 3814802 : info = new store_immediate_info (const_bitsize, const_bitpos,
5484 : const_bitregion_start,
5485 : const_bitregion_end,
5486 : stmt, 0, rhs_code, n, ins_stmt,
5487 : bit_not_p, lp_nr_for_store (stmt),
5488 1907401 : ops[0], ops[1]);
5489 1907401 : new_chain->m_store_info.safe_push (info);
5490 1907401 : m_n_stores++;
5491 1907401 : m_stores.put (base_addr, new_chain);
5492 1907401 : m_n_chains++;
5493 1907401 : if (dump_file && (dump_flags & TDF_DETAILS))
5494 : {
5495 49 : fprintf (dump_file, "Starting active chain number %u with statement:\n",
5496 : m_n_chains);
5497 49 : print_gimple_stmt (dump_file, stmt, 0);
5498 49 : fprintf (dump_file, "The base object is:\n");
5499 49 : print_generic_expr (dump_file, base_addr);
5500 49 : fprintf (dump_file, "\n");
5501 : }
5502 : }
5503 :
5504 : /* Prune oldest chains so that after adding the chain or store above
5505 : we're again within the limits set by the params. */
5506 3038027 : if (m_n_chains > (unsigned)param_max_store_chains_to_track
5507 3036015 : || m_n_stores > (unsigned)param_max_stores_to_track)
5508 : {
5509 2016 : if (dump_file && (dump_flags & TDF_DETAILS))
5510 0 : fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5511 : "terminating oldest chain(s).\n", m_n_chains,
5512 : param_max_store_chains_to_track, m_n_stores,
5513 : param_max_stores_to_track);
5514 2016 : imm_store_chain_info **e = &m_stores_head;
5515 2016 : unsigned idx = 0;
5516 2016 : unsigned n_stores = 0;
5517 132550 : while (*e)
5518 : {
5519 130534 : if (idx >= (unsigned)param_max_store_chains_to_track
5520 130534 : || (n_stores + (*e)->m_store_info.length ()
5521 128522 : > (unsigned)param_max_stores_to_track))
5522 2016 : ret |= terminate_and_process_chain (*e);
5523 : else
5524 : {
5525 128518 : n_stores += (*e)->m_store_info.length ();
5526 128518 : e = &(*e)->next;
5527 128518 : ++idx;
5528 : }
5529 : }
5530 : }
5531 :
5532 : return ret;
5533 : }
5534 :
5535 : /* Return true if STMT is a store valid for store merging. */
5536 :
5537 : static bool
5538 35715014 : store_valid_for_store_merging_p (gimple *stmt)
5539 : {
5540 35715014 : return gimple_assign_single_p (stmt)
5541 16333171 : && gimple_vdef (stmt)
5542 9362223 : && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5543 44813858 : && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5544 : }
5545 :
5546 : enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5547 :
5548 : /* Return the status of basic block BB wrt store merging. */
5549 :
5550 : static enum basic_block_status
5551 8821256 : get_status_for_store_merging (basic_block bb)
5552 : {
5553 8821256 : unsigned int num_statements = 0;
5554 8821256 : unsigned int num_constructors = 0;
5555 8821256 : gimple_stmt_iterator gsi;
5556 8821256 : edge e;
5557 8821256 : gimple *last_stmt = NULL;
5558 :
5559 71760067 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5560 : {
5561 64054208 : gimple *stmt = gsi_stmt (gsi);
5562 :
5563 64054208 : if (is_gimple_debug (stmt))
5564 39012596 : continue;
5565 :
5566 25041612 : last_stmt = stmt;
5567 :
5568 25041612 : if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5569 : break;
5570 :
5571 23943577 : if (is_gimple_assign (stmt)
5572 23943577 : && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5573 14865253 : || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR))
5574 : {
5575 621906 : tree rhs = gimple_assign_rhs1 (stmt);
5576 621906 : if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5577 124973 : && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5578 734953 : && gimple_assign_lhs (stmt) != NULL_TREE)
5579 : {
5580 113047 : HOST_WIDE_INT sz
5581 113047 : = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5582 113047 : if (sz == 16 || sz == 32 || sz == 64)
5583 : {
5584 : num_constructors = 1;
5585 : break;
5586 : }
5587 : }
5588 : }
5589 : }
5590 :
5591 8821256 : if (num_statements == 0 && num_constructors == 0)
5592 : return BB_INVALID;
5593 :
5594 872353 : if (cfun->can_throw_non_call_exceptions && cfun->eh
5595 872353 : && store_valid_for_store_merging_p (last_stmt)
5596 668233 : && (e = find_fallthru_edge (bb->succs))
5597 2595968 : && e->dest == bb->next_bb)
5598 : return BB_EXTENDED_VALID;
5599 :
5600 2045395 : return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5601 : }
5602 :
5603 : /* Entry point for the pass. Go over each basic block recording chains of
5604 : immediate stores. Upon encountering a terminating statement (as defined
5605 : by stmt_terminates_chain_p) process the recorded stores and emit the widened
5606 : variants. */
5607 :
5608 : unsigned int
5609 961563 : pass_store_merging::execute (function *fun)
5610 : {
5611 961563 : basic_block bb;
5612 961563 : hash_set<gimple *> orig_stmts;
5613 961563 : bool changed = false, open_chains = false;
5614 :
5615 : /* If the function can throw and catch non-call exceptions, we'll be trying
5616 : to merge stores across different basic blocks so we need to first unsplit
5617 : the EH edges in order to streamline the CFG of the function. */
5618 961563 : if (cfun->can_throw_non_call_exceptions && cfun->eh)
5619 241067 : unsplit_eh_edges ();
5620 :
5621 961563 : calculate_dominance_info (CDI_DOMINATORS);
5622 :
5623 9782819 : FOR_EACH_BB_FN (bb, fun)
5624 : {
5625 8821256 : const basic_block_status bb_status = get_status_for_store_merging (bb);
5626 8821256 : gimple_stmt_iterator gsi;
5627 :
5628 8860566 : if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5629 : {
5630 119829 : changed |= terminate_and_process_all_chains ();
5631 119829 : open_chains = false;
5632 : }
5633 :
5634 8821256 : if (bb_status == BB_INVALID)
5635 7659695 : continue;
5636 :
5637 1161561 : if (dump_file && (dump_flags & TDF_DETAILS))
5638 23 : fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5639 :
5640 29189139 : for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5641 : {
5642 28027578 : gimple *stmt = gsi_stmt (gsi);
5643 28027578 : gsi_next (&gsi);
5644 :
5645 28027578 : if (is_gimple_debug (stmt))
5646 18214818 : continue;
5647 :
5648 19168078 : if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5649 : {
5650 : /* Terminate all chains. */
5651 9509 : if (dump_file && (dump_flags & TDF_DETAILS))
5652 1 : fprintf (dump_file, "Volatile access terminates "
5653 : "all chains\n");
5654 9509 : changed |= terminate_and_process_all_chains ();
5655 9509 : open_chains = false;
5656 9509 : continue;
5657 : }
5658 :
5659 9803251 : if (is_gimple_assign (stmt)
5660 7890735 : && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5661 6867092 : || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR)
5662 10829585 : && maybe_optimize_vector_constructor (stmt))
5663 2202 : continue;
5664 :
5665 9801049 : if (store_valid_for_store_merging_p (stmt))
5666 5056641 : changed |= process_store (stmt);
5667 : else
5668 4744408 : changed |= terminate_all_aliasing_chains (NULL, stmt);
5669 : }
5670 :
5671 1161561 : if (bb_status == BB_EXTENDED_VALID)
5672 : open_chains = true;
5673 : else
5674 : {
5675 1024109 : changed |= terminate_and_process_all_chains ();
5676 1024109 : open_chains = false;
5677 : }
5678 : }
5679 :
5680 961563 : if (open_chains)
5681 0 : changed |= terminate_and_process_all_chains ();
5682 :
5683 : /* If the function can throw and catch non-call exceptions and something
5684 : changed during the pass, then the CFG has (very likely) changed too. */
5685 961563 : if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5686 : {
5687 1013 : free_dominance_info (CDI_DOMINATORS);
5688 1013 : return TODO_cleanup_cfg;
5689 : }
5690 :
5691 : return 0;
5692 961563 : }
5693 :
5694 : } // anon namespace
5695 :
5696 : /* Construct and return a store merging pass object. */
5697 :
5698 : gimple_opt_pass *
5699 298828 : make_pass_store_merging (gcc::context *ctxt)
5700 : {
5701 298828 : return new pass_store_merging (ctxt);
5702 : }
5703 :
5704 : #if CHECKING_P
5705 :
5706 : namespace selftest {
5707 :
5708 : /* Selftests for store merging helpers. */
5709 :
5710 : /* Assert that all elements of the byte arrays X and Y, both of length N
5711 : are equal. */
5712 :
5713 : static void
5714 32 : verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5715 : {
5716 112 : for (unsigned int i = 0; i < n; i++)
5717 : {
5718 80 : if (x[i] != y[i])
5719 : {
5720 0 : fprintf (stderr, "Arrays do not match. X:\n");
5721 0 : dump_char_array (stderr, x, n);
5722 0 : fprintf (stderr, "Y:\n");
5723 0 : dump_char_array (stderr, y, n);
5724 : }
5725 80 : ASSERT_EQ (x[i], y[i]);
5726 : }
5727 32 : }
5728 :
5729 : /* Test shift_bytes_in_array_left and that it carries bits across between
5730 : bytes correctly. */
5731 :
5732 : static void
5733 4 : verify_shift_bytes_in_array_left (void)
5734 : {
5735 : /* byte 1 | byte 0
5736 : 00011111 | 11100000. */
5737 4 : unsigned char orig[2] = { 0xe0, 0x1f };
5738 4 : unsigned char in[2];
5739 4 : memcpy (in, orig, sizeof orig);
5740 :
5741 4 : unsigned char expected[2] = { 0x80, 0x7f };
5742 4 : shift_bytes_in_array_left (in, sizeof (in), 2);
5743 4 : verify_array_eq (in, expected, sizeof (in));
5744 :
5745 4 : memcpy (in, orig, sizeof orig);
5746 4 : memcpy (expected, orig, sizeof orig);
5747 : /* Check that shifting by zero doesn't change anything. */
5748 4 : shift_bytes_in_array_left (in, sizeof (in), 0);
5749 4 : verify_array_eq (in, expected, sizeof (in));
5750 :
5751 4 : }
5752 :
5753 : /* Test shift_bytes_in_array_right and that it carries bits across between
5754 : bytes correctly. */
5755 :
5756 : static void
5757 4 : verify_shift_bytes_in_array_right (void)
5758 : {
5759 : /* byte 1 | byte 0
5760 : 00011111 | 11100000. */
5761 4 : unsigned char orig[2] = { 0x1f, 0xe0};
5762 4 : unsigned char in[2];
5763 4 : memcpy (in, orig, sizeof orig);
5764 4 : unsigned char expected[2] = { 0x07, 0xf8};
5765 4 : shift_bytes_in_array_right (in, sizeof (in), 2);
5766 4 : verify_array_eq (in, expected, sizeof (in));
5767 :
5768 4 : memcpy (in, orig, sizeof orig);
5769 4 : memcpy (expected, orig, sizeof orig);
5770 : /* Check that shifting by zero doesn't change anything. */
5771 4 : shift_bytes_in_array_right (in, sizeof (in), 0);
5772 4 : verify_array_eq (in, expected, sizeof (in));
5773 4 : }
5774 :
5775 : /* Test clear_bit_region that it clears exactly the bits asked and
5776 : nothing more. */
5777 :
5778 : static void
5779 4 : verify_clear_bit_region (void)
5780 : {
5781 : /* Start with all bits set and test clearing various patterns in them. */
5782 4 : unsigned char orig[3] = { 0xff, 0xff, 0xff};
5783 4 : unsigned char in[3];
5784 4 : unsigned char expected[3];
5785 4 : memcpy (in, orig, sizeof in);
5786 :
5787 : /* Check zeroing out all the bits. */
5788 4 : clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5789 4 : expected[0] = expected[1] = expected[2] = 0;
5790 4 : verify_array_eq (in, expected, sizeof in);
5791 :
5792 4 : memcpy (in, orig, sizeof in);
5793 : /* Leave the first and last bits intact. */
5794 4 : clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5795 4 : expected[0] = 0x1;
5796 4 : expected[1] = 0;
5797 4 : expected[2] = 0x80;
5798 4 : verify_array_eq (in, expected, sizeof in);
5799 4 : }
5800 :
5801 : /* Test clear_bit_region_be that it clears exactly the bits asked and
5802 : nothing more. */
5803 :
5804 : static void
5805 4 : verify_clear_bit_region_be (void)
5806 : {
5807 : /* Start with all bits set and test clearing various patterns in them. */
5808 4 : unsigned char orig[3] = { 0xff, 0xff, 0xff};
5809 4 : unsigned char in[3];
5810 4 : unsigned char expected[3];
5811 4 : memcpy (in, orig, sizeof in);
5812 :
5813 : /* Check zeroing out all the bits. */
5814 4 : clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5815 4 : expected[0] = expected[1] = expected[2] = 0;
5816 4 : verify_array_eq (in, expected, sizeof in);
5817 :
5818 4 : memcpy (in, orig, sizeof in);
5819 : /* Leave the first and last bits intact. */
5820 4 : clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5821 4 : expected[0] = 0x80;
5822 4 : expected[1] = 0;
5823 4 : expected[2] = 0x1;
5824 4 : verify_array_eq (in, expected, sizeof in);
5825 4 : }
5826 :
5827 :
5828 : /* Run all of the selftests within this file. */
5829 :
5830 : void
5831 4 : store_merging_cc_tests (void)
5832 : {
5833 4 : verify_shift_bytes_in_array_left ();
5834 4 : verify_shift_bytes_in_array_right ();
5835 4 : verify_clear_bit_region ();
5836 4 : verify_clear_bit_region_be ();
5837 4 : }
5838 :
5839 : } // namespace selftest
5840 : #endif /* CHECKING_P. */
|