Line data Source code
1 : /* GIMPLE store merging and byte swapping passes.
2 : Copyright (C) 2009-2026 Free Software Foundation, Inc.
3 : Contributed by ARM Ltd.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* The purpose of the store merging pass is to combine multiple memory stores
22 : of constant values, values loaded from memory, bitwise operations on those,
23 : or bit-field values, to consecutive locations, into fewer wider stores.
24 :
  25                 :    For example, if we have a sequence performing four byte stores to
26 : consecutive memory locations:
27 : [p ] := imm1;
28 : [p + 1B] := imm2;
29 : [p + 2B] := imm3;
30 : [p + 3B] := imm4;
31 : we can transform this into a single 4-byte store if the target supports it:
32 : [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 :
34 : Or:
35 : [p ] := [q ];
36 : [p + 1B] := [q + 1B];
37 : [p + 2B] := [q + 2B];
38 : [p + 3B] := [q + 3B];
39 : if there is no overlap can be transformed into a single 4-byte
40 : load followed by single 4-byte store.
41 :
42 : Or:
43 : [p ] := [q ] ^ imm1;
44 : [p + 1B] := [q + 1B] ^ imm2;
45 : [p + 2B] := [q + 2B] ^ imm3;
46 : [p + 3B] := [q + 3B] ^ imm4;
47 : if there is no overlap can be transformed into a single 4-byte
48 : load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 :
50 : Or:
51 : [p:1 ] := imm;
52 : [p:31] := val & 0x7FFFFFFF;
53 : we can transform this into a single 4-byte store if the target supports it:
54 : [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55 :
56 : The algorithm is applied to each basic block in three phases:
57 :
58 : 1) Scan through the basic block and record assignments to destinations
59 : that can be expressed as a store to memory of a certain size at a certain
60 : bit offset from base expressions we can handle. For bit-fields we also
61 : record the surrounding bit region, i.e. bits that could be stored in
62 : a read-modify-write operation when storing the bit-field. Record store
63 : chains to different bases in a hash_map (m_stores) and make sure to
64 : terminate such chains when appropriate (for example when the stored
65 : values get used subsequently).
66 : These stores can be a result of structure element initializers, array stores
67 : etc. A store_immediate_info object is recorded for every such store.
68 : Record as many such assignments to a single base as possible until a
69 : statement that interferes with the store sequence is encountered.
70 : Each store has up to 2 operands, which can be a either constant, a memory
71 : load or an SSA name, from which the value to be stored can be computed.
72 : At most one of the operands can be a constant. The operands are recorded
73 : in store_operand_info struct.
74 :
75 : 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 : store_immediate_info objects) and coalesce contiguous stores into
77 : merged_store_group objects. For bit-field stores, we don't need to
78 : require the stores to be contiguous, just their surrounding bit regions
79 : have to be contiguous. If the expression being stored is different
80 : between adjacent stores, such as one store storing a constant and
81 : following storing a value loaded from memory, or if the loaded memory
82 : objects are not adjacent, a new merged_store_group is created as well.
83 :
84 : For example, given the stores:
85 : [p ] := 0;
86 : [p + 1B] := 1;
87 : [p + 3B] := 0;
88 : [p + 4B] := 1;
89 : [p + 5B] := 0;
90 : [p + 6B] := 0;
91 : This phase would produce two merged_store_group objects, one recording the
92 : two bytes stored in the memory region [p : p + 1] and another
93 : recording the four bytes stored in the memory region [p + 3 : p + 6].
94 :
95 : 3) The merged_store_group objects produced in phase 2) are processed
96 : to generate the sequence of wider stores that set the contiguous memory
97 : regions to the sequence of bytes that correspond to it. This may emit
98 : multiple stores per store group to handle contiguous stores that are not
99 : of a size that is a power of 2. For example it can try to emit a 40-bit
100 : store as a 32-bit store followed by an 8-bit store.
101 : We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 : or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 :
104 : Note on endianness and example:
105 : Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 : [p ] := 0x1234;
107 : [p + 2B] := 0x5678;
108 : [p + 4B] := 0xab;
109 : [p + 5B] := 0xcd;
110 :
111 : The memory layout for little-endian (LE) and big-endian (BE) must be:
112 : p |LE|BE|
113 : ---------
114 : 0 |34|12|
115 : 1 |12|34|
116 : 2 |78|56|
117 : 3 |56|78|
118 : 4 |ab|ab|
119 : 5 |cd|cd|
120 :
121 : To merge these into a single 48-bit merged value 'val' in phase 2)
122 : on little-endian we insert stores to higher (consecutive) bitpositions
123 : into the most significant bits of the merged value.
124 : The final merged value would be: 0xcdab56781234
125 :
126 : For big-endian we insert stores to higher bitpositions into the least
127 : significant bits of the merged value.
128 : The final merged value would be: 0x12345678abcd
129 :
130 : Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 : followed by a 16-bit store. Again, we must consider endianness when
132 : breaking down the 48-bit value 'val' computed above.
133 : For little endian we emit:
134 : [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 : [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136 :
137 : Whereas for big-endian we emit:
138 : [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 : [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140 :
141 : #include "config.h"
142 : #include "system.h"
143 : #include "coretypes.h"
144 : #include "backend.h"
145 : #include "tree.h"
146 : #include "gimple.h"
147 : #include "builtins.h"
148 : #include "fold-const.h"
149 : #include "tree-pass.h"
150 : #include "ssa.h"
151 : #include "gimple-pretty-print.h"
152 : #include "alias.h"
153 : #include "fold-const.h"
154 : #include "print-tree.h"
155 : #include "tree-hash-traits.h"
156 : #include "gimple-iterator.h"
157 : #include "gimplify.h"
158 : #include "gimple-fold.h"
159 : #include "stor-layout.h"
160 : #include "timevar.h"
161 : #include "cfganal.h"
162 : #include "cfgcleanup.h"
163 : #include "tree-cfg.h"
164 : #include "except.h"
165 : #include "tree-eh.h"
166 : #include "target.h"
167 : #include "gimplify-me.h"
168 : #include "rtl.h"
169 : #include "expr.h" /* For get_bit_range. */
170 : #include "optabs-tree.h"
171 : #include "dbgcnt.h"
172 : #include "selftest.h"
173 :
174 : /* The maximum size (in bits) of the stores this pass should generate. */
175 : #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 : #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 :
178 : /* Limit to bound the number of aliasing checks for loads with the same
179 : vuse as the corresponding store. */
180 : #define MAX_STORE_ALIAS_CHECKS 64
181 :
182 : namespace {
183 :
                 : /* Counters for nop / byte-swap sequences recognized by the bswap
                 :    pass; one instance tracks nops, the other bswaps.  */
 184                 : struct bswap_stat
 185                 : {
 186                 :   /* Number of hand-written 16-bit nop / bswaps found.  */
 187                 :   int found_16bit;
 188                 : 
 189                 :   /* Number of hand-written 32-bit nop / bswaps found.  */
 190                 :   int found_32bit;
 191                 : 
 192                 :   /* Number of hand-written 64-bit nop / bswaps found.  */
 193                 :   int found_64bit;
 194                 : } nop_stats, bswap_stats;
195 :
196 : /* A symbolic number structure is used to detect byte permutation and selection
197 : patterns of a source. To achieve that, its field N contains an artificial
198 : number consisting of BITS_PER_MARKER sized markers tracking where does each
199 : byte come from in the source:
200 :
201 : 0 - target byte has the value 0
202 : FF - target byte has an unknown value (eg. due to sign extension)
203 : 1..size - marker value is the byte index in the source (0 for lsb).
204 :
205 : To detect permutations on memory sources (arrays and structures), a symbolic
206 : number is also associated:
207 : - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 : - a range which gives the difference between the highest and lowest accessed
209 : memory location to make such a symbolic number;
210 : - the address SRC of the source element of lowest address as a convenience
211 : to easily get BASE_ADDR + offset + lowest bytepos;
212 : - number of expressions N_OPS bitwise ored together to represent
213 : approximate cost of the computation.
214 :
215 : Note 1: the range is different from size as size reflects the size of the
216 : type of the current expression. For instance, for an array char a[],
217 : (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 : (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 : time a range of 1.
220 :
221 : Note 2: for non-memory sources, range holds the same value as size.
222 :
223 : Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224 :
 225                 : struct symbolic_number {
 226                 :   uint64_t n;                   /* One BITS_PER_MARKER-bit marker per byte.  */
 227                 :   tree type;                    /* Type of the expression being tracked.  */
 228                 :   tree base_addr;               /* Base address of a memory source, else NULL.  */
 229                 :   tree offset;                  /* Variable byte offset from BASE_ADDR, if any.  */
 230                 :   poly_int64 bytepos;           /* Constant byte offset from BASE_ADDR.  */
 231                 :   tree src;                     /* Lowest-address source element; the SSA_NAME
                 :                                    for non-memory sources.  */
 232                 :   tree alias_set;               /* Alias pointer type of the reference.  */
 233                 :   tree vuse;                    /* Virtual use of the load statement.  */
 234                 :   unsigned HOST_WIDE_INT range; /* Span in bytes between the lowest and highest
                 :                                    accessed memory locations (== size when the
                 :                                    source is not memory).  */
 235                 :   int n_ops;                    /* Number of expressions ored together, used as
                 :                                    an approximate cost of the computation.  */
 236                 : };
237 :
238 : #define BITS_PER_MARKER 8
239 : #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 : #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 : #define HEAD_MARKER(n, size) \
242 : ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 :
244 : /* The number which the find_bswap_or_nop_1 result should match in
245 : order to have a nop. The number is masked according to the size of
246 : the symbolic number before using it. */
247 : #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 : (uint64_t)0x08070605 << 32 | 0x04030201)
249 :
250 : /* The number which the find_bswap_or_nop_1 result should match in
251 : order to have a byte swap. The number is masked according to the
252 : size of the symbolic number before using it. */
253 : #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 : (uint64_t)0x01020304 << 32 | 0x05060708)
255 :
256 : /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 : number N. Return false if the requested operation is not permitted
258 : on a symbolic number. */
259 :
 260                 : inline bool
 261          191306 : do_shift_rotate (enum tree_code code,
 262                 :                  struct symbolic_number *n,
 263                 :                  int count)
 264                 : {
 265          191306 :   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 266          191306 :   uint64_t head_marker;
 267                 : 
                 :   /* Only whole-byte shifts within the type's precision can be modelled
                 :      on the per-byte markers.  */
 268          191306 :   if (count < 0
 269          191306 :       || count >= TYPE_PRECISION (n->type)
 270          382612 :       || count % BITS_PER_UNIT != 0)
 271                 :     return false;
                 :   /* Scale COUNT from a bit count to a marker count (one marker per byte).  */
 272          149799 :   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
 273                 : 
 274                 :   /* Zero out the extra bits of N in order to avoid them being shifted
 275                 :      into the significant bits.  */
 276          149799 :   if (size < 64 / BITS_PER_MARKER)
 277           42959 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
 278                 : 
 279          149799 :   switch (code)
 280                 :     {
 281          120880 :     case LSHIFT_EXPR:
 282          120880 :       n->n <<= count;
 283          120880 :       break;
 284           26063 :     case RSHIFT_EXPR:
 285           26063 :       head_marker = HEAD_MARKER (n->n, size);
 286           26063 :       n->n >>= count;
 287                 :       /* Arithmetic shift of signed type: result is dependent on the value.  */
 288           26063 :       if (!TYPE_UNSIGNED (n->type) && head_marker)
                 :         /* Mark the bytes shifted in from the sign bit as unknown.  */
 289            2925 :         for (i = 0; i < count / BITS_PER_MARKER; i++)
 290            1862 :           n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
 291            1862 :                   << ((size - 1 - i) * BITS_PER_MARKER);
 292                 :       break;
 293              25 :     case LROTATE_EXPR:
 294              25 :       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
 295              25 :       break;
 296            2831 :     case RROTATE_EXPR:
 297            2831 :       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
 298            2831 :       break;
 299                 :     default:
 300                 :       return false;
 301                 :     }
 302                 :   /* Zero unused bits for size.  */
 303          149799 :   if (size < 64 / BITS_PER_MARKER)
 304           42959 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
 305                 :   return true;
 306                 : }
307 :
308 : /* Perform sanity checking for the symbolic number N and the gimple
309 : statement STMT. */
310 :
 311                 : inline bool
 312          339268 : verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
 313                 : {
 314          339268 :   tree lhs_type;
 315                 : 
 316          339268 :   lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
 317                 : 
                 :   /* Only integral scalar results can be described by a symbolic number.  */
 318          339268 :   if (TREE_CODE (lhs_type) != INTEGER_TYPE
 319          339268 :       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
 320                 :     return false;
 321                 : 
                 :   /* STMT must not change the precision N was computed at.  */
 322          327233 :   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
 323               0 :     return false;
 324                 : 
 325                 :   return true;
 326                 : }
327 :
328 : /* Initialize the symbolic number N for the bswap pass from the base element
329 : SRC manipulated by the bitwise OR expression. */
330 :
 331                 : bool
 332         1125241 : init_symbolic_number (struct symbolic_number *n, tree src)
 333                 : {
 334         1125241 :   int size;
 335                 : 
 336         1125241 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
 337                 :     return false;
 338                 : 
 339          975989 :   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
 340          975989 :   n->src = src;
 341                 : 
 342                 :   /* Set up the symbolic number N by setting each byte to a value between 1 and
 343                 :      the byte size of rhs1. The highest order byte is set to n->size and the
 344                 :      lowest order byte to 1.  */
 345          975989 :   n->type = TREE_TYPE (src);
 346          975989 :   size = TYPE_PRECISION (n->type);
                 :   /* Reject bit-field precision and types wider than the 64-bit symbolic
                 :      number can describe (8 one-byte markers).  */
 347          975989 :   if (size % BITS_PER_UNIT != 0)
 348                 :     return false;
 349          961208 :   size /= BITS_PER_UNIT;
 350          961208 :   if (size > 64 / BITS_PER_MARKER)
 351                 :     return false;
 352          960364 :   n->range = size;
 353          960364 :   n->n = CMPNOP;
 354          960364 :   n->n_ops = 1;
 355                 : 
                 :   /* Keep only the markers that fit in SIZE bytes.  */
 356          960364 :   if (size < 64 / BITS_PER_MARKER)
 357          445926 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
 358                 : 
 359                 :   return true;
 360                 : }
361 :
362 : /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 : the answer. If so, REF is that memory source and the base of the memory area
364 : accessed and the offset of the access from that base are recorded in N. */
365 :
 366                 : static bool
 367         5219536 : find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
 368                 : {
 369                 :   /* Leaf node is an array or component ref. Memorize its base and
 370                 :      offset from base to compare to other such leaf node.  */
 371         5219536 :   poly_int64 bitsize, bitpos, bytepos;
 372         5219536 :   machine_mode mode;
 373         5219536 :   int unsignedp, reversep, volatilep;
 374         5219536 :   tree offset, base_addr;
 375                 : 
 376                 :   /* Not prepared to handle PDP endian.  */
 377         5219536 :   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 378                 :     return false;
 379                 : 
 380         6119070 :   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
 381                 :     return false;
 382                 : 
 383          894319 :   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
 384                 :                                    &unsignedp, &reversep, &volatilep);
 385                 : 
 386          894319 :   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
 387                 :     /* Do not rewrite TARGET_MEM_REF.  */
 388                 :     return false;
 389          847586 :   else if (TREE_CODE (base_addr) == MEM_REF)
 390                 :     {
 391          464123 :       poly_offset_int bit_offset = 0;
 392          464123 :       tree off = TREE_OPERAND (base_addr, 1);
 393                 : 
                 :       /* Fold the MEM_REF's constant offset into BITPOS so all accesses
                 :          from the same SSA base become comparable.  */
 394          464123 :       if (!integer_zerop (off))
 395                 :         {
 396           99063 :           poly_offset_int boff = mem_ref_offset (base_addr);
 397           99063 :           boff <<= LOG2_BITS_PER_UNIT;
 398           99063 :           bit_offset += boff;
 399                 :         }
 400                 : 
 401          464123 :       base_addr = TREE_OPERAND (base_addr, 0);
 402                 : 
 403                 :       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
 404          464123 :       if (maybe_lt (bit_offset, 0))
 405                 :         {
 406            4367 :           tree byte_offset = wide_int_to_tree
 407            4367 :             (sizetype, bits_to_bytes_round_down (bit_offset));
 408            4367 :           bit_offset = num_trailing_bits (bit_offset);
 409            4367 :           if (offset)
 410               0 :             offset = size_binop (PLUS_EXPR, offset, byte_offset);
 411                 :           else
 412            4367 :             offset = byte_offset;
 413                 :         }
 414                 : 
 415          464123 :       bitpos += bit_offset.force_shwi ();
 416                 :     }
 417                 :   else
 418          383463 :     base_addr = build_fold_addr_expr (base_addr);
 419                 : 
                 :   /* Only byte-aligned, whole-byte, non-reverse-storage-order accesses
                 :      can be represented by the per-byte markers.  */
 420         5356221 :   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
 421                 :     return false;
 422          846626 :   if (!multiple_p (bitsize, BITS_PER_UNIT))
 423                 :     return false;
 424          845344 :   if (reversep)
 425                 :     return false;
 426                 : 
 427          845335 :   if (!init_symbolic_number (n, ref))
 428                 :     return false;
 429          709941 :   n->base_addr = base_addr;
 430          709941 :   n->offset = offset;
 431          709941 :   n->bytepos = bytepos;
 432          709941 :   n->alias_set = reference_alias_ptr_type (ref);
 433          709941 :   n->vuse = gimple_vuse (stmt);
 434          709941 :   return true;
 435                 : }
436 :
437 : /* Compute the symbolic number N representing the result of a bitwise OR,
438 : bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 : are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
440 :
 441                 : gimple *
 442          158824 : perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
 443                 :                         gimple *source_stmt2, struct symbolic_number *n2,
 444                 :                         struct symbolic_number *n, enum tree_code code)
 445                 : {
 446          158824 :   int i, size;
 447          158824 :   uint64_t mask;
 448          158824 :   gimple *source_stmt;
 449          158824 :   struct symbolic_number *n_start;
 450                 : 
                 :   /* Look through BIT_FIELD_REFs of SSA_NAMEs so both sides are compared
                 :      on their underlying source.  */
 451          158824 :   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
 452          158824 :   if (TREE_CODE (rhs1) == BIT_FIELD_REF
 453          158824 :       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
 454             385 :     rhs1 = TREE_OPERAND (rhs1, 0);
 455          158824 :   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
 456          158824 :   if (TREE_CODE (rhs2) == BIT_FIELD_REF
 457          158824 :       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
 458             380 :     rhs2 = TREE_OPERAND (rhs2, 0);
 459                 : 
 460                 :   /* Sources are different, cancel bswap if they are not memory location with
 461                 :      the same base (array, structure, ...).  */
 462          158824 :   if (rhs1 != rhs2)
 463                 :     {
 464          151176 :       uint64_t inc;
 465          151176 :       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
 466          151176 :       struct symbolic_number *toinc_n_ptr, *n_end;
 467          151176 :       basic_block bb1, bb2;
 468                 : 
 469          124178 :       if (!n1->base_addr || !n2->base_addr
 470          275344 :           || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
 471           86309 :         return NULL;
 472                 : 
 473           64867 :       if (!n1->offset != !n2->offset
 474           64867 :           || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
 475            3370 :         return NULL;
 476                 : 
                 :       /* Express both byte positions relative to N1 so they fit in a
                 :          HOST_WIDE_INT.  */
 477           61497 :       start1 = 0;
 478           61497 :       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
 479                 :         return NULL;
 480                 : 
 481           61497 :       if (start1 < start2)
 482                 :         {
 483                 :           n_start = n1;
 484                 :           start_sub = start2 - start1;
 485                 :         }
 486                 :       else
 487                 :         {
 488           14536 :           n_start = n2;
 489           14536 :           start_sub = start1 - start2;
 490                 :         }
 491                 : 
                 :       /* Return the statement that dominates the other as the source.  */
 492           61497 :       bb1 = gimple_bb (source_stmt1);
 493           61497 :       bb2 = gimple_bb (source_stmt2);
 494           61497 :       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
 495                 :         source_stmt = source_stmt1;
 496                 :       else
 497            4606 :         source_stmt = source_stmt2;
 498                 : 
 499                 :       /* Find the highest address at which a load is performed and
 500                 :          compute related info.  */
 501           61497 :       end1 = start1 + (n1->range - 1);
 502           61497 :       end2 = start2 + (n2->range - 1);
 503           61497 :       if (end1 < end2)
 504                 :         {
 505           61497 :           end = end2;
 506                 :           end_sub = end2 - end1;
 507                 :         }
 508                 :       else
 509                 :         {
 510                 :           end = end1;
 511                 :           end_sub = end1 - end2;
 512                 :         }
 513           61497 :       n_end = (end2 > end1) ? n2 : n1;
 514                 : 
 515                 :       /* Find symbolic number whose lsb is the most significant.  */
 516           61497 :       if (BYTES_BIG_ENDIAN)
 517                 :         toinc_n_ptr = (n_end == n1) ? n2 : n1;
 518                 :       else
 519           61497 :         toinc_n_ptr = (n_start == n1) ? n2 : n1;
 520                 : 
 521           61497 :       n->range = end - MIN (start1, start2) + 1;
 522                 : 
 523                 :       /* Check that the range of memory covered can be represented by
 524                 :          a symbolic number.  */
 525           61497 :       if (n->range > 64 / BITS_PER_MARKER)
 526                 :         return NULL;
 527                 : 
 528                 :       /* Reinterpret byte marks in symbolic number holding the value of
 529                 :          bigger weight according to target endianness.  */
 530           47344 :       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
 531           47344 :       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
 532          372157 :       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
 533                 :         {
 534          324813 :           unsigned marker
 535          324813 :             = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
 536          324813 :           if (marker && marker != MARKER_BYTE_UNKNOWN)
 537          123372 :             toinc_n_ptr->n += inc;
 538                 :         }
 539                 :     }
 540                 :   else
 541                 :     {
 542            7648 :       n->range = n1->range;
 543            7648 :       n_start = n1;
 544            7648 :       source_stmt = source_stmt1;
 545                 :     }
 546                 : 
                 :   /* Merge alias sets; fall back to ptr_type_node on mismatch.  */
 547           54992 :   if (!n1->alias_set
 548           54992 :       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
 549           47312 :     n->alias_set = n1->alias_set;
 550                 :   else
 551            7680 :     n->alias_set = ptr_type_node;
 552           54992 :   n->vuse = n_start->vuse;
 553           54992 :   n->base_addr = n_start->base_addr;
 554           54992 :   n->offset = n_start->offset;
 555           54992 :   n->src = n_start->src;
 556           54992 :   n->bytepos = n_start->bytepos;
 557           54992 :   n->type = n_start->type;
 558           54992 :   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 559           54992 :   uint64_t res_n = n1->n | n2->n;
 560                 : 
                 :   /* Combine the two marker sets byte by byte according to CODE.  */
 561          425926 :   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
 562                 :     {
 563          372181 :       uint64_t masked1, masked2;
 564                 : 
 565          372181 :       masked1 = n1->n & mask;
 566          372181 :       masked2 = n2->n & mask;
 567                 :       /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
 568          372181 :       if (masked1 && masked2)
 569                 :         {
 570                 :           /* + can carry into upper bits, just punt.  */
 571            6413 :           if (code == PLUS_EXPR)
 572                 :             return NULL;
 573                 :           /* x | x is still x.  */
 574            5166 :           if (code == BIT_IOR_EXPR && masked1 == masked2)
 575             220 :             continue;
 576            4946 :           if (code == BIT_XOR_EXPR)
 577                 :             {
 578                 :               /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
 579                 :                  unknown values and unknown ^ unknown is unknown.  */
 580             926 :               if (masked1 == masked2
 581             196 :                   && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
 582             134 :                                  << i * BITS_PER_MARKER))
 583                 :                 {
 584              62 :                   res_n &= ~mask;
 585              62 :                   continue;
 586                 :                 }
 587                 :             }
 588                 :           /* Otherwise set the byte to unknown, it might still be
 589                 :              later masked off.  */
 590            4884 :           res_n |= mask;
 591                 :         }
 592                 :     }
 593           53745 :   n->n = res_n;
 594           53745 :   n->n_ops = n1->n_ops + n2->n_ops;
 595                 : 
 596           53745 :   return source_stmt;
 597                 : }
598 :
599 : /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600 : the operation given by the rhs of STMT on the result. If the operation
601 : could successfully be executed the function returns a gimple stmt whose
602 : rhs's first tree is the expression of the source operand and NULL
603 : otherwise. */
604 :
 605                 : gimple *
 606         6210579 : find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
 607                 : {
 608         6210579 :   enum tree_code code;
 609         6210579 :   tree rhs1, rhs2 = NULL;
 610         6210579 :   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
 611         6210579 :   enum gimple_rhs_class rhs_class;
 612                 : 
                 :   /* LIMIT bounds the recursion depth over the defining statements.  */
 613         6210579 :   if (!limit
 614         6158954 :       || !is_gimple_assign (stmt)
 615        11437138 :       || stmt_can_throw_internal (cfun, stmt))
 616          991043 :     return NULL;
 617                 : 
 618         5219536 :   rhs1 = gimple_assign_rhs1 (stmt);
 619                 : 
                 :   /* A direct memory load is a leaf; record it in N and stop here.  */
 620         5219536 :   if (find_bswap_or_nop_load (stmt, rhs1, n))
 621                 :     return stmt;
 622                 : 
 623                 :   /* Handle BIT_FIELD_REF.  */
 624         4509595 :   if (TREE_CODE (rhs1) == BIT_FIELD_REF
 625         4509595 :       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
 626                 :     {
 627           11852 :       if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
 628           11852 :           || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
 629                 :         return NULL;
 630                 : 
 631           11852 :       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
 632           11852 :       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
 633           11852 :       if (bitpos % BITS_PER_UNIT == 0
 634           11852 :           && bitsize % BITS_PER_UNIT == 0
 635           23704 :           && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
 636                 :         {
 637                 :           /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
 638             546 :           if (BYTES_BIG_ENDIAN)
 639                 :             bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
 640                 : 
 641                 :           /* Shift.  */
 642             546 :           if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
 643                 :             return NULL;
 644                 : 
 645                 :           /* Mask.  */
 646                 :           uint64_t mask = 0;
 647            1357 :           for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; i++)
 648             811 :             mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
 649             546 :           n->n &= mask;
 650                 : 
 651                 :           /* Convert.  */
 652             546 :           n->type = TREE_TYPE (rhs1);
 653             546 :           if (!verify_symbolic_number_p (n, stmt))
 654                 :             return NULL;
 655                 : 
 656             534 :           if (!n->base_addr)
 657             534 :             n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 658                 : 
 659             534 :           return stmt;
 660                 :         }
 661                 : 
 662           11306 :       return NULL;
 663                 :     }
 664                 : 
 665         4497743 :   if (TREE_CODE (rhs1) != SSA_NAME)
 666                 :     return NULL;
 667                 : 
 668         4198457 :   code = gimple_assign_rhs_code (stmt);
 669         4198457 :   rhs_class = gimple_assign_rhs_class (stmt);
 670         4198457 :   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
 671                 : 
 672         4198457 :   if (rhs_class == GIMPLE_BINARY_RHS)
 673         3889263 :     rhs2 = gimple_assign_rhs2 (stmt);
 674                 : 
 675                 :   /* Handle unary rhs and binary rhs with integer constants as second
 676                 :      operand.  */
 677                 : 
 678         4198457 :   if (rhs_class == GIMPLE_UNARY_RHS
 679         3892667 :       || (rhs_class == GIMPLE_BINARY_RHS
 680         3889263 :           && TREE_CODE (rhs2) == INTEGER_CST))
 681                 :     {
 682         1963003 :       if (code != BIT_AND_EXPR
 683         1963003 :           && code != LSHIFT_EXPR
 684                 :           && code != RSHIFT_EXPR
 685         1887821 :           && code != LROTATE_EXPR
 686         1832022 :           && code != RROTATE_EXPR
 687         1826367 :           && !CONVERT_EXPR_CODE_P (code))
 688                 :         return NULL;
 689                 : 
 690          402994 :       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
 691                 : 
 692                 :       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
 693                 :          we have to initialize the symbolic number.  */
 694          402994 :       if (!source_stmt1)
 695                 :         {
 696          268054 :           if (gimple_assign_load_p (stmt)
 697          268054 :               || !init_symbolic_number (n, rhs1))
 698           18177 :             return NULL;
 699                 :           source_stmt1 = stmt;
 700                 :         }
 701                 : 
                 :       /* Apply the effect of STMT's operation to the markers in N.  */
 702          384817 :       switch (code)
 703                 :         {
 704           40030 :         case BIT_AND_EXPR:
 705           40030 :           {
 706           40030 :             int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 707           40030 :             uint64_t val = int_cst_value (rhs2), mask = 0;
 708           40030 :             uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
 709                 : 
 710                 :             /* Only constants masking full bytes are allowed.  */
 711          226746 :             for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
 712          206116 :               if ((val & tmp) != 0 && (val & tmp) != tmp)
 713                 :                 return NULL;
 714          186716 :               else if (val & tmp)
 715           96078 :                 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
 716                 : 
 717           20630 :             n->n &= mask;
 718                 :           }
 719           20630 :           break;
 720           96114 :         case LSHIFT_EXPR:
 721           96114 :         case RSHIFT_EXPR:
 722           96114 :         case LROTATE_EXPR:
 723           96114 :         case RROTATE_EXPR:
 724           96114 :           if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
 725                 :             return NULL;
 726                 :           break;
 727          248673 :         CASE_CONVERT:
 728          248673 :           {
 729          248673 :             int i, type_size, old_type_size;
 730          248673 :             tree type;
 731                 : 
 732          248673 :             type = TREE_TYPE (gimple_assign_lhs (stmt));
 733          248673 :             type_size = TYPE_PRECISION (type);
 734          248673 :             if (type_size % BITS_PER_UNIT != 0)
 735                 :               return NULL;
 736          246018 :             type_size /= BITS_PER_UNIT;
 737          246018 :             if (type_size > 64 / BITS_PER_MARKER)
 738                 :               return NULL;
 739                 : 
 740                 :             /* Sign extension: result is dependent on the value.  */
 741          245465 :             old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
 742          347487 :             if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
 743          283070 :                 && HEAD_MARKER (n->n, old_type_size))
 744          180516 :               for (i = 0; i < type_size - old_type_size; i++)
 745          143017 :                 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
 746          143017 :                         << ((type_size - 1 - i) * BITS_PER_MARKER);
 747                 : 
 748          245465 :             if (type_size < 64 / BITS_PER_MARKER)
 749                 :               {
 750                 :                 /* If STMT casts to a smaller type mask out the bits not
 751                 :                    belonging to the target type.  */
 752          121008 :                 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
 753                 :               }
 754          245465 :             n->type = type;
 755          245465 :             if (!n->base_addr)
 756          164867 :               n->range = type_size;
 757                 :           }
 758                 :           break;
 759                 :         default:
 760                 :           return NULL;
 761          320702 :         };
 762          320702 :       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
 763                 :     }
 764                 : 
 765                 :   /* Handle binary rhs.  */
 766                 : 
 767         2232050 :   if (rhs_class == GIMPLE_BINARY_RHS)
 768                 :     {
 769         2232050 :       struct symbolic_number n1, n2;
 770         2232050 :       gimple *source_stmt, *source_stmt2;
 771                 : 
 772         2232050 :       if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
 773                 :         return NULL;
 774                 : 
 775         2173568 :       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
 776                 : 
 777         2173568 :       switch (code)
 778                 :         {
 779         1939413 :         case BIT_IOR_EXPR:
 780         1939413 :         case BIT_XOR_EXPR:
 781         1939413 :         case PLUS_EXPR:
                 :           /* Recurse on both operands and merge the resulting symbolic
                 :              numbers when they are compatible.  */
 782         1939413 :           source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
 783                 : 
 784         1939413 :           if (!source_stmt1)
 785                 :             return NULL;
 786                 : 
 787          265263 :           source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
 788                 : 
 789          265263 :           if (!source_stmt2)
 790                 :             return NULL;
 791                 : 
 792          135414 :           if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
 793                 :             return NULL;
 794                 : 
 795          135414 :           if (n1.vuse != n2.vuse)
 796                 :             return NULL;
 797                 : 
 798          108400 :           source_stmt
 799          108400 :             = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
 800                 :                                       code);
 801                 : 
 802          108400 :           if (!source_stmt)
 803                 :             return NULL;
 804                 : 
 805           18020 :           if (!verify_symbolic_number_p (n, stmt))
 806                 :             return NULL;
 807                 : 
 808                 :           break;
 809                 :         default:
 810                 :           return NULL;
 811                 :         }
 812                 :       return source_stmt;
 813                 :     }
 814                 :   return NULL;
 815                 : }
816 :
817 : /* Like find_bswap_or_nop_1, but also handles CONSTRUCTOR and
818 : VEC_PACK_TRUNC_EXPR. */
819 :
820 : gimple *
821 2103152 : find_bswap_or_nop_2 (gimple *stmt, struct symbolic_number *n, int limit)
822 : {
823 2103152 : if (gimple *ret = find_bswap_or_nop_1 (stmt, n, limit))
824 : return ret;
825 :
826 2095024 : if (!limit
827 2095024 : || !is_gimple_assign (stmt)
828 2095024 : || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
829 4190040 : || stmt_can_throw_internal (cfun, stmt))
830 42 : return NULL;
831 :
832 2094982 : tree rhs1 = gimple_assign_rhs1 (stmt);
833 2094982 : if (gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR
834 2094982 : && TREE_CODE (rhs1) == SSA_NAME)
835 : {
836 1187 : if (BYTES_BIG_ENDIAN)
837 : return NULL; /* For now... */
838 1187 : gimple *rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
839 1187 : tree rhs2 = gimple_assign_rhs2 (stmt);
840 1187 : if (TREE_CODE (rhs2) != SSA_NAME)
841 : return NULL;
842 1187 : gimple *rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
843 1187 : struct symbolic_number n1, n2;
844 :
845 1187 : gimple *source_stmt1 = find_bswap_or_nop_2 (rhs1_stmt, &n1, limit - 1);
846 1187 : if (!source_stmt1)
847 : return NULL;
848 59 : gimple *source_stmt2 = find_bswap_or_nop_2 (rhs2_stmt, &n2, limit - 1);
849 59 : if (!source_stmt2)
850 : return NULL;
851 :
852 59 : if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
853 : return NULL;
854 59 : if (n1.vuse != n2.vuse)
855 : return NULL;
856 59 : auto nn2 = n2.n;
857 59 : n2.n = 0;
858 : /* We need to take into account number of elements of each vector,
859 : which perform_symbolic_merge doesn't know. So, handle it as
860 : two separate BIT_IOR_EXPR merges, each time with one operand
861 : with changed mastk to all 0s, and then merge here. */
862 59 : gimple *source_stmt
863 59 : = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
864 : BIT_IOR_EXPR);
865 59 : if (!source_stmt)
866 : return NULL;
867 59 : n2.n = nn2;
868 59 : auto nn1 = n->n;
869 59 : n1.n = 0;
870 59 : gimple *source_stmt3
871 59 : = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
872 : BIT_IOR_EXPR);
873 59 : gcc_assert (source_stmt == source_stmt3);
874 59 : nn2 = n->n;
875 59 : tree lhs = gimple_assign_lhs (stmt);
876 59 : int eltsize
877 59 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) / BITS_PER_UNIT;
878 59 : int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs)).to_constant ();
879 59 : int size = eltsize * nelts;
880 59 : int hsize = size / 2;
881 59 : n->n = 0;
882 59 : if (!BYTES_BIG_ENDIAN)
883 407 : for (int i = 0; i < size; i++)
884 348 : n->n |= ((((i < hsize ? nn1 : nn2)
885 348 : >> (((i % hsize) / eltsize * 2 * eltsize
886 348 : + (i % eltsize))) * BITS_PER_MARKER) & MARKER_MASK)
887 348 : << (i * BITS_PER_MARKER));
888 : else
889 : gcc_unreachable ();
890 : return source_stmt;
891 : }
892 :
893 2093795 : if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
894 : return NULL;
895 :
896 25597 : tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
897 25597 : unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
898 25597 : if (sz != 16 && sz != 32 && sz != 64)
899 : return NULL;
900 2111975 : if (CONSTRUCTOR_NELTS (rhs1) == 0)
901 : return NULL;
902 21852 : tree eltype = TREE_TYPE (TREE_TYPE (rhs1));
903 21852 : unsigned HOST_WIDE_INT eltsz = int_size_in_bytes (eltype) * BITS_PER_UNIT;
904 21852 : if (TYPE_PRECISION (eltype) != eltsz)
905 : return NULL;
906 21712 : constructor_elt *elt;
907 21712 : unsigned int i;
908 21712 : tree type = build_nonstandard_integer_type (sz, 1);
909 21712 : gimple *ins_stmt = NULL;
910 38248 : FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs1), i, elt)
911 : {
912 34534 : if (TREE_CODE (elt->value) != SSA_NAME
913 34534 : || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
914 17998 : return NULL;
915 33906 : struct symbolic_number n1;
916 33906 : gimple *source_stmt
917 33906 : = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1, limit - 1);
918 :
919 33906 : if (!source_stmt)
920 : return NULL;
921 :
922 23604 : n1.type = type;
923 23604 : if (!n1.base_addr)
924 3707 : n1.range = sz / BITS_PER_UNIT;
925 :
926 23604 : if (i == 0)
927 : {
928 11336 : ins_stmt = source_stmt;
929 11336 : *n = n1;
930 : }
931 : else
932 : {
933 12268 : if (n->vuse != n1.vuse)
934 7068 : return NULL;
935 :
936 7479 : struct symbolic_number n0 = *n;
937 :
938 7479 : if (!BYTES_BIG_ENDIAN)
939 : {
940 7479 : if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
941 : return NULL;
942 : }
943 : else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
944 : return NULL;
945 7479 : ins_stmt
946 7479 : = perform_symbolic_merge (ins_stmt, &n0, source_stmt,
947 : &n1, n, BIT_IOR_EXPR);
948 :
949 7479 : if (!ins_stmt)
950 : return NULL;
951 : }
952 : }
953 : return ins_stmt;
954 : }
955 :
/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  *CMPXCHG is the marker value N->n
   must equal for a full byte swap, *CMPNOP the value it must equal for
   a native-endianness (no-op) read.  *CAST64_TO_32 is set when the
   pattern looks like a 32-bit truncation of a 64-bit bswap.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
                            uint64_t *cmpnop, bool *cast64_to_32)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;
  *cast64_to_32 = false;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      if (n->base_addr == NULL
          && n->range == 4
          && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
        {
          /* If all bytes in n->n are either 0 or in [5..8] range, this
             might be a candidate for (unsigned) __builtin_bswap64 (src).
             It is not worth it for (unsigned short) __builtin_bswap64 (src)
             or (unsigned short) __builtin_bswap32 (src).  */
          *cast64_to_32 = true;
          for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
            if ((tmpn & MARKER_MASK)
                && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
              {
                *cast64_to_32 = false;
                break;
              }
        }
      if (*cast64_to_32)
        *cmpxchg &= mask;
      else
        *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          *cmpxchg &= mask;
          /* Guard the shift: shifting a 64-bit value by 64 would be UB.  */
          if (n->range - rsize == sizeof (int64_t))
            *cmpnop = 0;
          else
            *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
        }
      else
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          /* Same shift-by-64 guard as above, on the bswap pattern.  */
          if (n->range - rsize == sizeof (int64_t))
            *cmpxchg = 0;
          else
            *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
          *cmpnop &= mask;
        }
      n->range = rsize;
    }

  if (*cast64_to_32)
    n->range = 8;
  n->range *= BITS_PER_UNIT;
}
1037 :
1038 : /* Helper function for find_bswap_or_nop,
1039 : Return true if N is a swap or nop with MASK. */
1040 : static bool
1041 13110 : is_bswap_or_nop_p (uint64_t n, uint64_t cmpxchg,
1042 : uint64_t cmpnop, uint64_t* mask,
1043 : bool* bswap)
1044 : {
1045 13110 : *mask = ~(uint64_t) 0;
1046 13110 : if (n == cmpnop)
1047 5219 : *bswap = false;
1048 7891 : else if (n == cmpxchg)
1049 2282 : *bswap = true;
1050 : else
1051 : {
1052 : int set = 0;
1053 12851 : for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1054 12277 : if ((n & msk) == 0)
1055 4732 : *mask &= ~msk;
1056 7545 : else if ((n & msk) == (cmpxchg & msk))
1057 2510 : set++;
1058 : else
1059 : return false;
1060 :
1061 574 : if (set < 2)
1062 : return false;
1063 573 : *bswap = true;
1064 : }
1065 : return true;
1066 : }
1067 :
1068 :
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  *MASK is set to the
   post-bswap AND mask (all ones if none) and *L_ROTATE to a left-rotate
   count to apply after the bswap (0 if none).  At last, the function
   returns a stmt whose rhs's first tree is the source expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
                   bool *cast64_to_32, uint64_t *mask, uint64_t* l_rotate)
{
  tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
  if (!tree_fits_uhwi_p (type_size))
    return NULL;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = tree_to_uhwi (type_size);
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_2 (stmt, n, limit);
  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  uint64_t orig_range = n->range * BITS_PER_UNIT;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  *l_rotate = 0;
  uint64_t tmp_n = n->n;
  if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
    {
      /* Try bswap + lrotate.  */
      /* TODO: handle cast64_to_32 and big/little endian memory
         source when rsize < range.  */
      if (n->range == orig_range
          /* There are cases like 0x300000200 for uint32->uint64 cast.
             Don't handle this.  */
          && n->range == TYPE_PRECISION (n->type)
          /* Only when the target has a rotate-left instruction of the
             matching width.  */
          && ((orig_range == 32
               && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
              || (orig_range == 64
                  && optab_handler (rotl_optab, DImode) != CODE_FOR_nothing))
          && (tmp_n & MARKER_MASK) < orig_range / BITS_PER_UNIT)
        {
          uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER;
          uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER;
          /* I.e. handle 0x203040506070800 when the lower byte is zero:
             derive the rotate count from the first non-zero marker.  */
          if (!count)
            {
              for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++)
                {
                  count = (tmp_n >> i * BITS_PER_MARKER) & MARKER_MASK;
                  if (count)
                    {
                      /* Count should be a meaningful byte index, not the
                         0xff "unknown" marker.  */
                      if (count <= range / BITS_PER_MARKER)
                        {
                          count = (count + i) * BITS_PER_MARKER % range;
                          if (!count)
                            return NULL;
                          break;
                        }
                      else
                        return NULL;
                    }
                }
            }
          tmp_n = tmp_n >> count | tmp_n << (range - count);
          if (orig_range == 32)
            tmp_n &= (1ULL << 32) - 1;
          if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
            return NULL;
          *l_rotate = count / BITS_PER_MARKER * BITS_PER_UNIT;
          gcc_assert (*bswap);
        }
      else
        return NULL;
    }

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}
1161 :
/* Pass descriptor for the bswap recognition pass below.  */

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1174 :
class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only under -fexpensive-optimizations with optimization enabled,
     and only when a unit is 8 bits, which the byte-marker encoding used
     by the symbolic numbers relies on.  */
  bool gate (function *) final override
  {
    return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
  }

  unsigned int execute (function *) final override;

}; // class pass_optimize_bswap
1191 :
/* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
   VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
   first.  BEFORE selects whether any intermediate cast statements are
   inserted before (true) or after (false) the statement at *GSI.  */

static tree
bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
                    bool before)
{
  gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
              || POINTER_TYPE_P (TREE_TYPE (val)));
  if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
    {
      HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
      /* Pointers are first converted to a pointer-sized integer so the
         subsequent integer narrowing/widening cast is valid.  */
      if (POINTER_TYPE_P (TREE_TYPE (val)))
        {
          gimple *g
            = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
                                   NOP_EXPR, val);
          if (before)
            gsi_insert_before (gsi, g, GSI_SAME_STMT);
          else
            gsi_insert_after (gsi, g, GSI_NEW_STMT);
          val = gimple_assign_lhs (g);
        }
      tree itype = build_nonstandard_integer_type (prec, 1);
      gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
      if (before)
        gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
        gsi_insert_after (gsi, g, GSI_NEW_STMT);
      val = gimple_assign_lhs (g);
    }
  return build1 (VIEW_CONVERT_EXPR, type, val);
}
1226 :
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and set-alias of the
   load to perform are also given in N while the builtin bswap invoke is given
   in FNDECL.  MASK is an AND mask to apply to the bswap result (all ones if
   none) and L_ROTATE a left-rotate count applied afterwards (0 if none).
   Finally, if a load is involved, INS_STMT refers to one of the
   load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing of basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
               tree bswap_type, tree load_type, struct symbolic_number *n,
               bool bswap, uint64_t mask, uint64_t l_rotate)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt, *mask_stmt = NULL, *rotl_stmt = NULL;
  tree_code conv_code = NOP_EXPR;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    {
      tgt = gimple_assign_lhs (cur_stmt);
      /* When replacing a vector CONSTRUCTOR/VEC_PACK_TRUNC_EXPR the result
         must be reinterpreted, not value-converted.  */
      if ((gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
           || gimple_assign_rhs_code (cur_stmt) == VEC_PACK_TRUNC_EXPR)
          && tgt
          && VECTOR_TYPE_P (TREE_TYPE (tgt)))
        conv_code = VIEW_CONVERT_EXPR;
    }

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
        gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
        {
          basic_block ins_bb = gimple_bb (ins_stmt);
          basic_block cur_bb = gimple_bb (cur_stmt);
          if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
            return NULL_TREE;

          /* Move cur_stmt just before one of the load of the original
             to ensure it has the same VUSE.  See PR61517 for what could
             go wrong.  */
          if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
            reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
          gsi_move_before (&gsi, &gsi_ins);
          gsi = gsi_for_stmt (cur_stmt);
        }
      else
        gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
         of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = unshare_expr (addr_expr);
      else
        {
          addr_tmp = unshare_expr (n->base_addr);
          if (!is_gimple_mem_ref_addr (addr_tmp))
            addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
                                                   is_gimple_mem_ref_addr,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);
          load_offset = n->bytepos;
          if (n->offset)
            {
              tree off
                = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
                                            true, NULL_TREE, true,
                                            GSI_SAME_STMT);
              gimple *stmt
                = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
                                       POINTER_PLUS_EXPR, addr_tmp, off);
              gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              addr_tmp = gimple_assign_lhs (stmt);
            }
        }

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);

      if (!bswap)
        {
          if (n->range == 16)
            nop_stats.found_16bit++;
          else if (n->range == 32)
            nop_stats.found_32bit++;
          else
            {
              gcc_assert (n->range == 64);
              nop_stats.found_64bit++;
            }

          /* Convert the result of load if necessary.  */
          if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
            {
              val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
                                            "load_dst");
              load_stmt = gimple_build_assign (val_tmp, val_expr);
              gimple_set_vuse (load_stmt, n->vuse);
              gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
              if (conv_code == VIEW_CONVERT_EXPR)
                val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
                                              true);
              gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
              update_stmt (cur_stmt);
            }
          else if (cur_stmt)
            {
              gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              update_stmt (cur_stmt);
            }
          else
            {
              tgt = make_ssa_name (load_type);
              cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
            }

          if (dump_file)
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
                       (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0);
            }
          return tgt;
        }
      else
        {
          val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
          load_stmt = gimple_build_assign (val_tmp, val_expr);
          gimple_set_vuse (load_stmt, n->vuse);
          gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
        }
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
        {
          if (!is_gimple_val (src))
            return NULL_TREE;
          if (conv_code == VIEW_CONVERT_EXPR)
            src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
          g = gimple_build_assign (tgt, conv_code, src);
        }
      else if (cur_stmt)
        g = gimple_build_assign (tgt, src);
      else
        tgt = src;
      if (n->range == 16)
        nop_stats.found_16bit++;
      else if (n->range == 32)
        nop_stats.found_32bit++;
      else
        {
          gcc_assert (n->range == 64);
          nop_stats.found_64bit++;
        }
      if (dump_file)
        {
          fprintf (dump_file,
                   "%d bit reshuffle in target endianness found at: ",
                   (int) n->range);
          if (cur_stmt)
            print_gimple_stmt (dump_file, cur_stmt, 0);
          else
            {
              print_generic_expr (dump_file, tgt, TDF_NONE);
              fprintf (dump_file, "\n");
            }
        }
      if (cur_stmt)
        gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Apply the post-bswap AND mask, if any.  */
  if (mask != ~(uint64_t) 0)
    {
      tree m = build_int_cst (bswap_type, mask);
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      gimple_set_lhs (bswap_stmt, tmp);
      mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
      tmp = tgt;
    }

  /* Apply the post-bswap left rotation, if any.  */
  if (l_rotate)
    {
      tree m = build_int_cst (bswap_type, l_rotate);
      tmp = make_temp_ssa_name (bswap_type, NULL,
                                mask_stmt ? "bswapmaskdst" : "bswapdst");
      gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
      rotl_stmt = gimple_build_assign (tgt, LROTATE_EXPR, tmp, m);
      tmp = tgt;
    }

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      tree atmp = tmp;
      gimple_stmt_iterator gsi2 = gsi;
      if (conv_code == VIEW_CONVERT_EXPR)
        atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
      gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
      gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
    }

  /* The last statement in the chain (rotate, then mask, then the bswap
     itself) produces the final value.  */
  gimple_set_lhs (rotl_stmt ? rotl_stmt
                  : mask_stmt ? mask_stmt : bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
               (int) n->range);
      if (cur_stmt)
        print_gimple_stmt (dump_file, cur_stmt, 0);
      else
        {
          print_generic_expr (dump_file, tgt, TDF_NONE);
          fprintf (dump_file, "\n");
        }
    }

  if (cur_stmt)
    {
      if (rotl_stmt)
        gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
      if (mask_stmt)
        gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    {
      gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
      if (mask_stmt)
        gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
      if (rotl_stmt)
        gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
    }
  return tgt;
}
1536 :
/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR/VEC_PACK_TRUNC_EXPR
   on the rhs using bswap optimizations.  CDI_DOMINATORS need to be
   computed on entry.  Return true if it has been optimized and
   TODO_update_ssa is needed.  */

static bool
maybe_optimize_vector_constructor (gimple *cur_stmt)
{
  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
  struct symbolic_number n;
  bool bswap;

  gcc_assert (is_gimple_assign (cur_stmt));
  switch (gimple_assign_rhs_code (cur_stmt))
    {
    case CONSTRUCTOR:
    case VEC_PACK_TRUNC_EXPR:
      break;
    default:
      gcc_unreachable ();
    }

  /* Only vectors of integral elements with a defined lhs qualify.  */
  tree rhs = gimple_assign_rhs1 (cur_stmt);
  if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
      || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
      || gimple_assign_lhs (cur_stmt) == NULL_TREE)
    return false;

  /* Pick the load type and bswap builtin matching the vector's total
     bit size; bail out when the target lacks a usable bswap.  */
  HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
  switch (sz)
    {
    case 16:
      load_type = bswap_type = uint16_type_node;
      break;
    case 32:
      if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
          && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
        {
          load_type = uint32_type_node;
          fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
          bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
        }
      else
        return false;
      break;
    case 64:
      if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
          && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
              || (word_mode == SImode
                  && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
                  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
        {
          load_type = uint64_type_node;
          fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
          bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
        }
      else
        return false;
      break;
    default:
      return false;
    }

  bool cast64_to_32;
  uint64_t mask, l_rotate;
  gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
                                        &cast64_to_32, &mask, &l_rotate);
  /* Only full-width, unmasked, uncast matches are handled here.  */
  if (!ins_stmt
      || n.range != (unsigned HOST_WIDE_INT) sz
      || cast64_to_32
      || mask != ~(uint64_t) 0)
    return false;

  if (bswap && !fndecl && n.range != 16)
    return false;

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
                        bswap_type, load_type, &n, bswap, mask,
                        l_rotate) != NULL_TREE;
}
1619 :
/* Find manual byte swap implementations as well as load in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to bswap builtin invocation or
   simple load according to the target endianness.  */

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  /* Probe target support once up front; a 64-bit bswap can also be
     synthesized from two 32-bit ones on SImode-word targets.  */
  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
         widest match.  As bswap pattern matching doesn't handle previously
         inserted smaller bswap replacements as sub-patterns, the wider
         variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
        {
          gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
          enum tree_code code;
          struct symbolic_number n;
          bool bswap, cast64_to_32;
          uint64_t mask, l_rotate;

          /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
             might be moved to a different basic block by bswap_replace and gsi
             must not point to it if that's the case.  Moving the gsi_prev
             there makes sure that gsi points to the statement previous to
             cur_stmt while still making sure that all statements are
             considered in this basic block.  */
          gsi_prev (&gsi);

          if (!is_gimple_assign (cur_stmt))
            continue;

          /* Only statements that can be the root of a bswap/endian-read
             pattern are worth analyzing.  */
          code = gimple_assign_rhs_code (cur_stmt);
          switch (code)
            {
            case LROTATE_EXPR:
            case RROTATE_EXPR:
              if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
                  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
                     % BITS_PER_UNIT)
                continue;
              /* Fall through.  */
            case BIT_IOR_EXPR:
            case BIT_XOR_EXPR:
            case PLUS_EXPR:
              break;
            case CONSTRUCTOR:
            case VEC_PACK_TRUNC_EXPR:
              {
                tree rhs = gimple_assign_rhs1 (cur_stmt);
                if (VECTOR_TYPE_P (TREE_TYPE (rhs))
                    && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
                  break;
              }
              continue;
            default:
              continue;
            }

          ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
                                        &cast64_to_32, &mask, &l_rotate);

          if (!ins_stmt)
            continue;

          switch (n.range)
            {
            case 16:
              /* Already in canonical form, nothing to do.  */
              if (code == LROTATE_EXPR || code == RROTATE_EXPR)
                continue;
              load_type = bswap_type = uint16_type_node;
              break;
            case 32:
              load_type = uint32_type_node;
              if (bswap32_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
                  bswap_type = bswap32_type;
                }
              break;
            case 64:
              load_type = uint64_type_node;
              if (bswap64_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
                  bswap_type = bswap64_type;
                }
              break;
            default:
              continue;
            }

          if (bswap && !fndecl && n.range != 16)
            continue;

          if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
                             bswap_type, load_type, &n, bswap, mask,
                             l_rotate))
            changed = true;
        }
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
                            nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
                            nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
                            nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
                            bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
                            bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
                            bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}
1771 :
1772 : } // anon namespace
1773 :
/* Factory entry point: create a fresh instance of the bswap
   optimization pass for context CTXT.  The caller owns the
   returned object.  */

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}
1779 :
1780 : namespace {
1781 :
1782 : /* Struct recording one operand for the store, which is either a constant,
1783 : then VAL represents the constant and all the other fields are zero, or
1784 : a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1785 : and the other fields also reflect the memory load, or an SSA name, then
1786 : VAL represents the SSA name and all the other fields are zero. */
1787 :
class store_operand_info
{
public:
  /* The constant value, the reference of the memory load, or the
     SSA name (see the comment above for which fields are valid in
     each case).  */
  tree val;
  /* For a memory load, the base address; NULL_TREE otherwise.  */
  tree base_addr;
  /* Size in bits of the loaded value (memory loads only).  */
  poly_uint64 bitsize;
  /* Bit offset of the load (memory loads only).  */
  poly_uint64 bitpos;
  /* First bit of the bit region containing the load (memory loads
     only).  */
  poly_uint64 bitregion_start;
  /* One past the last bit of the bit region (memory loads only).  */
  poly_uint64 bitregion_end;
  /* The statement performing the load (memory loads only).  */
  gimple *stmt;
  /* True if the operand value is inverted before being used;
     presumably via BIT_NOT_EXPR, cf. store_immediate_info::bit_not_p
     -- confirm against the users of this field.  */
  bool bit_not_p;
  store_operand_info ();
};
1801 :
1802 10082550 : store_operand_info::store_operand_info ()
1803 10082550 : : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1804 10082550 : bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1805 : {
1806 0 : }
1807 :
1808 : /* Struct recording the information about a single store of an immediate
1809 : to memory. These are created in the first phase and coalesced into
1810 : merged_store_group objects in the second phase. */
1811 :
class store_immediate_info
{
public:
  /* Size of the store in bits.  */
  unsigned HOST_WIDE_INT bitsize;
  /* Bit offset of the store.  */
  unsigned HOST_WIDE_INT bitpos;
  /* First bit of the bit region containing the store.  */
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  /* The store statement itself.  */
  gimple *stmt;
  /* Position of the statement in the program; used as a total order
     by the sort_by_order and sort_by_bitpos comparators below.  */
  unsigned int order;
  /* INTEGER_CST for constant store, STRING_CST for string store,
     MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
     BIT_INSERT_EXPR for bit insertion.
     LROTATE_EXPR if it can be only bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool, int,
			const store_operand_info &,
			const store_operand_info &);
};
1850 :
/* Initialize a store_immediate_info: each argument is copied verbatim
   into the field of the same meaning; OPS_SWAPPED_P always starts out
   false.  */

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    int nr2,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2), ops { op0r, op1r }
{
}
1870 :
1871 : /* Struct representing a group of stores to contiguous memory locations.
1872 : These are produced by the second phase (coalescing) and consumed in the
1873 : third phase that outputs the widened stores. */
1874 :
class merged_store_group
{
public:
  /* Bit position of the first store in the group.  */
  unsigned HOST_WIDE_INT start;
  /* Number of bits covered, from START to the end of the last store.  */
  unsigned HOST_WIDE_INT width;
  /* Union of the bit regions of all stores in the group.  */
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  /* Bit position relative to which ALIGN holds.  */
  unsigned HOST_WIDE_INT align_base;
  /* Likewise for the alignments of the (at most two) load operands.  */
  poly_uint64 load_align_base[2];

  /* Known alignment of the store destination.  */
  unsigned int align;
  /* Known alignment of each load operand, 0 if there is no load.  */
  unsigned int load_align[2];
  /* Smallest and largest ORDER of the statements in the group.  */
  unsigned int first_order;
  unsigned int last_order;
  /* True if any store in the group is a BIT_INSERT_EXPR.  */
  bool bit_insertion;
  /* True if the group should be output as a string concatenation.  */
  bool string_concatenation;
  /* True if every store in the group is an INTEGER_CST.  */
  bool only_constants;
  /* True if the stores form a gap-free consecutive sequence.  */
  bool consecutive;
  /* Order of the first store that may not be merged, ~0U if unknown;
     NOTE(review): only initialized in this chunk, maintained by code
     outside it.  */
  unsigned int first_nonmergeable_order;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  /* Merged value and write mask, allocated as one buffer by
     apply_stores (MASK points into VAL's allocation); mask bits are
     cleared for each bit actually written by a store.  */
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};
1916 :
1917 : /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1918 :
static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (fd == NULL)
    return;

  /* Two hex digits plus a trailing space per byte, then a newline.  */
  unsigned int i = 0;
  while (i < len)
    {
      fprintf (fd, "%02x ", ptr[i]);
      i++;
    }
  fputc ('\n', fd);
}
1929 :
1930 : /* Clear out LEN bits starting from bit START in the byte array
1931 : PTR. This clears the bits to the *right* from START.
1932 : START must be within [0, BITS_PER_UNIT) and counts starting from
1933 : the least significant bit. */
1934 :
static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      /* The whole region fits inside the first byte: build a LEN-bit
	 wide mask whose most significant bit sits at position START.  */
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      /* Region starts mid-byte: clear through the end of this byte,
	 then recurse on the remainder starting at the next byte's MSB.  */
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      /* Byte-aligned start: clear whole bytes with memset and recurse
	 once more for the sub-byte tail, if any.  */
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
1966 :
1967 : /* In the byte array PTR clear the bit region starting at bit
1968 : START and is LEN bits wide.
1969 : For regions spanning multiple bytes do this recursively until we reach
1970 : zero LEN or a region contained within a single byte. */
1971 :
static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  /* Skip over whole leading bytes until START falls inside *PTR.  */
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case: the region is contained in a single byte.
     Bit 0 of the mask corresponds to the least significant bit.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
2011 :
2012 : /* Write BITLEN bits of EXPR to the byte array PTR at
2013 : bit position BITPOS. PTR should contain TOTAL_BYTES elements.
2014 : Return true if the operation succeeded. */
2015 :
static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
		       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  /* An empty CONSTRUCTOR zero-fills its whole type; it is handled
     specially because native_encode_expr cannot encode it.  */
  bool empty_ctor_p
    = (TREE_CODE (expr) == CONSTRUCTOR
       && CONSTRUCTOR_NELTS (expr) == 0
       && TYPE_SIZE_UNIT (TREE_TYPE (expr))
       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
  /* Sub-byte operation: the value or its position is not byte-aligned,
     or its size has no integer mode (and it is not an empty ctor).  */
  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
			|| (bitpos % BITS_PER_UNIT)
			|| (!int_mode_for_size (bitlen, 0).exists ()
			    && !empty_ctor_p));

  /* Fast path: everything is byte-aligned, so we can memset or
     native_encode_expr directly into PTR.  */
  if (!sub_byte_op_p)
    {
      if (first_byte >= total_bytes)
	return false;
      total_bytes -= first_byte;
      if (empty_ctor_p)
	{
	  unsigned HOST_WIDE_INT rhs_bytes
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
	  if (rhs_bytes > total_bytes)
	    return false;
	  memset (ptr + first_byte, '\0', rhs_bytes);
	  return true;
	}
      /* native_encode_expr returns 0 on failure.  */
      return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
    }

  /* LITTLE-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     |--------|--------|--------| ptr + first_byte
           ^              ^
           xxx xxxxxxxx xxx< bp>
           |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
                                          <------bitlen---->< bp>
     Then we clear the destination bits:
     |---00000|00000000|000-----| ptr + first_byte
        <-------bitlen--->< bp>

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.

     BIG-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
       ptr + first_byte |--------|--------|--------|
                            ^              ^
                       <bp >xxx xxxxxxxx xxx
                            |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right by (carrying the bits over as necessary).
     We shift by as much as needed to align the most significant bit of EXPR
     with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
        <---bitlen----->         <bp ><-----bitlen----->
     Then we clear the destination bits:
     ptr + first_byte |-----000||00000000||00000---|
                      <bp ><-------bitlen----->
     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
     The awkwardness comes from the fact that bitpos is counted from the
     most significant bit of a byte.  */

  /* We must be dealing with fixed-size data at this point, since the
     total size is also fixed.  */
  unsigned int byte_size;
  if (empty_ctor_p)
    {
      unsigned HOST_WIDE_INT rhs_bytes
	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
      if (rhs_bytes > total_bytes)
	return false;
      byte_size = rhs_bytes;
    }
  else
    {
      fixed_size_mode mode
	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
      byte_size
	= mode == BLKmode
	? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
	: GET_MODE_SIZE (mode);
    }
  /* Allocate an extra byte so that we have space to shift into.  */
  byte_size++;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr or empty ctors.  */
  if (!empty_ctor_p
      && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;

  byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
	clear_bit_region (tmpbuf, bitlen,
			  byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	   <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
      /* If no shift occurred the extra scratch byte stayed clear and
	 is not part of the result.  */
      if (shift_amnt == 0)
	byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
2215 :
2216 : /* Sorting function for store_immediate_info objects.
2217 : Sorts them by bitposition. */
2218 :
2219 : static int
2220 17605603 : sort_by_bitpos (const void *x, const void *y)
2221 : {
2222 17605603 : store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2223 17605603 : store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2224 :
2225 17605603 : if ((*tmp)->bitpos < (*tmp2)->bitpos)
2226 : return -1;
2227 9025898 : else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2228 : return 1;
2229 : else
2230 : /* If they are the same let's use the order which is guaranteed to
2231 : be different. */
2232 746652 : return (*tmp)->order - (*tmp2)->order;
2233 : }
2234 :
2235 : /* Sorting function for store_immediate_info objects.
2236 : Sorts them by the order field. */
2237 :
2238 : static int
2239 6702182 : sort_by_order (const void *x, const void *y)
2240 : {
2241 6702182 : store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2242 6702182 : store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2243 :
2244 6702182 : if ((*tmp)->order < (*tmp2)->order)
2245 : return -1;
2246 3241463 : else if ((*tmp)->order > (*tmp2)->order)
2247 : return 1;
2248 :
2249 0 : gcc_unreachable ();
2250 : }
2251 :
2252 : /* Initialize a merged_store_group object from a store_immediate_info
2253 : object. */
2254 :
merged_store_group::merged_store_group (store_immediate_info *info)
{
  /* The new group initially covers exactly the first store.  */
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
  string_concatenation = info->rhs_code == STRING_CST;
  only_constants = info->rhs_code == INTEGER_CST;
  consecutive = true;
  first_nonmergeable_order = ~0U;
  lp_nr = info->lp_nr;
  /* Record the known alignment of the store destination and the bit
     position at which that alignment holds.  */
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &align, &align_bitpos);
  align_base = start - align_bitpos;
  /* Likewise for each of the (at most two) load operands; zero when
     the operand is not a memory load.  */
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (op.base_addr == NULL_TREE)
	{
	  load_align[i] = 0;
	  load_align_base[i] = 0;
	}
      else
	{
	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }
  stores.create (1);
  stores.safe_push (info);
  /* A single store is both first and last statement of the group.  */
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}
2297 :
2298 837725 : merged_store_group::~merged_store_group ()
2299 : {
2300 837725 : if (val)
2301 400352 : XDELETEVEC (val);
2302 837725 : }
2303 :
2304 : /* Return true if the store described by INFO can be merged into the group. */
2305 :
bool
merged_store_group::can_be_merged_into (store_immediate_info *info)
{
  /* Do not merge bswap patterns.  */
  if (info->rhs_code == LROTATE_EXPR)
    return false;

  /* Stores in different EH landing-pad regions never mix.  */
  if (info->lp_nr != lp_nr)
    return false;

  /* The canonical case.  */
  if (info->rhs_code == stores[0]->rhs_code)
    return true;

  /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    return !string_concatenation;

  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    return !string_concatenation;

  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
     only for small regions since this can generate a lot of instructions.  */
  if (info->rhs_code == MEM_REF
      && (stores[0]->rhs_code == INTEGER_CST
	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  /* Likewise with the roles of the existing store and INFO reversed.  */
  if (stores[0]->rhs_code == MEM_REF
      && (info->rhs_code == INTEGER_CST
	  || info->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
  if (info->rhs_code == STRING_CST
      && stores[0]->rhs_code == INTEGER_CST
      && stores[0]->bitsize == CHAR_BIT)
    return !bit_insertion;

  if (stores[0]->rhs_code == STRING_CST
      && info->rhs_code == INTEGER_CST
      && info->bitsize == CHAR_BIT)
    return !bit_insertion;

  return false;
}
2358 :
2359 : /* Helper method for merge_into and merge_overlapping to do
2360 : the common part. */
2361 :
void
merged_store_group::do_merge (store_immediate_info *info)
{
  /* Grow the group's bit region to cover the new store's region.  */
  bitregion_start = MIN (bitregion_start, info->bitregion_start);
  bitregion_end = MAX (bitregion_end, info->bitregion_end);

  /* Keep the largest known destination alignment and the bit position
     at which it was established.  */
  unsigned int this_align;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &this_align, &align_bitpos);
  if (this_align > align)
    {
      align = this_align;
      align_base = info->bitpos - align_bitpos;
    }
  /* Likewise for the alignment of each memory-load operand.  */
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (!op.base_addr)
	continue;

      get_object_alignment_1 (op.val, &this_align, &align_bitpos);
      if (this_align > load_align[i])
	{
	  load_align[i] = this_align;
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }

  /* Track the first and last statements of the group by order.  */
  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }

  /* The group stays consecutive only while each new store begins
     exactly where the stores so far end.  */
  if (info->bitpos != start + width)
    consecutive = false;

  /* We need to use extraction if there is any bit-field.  */
  if (info->rhs_code == BIT_INSERT_EXPR)
    {
      bit_insertion = true;
      gcc_assert (!string_concatenation);
    }

  /* We want to use concatenation if there is any string.  */
  if (info->rhs_code == STRING_CST)
    {
      string_concatenation = true;
      gcc_assert (!bit_insertion);
    }

  /* But we cannot use it if we don't have consecutive stores.  */
  if (!consecutive)
    string_concatenation = false;

  if (info->rhs_code != INTEGER_CST)
    only_constants = false;
}
2428 :
2429 : /* Merge a store recorded by INFO into this merged store.
2430 : The store is not overlapping with the existing recorded
2431 : stores. */
2432 :
void
merged_store_group::merge_into (store_immediate_info *info)
{
  do_merge (info);

  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
	      && info->bitregion_start <= bitregion_end);

  /* The store begins at or after the current end of the group, so the
     new width runs from the group's start to the store's end.  */
  width = info->bitpos + info->bitsize - start;
}
2444 :
2445 : /* Merge a store described by INFO into this merged store.
2446 : INFO overlaps in some way with the current store (i.e. it's not contiguous
2447 : which is handled by merged_store_group::merge_into). */
2448 :
2449 : void
2450 673918 : merged_store_group::merge_overlapping (store_immediate_info *info)
2451 : {
2452 673918 : do_merge (info);
2453 :
2454 : /* If the store extends the size of the group, extend the width. */
2455 673918 : if (info->bitpos + info->bitsize > start + width)
2456 492067 : width = info->bitpos + info->bitsize - start;
2457 673918 : }
2458 :
2459 : /* Go through all the recorded stores in this group in program order and
2460 : apply their values to the VAL byte array to create the final merged
2461 : value. Return true if the operation succeeded. */
2462 :
bool
merged_store_group::apply_stores ()
{
  store_immediate_info *info;
  unsigned int i;

  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  The region bounds must also be byte-aligned for the
     byte-array representation below to work.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;

  /* Really do string concatenation for large strings only.  */
  if (buf_size <= MOVE_MAX)
    string_concatenation = false;

  /* String concatenation only works for byte aligned start and end.  */
  if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
    string_concatenation = false;

  /* Create a power-of-2-sized buffer for native_encode_expr.  */
  if (!string_concatenation)
    buf_size = 1 << ceil_log2 (buf_size);

  /* VAL and MASK share a single allocation; MASK starts all-ones and
     has a bit cleared for every bit actually written by a store.  */
  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  /* Apply the stores in program order so later stores overwrite
     earlier ones.  */
  stores.qsort (sort_by_order);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      tree cst;
      /* The constant operand, if any, is the one without a base
	 address (see store_operand_info).  */
      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
	cst = info->ops[0].val;
      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
	cst = info->ops[1].val;
      else
	cst = NULL_TREE;
      bool ret = true;
      if (cst && info->rhs_code != BIT_INSERT_EXPR)
	ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
				     buf_size);
      /* Mark the bits covered by this store as written in MASK.  */
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (m, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)),
			     info->bitsize);
      else
	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
      if (cst && dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fputs ("After writing ", dump_file);
	      print_generic_expr (dump_file, cst, TDF_NONE);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d\n", info->bitsize, pos_in_buffer);
	      fputs (" the merged value contains ", dump_file);
	      dump_char_array (dump_file, val, buf_size);
	      fputs (" the merged mask contains ", dump_file);
	      dump_char_array (dump_file, mask, buf_size);
	      if (bit_insertion)
		fputs (" bit insertion is required\n", dump_file);
	      if (string_concatenation)
		fputs (" string concatenation is required\n", dump_file);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
    }
  /* Restore the bitposition order expected by the consumers.  */
  stores.qsort (sort_by_bitpos);
  return true;
}
2544 :
2545 : /* Structure describing the store chain. */
2546 :
2547 : class imm_store_chain_info
2548 : {
2549 : public:
2550 : /* Doubly-linked list that imposes an order on chain processing.
2551 : PNXP (prev's next pointer) points to the head of a list, or to
2552 : the next field in the previous chain in the list.
2553 : See pass_store_merging::m_stores_head for more rationale. */
2554 : imm_store_chain_info *next, **pnxp;
2555 : tree base_addr;
2556 : auto_vec<store_immediate_info *> m_store_info;
2557 : auto_vec<merged_store_group *> m_merged_store_groups;
2558 :
2559 1899336 : imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2560 1899336 : : next (inspt), pnxp (&inspt), base_addr (b_a)
2561 : {
2562 1899336 : inspt = this;
2563 1899336 : if (next)
2564 : {
2565 628475 : gcc_checking_assert (pnxp == next->pnxp);
2566 628475 : next->pnxp = &next;
2567 : }
2568 1899336 : }
2569 1899336 : ~imm_store_chain_info ()
2570 : {
2571 1899336 : *pnxp = next;
2572 1899336 : if (next)
2573 : {
2574 603703 : gcc_checking_assert (&next == next->pnxp);
2575 603703 : next->pnxp = pnxp;
2576 : }
2577 1899336 : }
2578 : bool terminate_and_process_chain ();
2579 : bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2580 : unsigned int);
2581 : bool coalesce_immediate_stores ();
2582 : bool output_merged_store (merged_store_group *);
2583 : bool output_merged_stores ();
2584 : };
2585 :
/* Pass descriptor for the store merging pass, consumed by the
   pass_store_merging constructor below.  */
const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS, /* type */
  "store-merging", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2597 :
class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
      m_n_chains (0), m_n_stores (0)
  {
  }

  /* Pass not supported for PDP-endian, nor for insane hosts or
     target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  bool
  gate (function *) final override
  {
    return flag_store_merging
	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
	   && CHAR_BIT == 8
	   && BITS_PER_UNIT == 8;
  }

  unsigned int execute (function *) final override;

private:
  /* All currently tracked chains, keyed by base address.  */
  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  /* The number of store chains currently tracked.  */
  unsigned m_n_chains;
  /* The number of stores currently tracked.  */
  unsigned m_n_stores;

  /* Worker routines; all but the last are defined later in the file.  */
  bool process_store (gimple *);
  bool terminate_and_process_chain (imm_store_chain_info *);
  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
  bool terminate_and_process_all_chains ();
}; // class pass_store_merging
2645 :
2646 : /* Terminate and process all recorded chains. Return true if any changes
2647 : were made. */
2648 :
2649 : bool
2650 1158758 : pass_store_merging::terminate_and_process_all_chains ()
2651 : {
2652 1158758 : bool ret = false;
2653 2051910 : while (m_stores_head)
2654 893152 : ret |= terminate_and_process_chain (m_stores_head);
2655 1158758 : gcc_assert (m_stores.is_empty ());
2656 1158758 : return ret;
2657 : }
2658 :
2659 : /* Terminate all chains that are affected by the statement STMT.
2660 : CHAIN_INFO is the chain we should ignore from the checks if
2661 : non-NULL. Return true if any changes were made. */
2662 :
bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  /* If STMT is itself a store, remember its destination so we can also
     check output dependences against the tracked stores.  */
  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
  ao_ref store_lhs_ref;
  ao_ref_init (&store_lhs_ref, store_lhs);
  /* NEXT is fetched before any possible deletion of CUR, because
     terminate_and_process_chain below frees the chain object.  */
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && *chain_info == cur)
	continue;

      store_immediate_info *info;
      unsigned int i;
      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
	{
	  tree lhs = gimple_assign_lhs (info->stmt);
	  ao_ref lhs_ref;
	  ao_ref_init (&lhs_ref, lhs);
	  /* Terminate the chain if STMT reads, clobbers, or (when STMT
	     is a store) may overlap the location written by INFO.  */
	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
						   &lhs_ref, false)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "stmt causes chain termination:\n");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      ret |= terminate_and_process_chain (cur);
	      break;
	    }
	}
    }

  return ret;
}
2711 :
2712 : /* Helper function. Terminate the recorded chain storing to base object
2713 : BASE. Return true if the merging and output was successful. The m_stores
2714 : entry is removed after the processing in any case. */
2715 :
2716 : bool
2717 1899336 : pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2718 : {
2719 1899336 : m_n_stores -= chain_info->m_store_info.length ();
2720 1899336 : m_n_chains--;
2721 1899336 : bool ret = chain_info->terminate_and_process_chain ();
2722 1899336 : m_stores.remove (chain_info->base_addr);
2723 1899336 : delete chain_info;
2724 1899336 : return ret;
2725 : }
2726 :
2727 : /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2728 : may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2729 : be able to sink load of REF across stores between FIRST and LAST, up
2730 : to right before LAST. */
2731 :
bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
{
  ao_ref r;
  ao_ref_init (&r, ref);
  unsigned int count = 0;
  /* Start from LAST's virtual definition and walk the virtual use-def
     chain backwards until we reach FIRST.  */
  tree vop = gimple_vdef (last);
  gimple *stmt;

  /* Return true conservatively if the basic blocks are different.  */
  if (gimple_bb (first) != gimple_bb (last))
    return true;

  do
    {
      stmt = SSA_NAME_DEF_STMT (vop);
      /* STMT may clobber the location REF reads from.  */
      if (stmt_may_clobber_ref_p_1 (stmt, &r))
	return true;
      /* A store whose destination REF feeds would also be disturbed by
	 sinking the load of REF past it.  */
      if (gimple_store_p (stmt)
	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
	return true;
      /* Avoid quadratic compile time by bounding the number of checks
	 we perform.  */
      if (++count > MAX_STORE_ALIAS_CHECKS)
	return true;
      vop = gimple_vuse (stmt);
    }
  while (stmt != first);

  return false;
}
2763 :
2764 : /* Return true if INFO->ops[IDX] is mergeable with the
2765 : corresponding loads already in MERGED_STORE group.
2766 : BASE_ADDR is the base address of the whole store group. */
2767 :
bool
compatible_load_p (merged_store_group *merged_store,
		   store_immediate_info *info,
		   tree base_addr, int idx)
{
  store_immediate_info *infof = merged_store->stores[0];
  /* The load must exist, come from the same base as the group's first
     load, and sit at the same offset relative to that first load as
     the store does relative to the group's first store.  */
  if (!info->ops[idx].base_addr
      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
		   info->bitpos - infof->bitpos)
      || !operand_equal_p (info->ops[idx].base_addr,
			   infof->ops[idx].base_addr, 0))
    return false;

  store_immediate_info *infol = merged_store->stores.last ();
  tree load_vuse = gimple_vuse (info->ops[idx].stmt);
  /* In this case all vuses should be the same, e.g.
     _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
     or
     _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
     and we can emit the coalesced load next to any of those loads.  */
  if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
      && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
    return true;

  /* Otherwise, at least for now require that the load has the same
     vuse as the store.  See following examples.  */
  if (gimple_vuse (info->stmt) != load_vuse)
    return false;

  /* Require the same property for the group's first and last stores
     as well, so all considered loads pair with their own stores.  */
  if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
      || (infof != infol
	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
    return false;

  /* If the load is from the same location as the store, already
     the construction of the immediate chain info guarantees no intervening
     stores, so no further checks are needed.  Example:
     _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
  if (known_eq (info->ops[idx].bitpos, info->bitpos)
      && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
    return true;

  /* Otherwise, we need to punt if any of the loads can be clobbered by any
     of the stores in the group, or any other stores in between those.
     Previous calls to compatible_load_p ensured that for all the
     merged_store->stores IDX loads, no stmts starting with
     merged_store->first_stmt and ending right before merged_store->last_stmt
     clobbers those loads.  */
  gimple *first = merged_store->first_stmt;
  gimple *last = merged_store->last_stmt;
  /* The stores are sorted by increasing store bitpos, so if info->stmt store
     comes before the so far first load, we'll be changing
     merged_store->first_stmt.  In that case we need to give up if
     any of the earlier processed loads clobber with the stmts in the new
     range.  */
  if (info->order < merged_store->first_order)
    {
      for (store_immediate_info *infoc : merged_store->stores)
	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
	  return false;
      first = info->stmt;
    }
  /* Similarly, we could change merged_store->last_stmt, so ensure
     in that case no stmts in the new range clobber any of the earlier
     processed loads.  */
  else if (info->order > merged_store->last_order)
    {
      for (store_immediate_info *infoc : merged_store->stores)
	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
	  return false;
      last = info->stmt;
    }
  /* And finally, we'd be adding a new load to the set, ensure it isn't
     clobbered in the new range.  */
  if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
    return false;

  /* Otherwise, we are looking for:
     _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
     or
     _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
  return true;
}
2851 :
2852 : /* Add all refs loaded to compute VAL to REFS vector. */
2853 :
2854 : void
2855 231 : gather_bswap_load_refs (vec<tree> *refs, tree val)
2856 : {
2857 244 : if (TREE_CODE (val) != SSA_NAME)
2858 : return;
2859 :
2860 242 : gimple *stmt = SSA_NAME_DEF_STMT (val);
2861 242 : if (!is_gimple_assign (stmt))
2862 : return;
2863 :
2864 242 : if (gimple_assign_load_p (stmt))
2865 : {
2866 229 : refs->safe_push (gimple_assign_rhs1 (stmt));
2867 229 : return;
2868 : }
2869 :
2870 13 : switch (gimple_assign_rhs_class (stmt))
2871 : {
2872 2 : case GIMPLE_BINARY_RHS:
2873 2 : gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2874 : /* FALLTHRU */
2875 13 : case GIMPLE_UNARY_RHS:
2876 13 : gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2877 13 : break;
2878 0 : default:
2879 0 : gcc_unreachable ();
2880 : }
2881 : }
2882 :
2883 : /* Check if there are any stores in M_STORE_INFO after index I
2884 : (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2885 : a potential group ending with END that have their order
2886 : smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2887 : all the stores already merged and the one under consideration
2888 : have rhs_code of INTEGER_CST. Return true if there are no such stores.
2889 : Consider:
2890 : MEM[(long long int *)p_28] = 0;
2891 : MEM[(long long int *)p_28 + 8B] = 0;
2892 : MEM[(long long int *)p_28 + 16B] = 0;
2893 : MEM[(long long int *)p_28 + 24B] = 0;
2894 : _129 = (int) _130;
2895 : MEM[(int *)p_28 + 8B] = _129;
2896 : MEM[(int *)p_28].a = -1;
2897 : We already have
2898 : MEM[(long long int *)p_28] = 0;
2899 : MEM[(int *)p_28].a = -1;
2900 : stmts in the current group and need to consider if it is safe to
2901 : add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2902 : There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2903 : store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2904 : into the group and merging of those 3 stores is successful, merged
2905 : stmts will be emitted at the latest store from that group, i.e.
2906 : LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2907 : The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2908 : the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2909 : so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2910 : into the group. That way it will be its own store group and will
2911 : not be touched. If ALL_INTEGER_CST_P and there are overlapping
2912 : INTEGER_CST stores, those are mergeable using merge_overlapping,
2913 : so don't return false for those.
2914 :
2915 : Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2916 : (exclusive), whether they don't overlap the bitrange START to END
2917 : and have order in between FIRST_ORDER and LAST_ORDER. This is to
2918 : prevent merging in cases like:
2919 : MEM <char[12]> [&b + 8B] = {};
2920 : MEM[(short *) &b] = 5;
2921 : _5 = *x_4(D);
2922 : MEM <long long unsigned int> [&b + 2B] = _5;
2923 : MEM[(char *)&b + 16B] = 88;
2924 : MEM[(int *)&b + 20B] = 1;
2925 : The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2926 : be merged with it, because the = _5 store overlaps these and is in between
2927 : them in sort_by_order ordering. If it was merged, the merged store would
2928 : go after the = _5 store and thus change behavior. */
2929 :
2930 : static bool
2931 799134 : check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2932 : unsigned int i,
2933 : bool all_integer_cst_p, unsigned int first_order,
2934 : unsigned int last_order, unsigned HOST_WIDE_INT start,
2935 : unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2936 : unsigned end_earlier)
2937 : {
2938 799134 : unsigned int len = m_store_info.length ();
2939 810856 : for (unsigned int j = first_earlier; j < end_earlier; j++)
2940 : {
2941 11731 : store_immediate_info *info = m_store_info[j];
2942 11731 : if (info->order > first_order
2943 39 : && info->order < last_order
2944 11 : && info->bitpos + info->bitsize > start)
2945 : return false;
2946 : }
2947 882022 : for (++i; i < len; ++i)
2948 : {
2949 460554 : store_immediate_info *info = m_store_info[i];
2950 460554 : if (info->bitpos >= end)
2951 : break;
2952 82916 : if (info->order < last_order
2953 36563 : && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2954 : return false;
2955 : }
2956 : return true;
2957 : }
2958 :
2959 : /* Return true if m_store_info[first] and at least one following store
2960 : form a group which store try_size bitsize value which is byte swapped
2961 : from a memory load or some value, or identity from some value.
2962 : This uses the bswap pass APIs. */
2963 :
bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
					  unsigned int first,
					  unsigned int try_size,
					  unsigned int first_earlier)
{
  unsigned int len = m_store_info.length (), last = first;
  unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
  if (width >= try_size)
    return false;
  /* Find the run of gap-free consecutive stores starting at FIRST that
     covers exactly TRY_SIZE bits; each must carry a symbolic number
     (ins_stmt) and live in the same landing pad region.  */
  for (unsigned int i = first + 1; i < len; ++i)
    {
      if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
	  || m_store_info[i]->lp_nr != merged_store->lp_nr
	  || m_store_info[i]->ins_stmt == NULL)
	return false;
      width += m_store_info[i]->bitsize;
      if (width >= try_size)
	{
	  last = i;
	  break;
	}
    }
  if (width != try_size)
    return false;

  bool allow_unaligned
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  /* Punt if the combined store would not be aligned and we need alignment.  */
  if (!allow_unaligned)
    {
      unsigned int align = merged_store->align;
      unsigned HOST_WIDE_INT align_base = merged_store->align_base;
      /* Take the best known alignment among the stores in the run.  */
      for (unsigned int i = first + 1; i <= last; ++i)
	{
	  unsigned int this_align;
	  unsigned HOST_WIDE_INT align_bitpos = 0;
	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
				  &this_align, &align_bitpos);
	  if (this_align > align)
	    {
	      align = this_align;
	      align_base = m_store_info[i]->bitpos - align_bitpos;
	    }
	}
      unsigned HOST_WIDE_INT align_bitpos
	= (m_store_info[first]->bitpos - align_base) & (align - 1);
      if (align_bitpos)
	align = least_bit_hwi (align_bitpos);
      if (align < try_size)
	return false;
    }

  tree type;
  switch (try_size)
    {
    case 16: type = uint16_type_node; break;
    case 32: type = uint32_type_node; break;
    case 64: type = uint64_type_node; break;
    default: gcc_unreachable ();
    }
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  /* VUSE_STORE: -1 not yet determined, 0 all loads share one vuse,
     1 each load has the vuse of its corresponding store.  */
  int vuse_store = -1;
  unsigned int first_order = merged_store->first_order;
  unsigned int last_order = merged_store->last_order;
  gimple *first_stmt = merged_store->first_stmt;
  gimple *last_stmt = merged_store->last_stmt;
  unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
  store_immediate_info *infof = m_store_info[first];

  /* Shift each store's symbolic number to its position within the
     TRY_SIZE-bit value and merge them all with BIT_IOR_EXPR.  */
  for (unsigned int i = first; i <= last; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      struct symbolic_number this_n = info->n;
      this_n.type = type;
      if (!this_n.base_addr)
	this_n.range = try_size / BITS_PER_UNIT;
      else
	/* Update vuse in case it has changed by output_merged_stores.  */
	this_n.vuse = gimple_vuse (info->ins_stmt);
      unsigned int bitpos = info->bitpos - infof->bitpos;
      if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
			    BYTES_BIG_ENDIAN
			    ? try_size - info->bitsize - bitpos
			    : bitpos))
	return false;
      if (this_n.base_addr && vuse_store)
	{
	  unsigned int j;
	  /* Does this load's vuse match some store in the run?  If not,
	     the loads must all share one vuse (vuse_store == 0).  */
	  for (j = first; j <= last; ++j)
	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
	      break;
	  if (j > last)
	    {
	      if (vuse_store == 1)
		return false;
	      vuse_store = 0;
	    }
	}
      if (i == first)
	{
	  n = this_n;
	  ins_stmt = info->ins_stmt;
	}
      else
	{
	  if (n.base_addr && n.vuse != this_n.vuse)
	    {
	      if (vuse_store == 0)
		return false;
	      vuse_store = 1;
	    }
	  /* Track the order/stmt extremes and the covered bit range.  */
	  if (info->order > last_order)
	    {
	      last_order = info->order;
	      last_stmt = info->stmt;
	    }
	  else if (info->order < first_order)
	    {
	      first_order = info->order;
	      first_stmt = info->stmt;
	    }
	  end = MAX (end, info->bitpos + info->bitsize);

	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
					     &this_n, &n, BIT_IOR_EXPR);
	  if (ins_stmt == NULL)
	    return false;
	}
    }

  uint64_t cmpxchg, cmpnop;
  bool cast64_to_32;
  find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);

  /* A complete byte swap should make the symbolic number to start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n.n != cmpnop && n.n != cmpxchg)
    return false;

  /* For now.  */
  if (cast64_to_32)
    return false;

  if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
    return false;

  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
			 merged_store->start, end, first_earlier, first))
    return false;

  /* Don't handle memory copy this way if normal non-bswap processing
     would handle it too.  */
  if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
    {
      unsigned int i;
      for (i = first; i <= last; ++i)
	if (m_store_info[i]->rhs_code != MEM_REF)
	  break;
      if (i == last + 1)
	return false;
    }

  /* For an actual byte swap make sure the target can do it (either a
     bswap builtin with an optab handler, or a 16-bit rotate).  */
  if (n.n == cmpxchg)
    switch (try_size)
      {
      case 16:
	/* Will emit LROTATE_EXPR.  */
	break;
      case 32:
	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
	  break;
	return false;
      case 64:
	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	    && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		|| (word_mode == SImode
		    && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
		    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
	  break;
	return false;
      default:
	gcc_unreachable ();
      }

  if (!allow_unaligned && n.base_addr)
    {
      unsigned int align = get_object_alignment (n.src);
      if (align < try_size)
	return false;
    }

  /* If each load has vuse of the corresponding store, need to verify
     the loads can be sunk right before the last store.  */
  if (vuse_store == 1)
    {
      auto_vec<tree, 64> refs;
      for (unsigned int i = first; i <= last; ++i)
	gather_bswap_load_refs (&refs,
				gimple_assign_rhs1 (m_store_info[i]->stmt));

      for (tree ref : refs)
	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
	  return false;
      n.vuse = NULL_TREE;
    }

  /* Success: stamp the merged symbolic number on the first store and
     fold the rest of the run into MERGED_STORE.  */
  infof->n = n;
  infof->ins_stmt = ins_stmt;
  for (unsigned int i = first; i <= last; ++i)
    {
      m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
      m_store_info[i]->ops[0].base_addr = NULL_TREE;
      m_store_info[i]->ops[1].base_addr = NULL_TREE;
      if (i != first)
	merged_store->merge_into (m_store_info[i]);
    }

  return true;
}
3187 :
3188 : /* Go through the candidate stores recorded in m_store_info and merge them
3189 : into merged_store_group objects recorded into m_merged_store_groups
3190 : representing the widened stores. Return true if coalescing was successful
3191 : and the number of widened stores is fewer than the original number
3192 : of stores. */
3193 :
3194 : bool
3195 494918 : imm_store_chain_info::coalesce_immediate_stores ()
3196 : {
3197 : /* Anything less can't be processed. */
3198 621150 : if (m_store_info.length () < 2)
3199 : return false;
3200 :
3201 494918 : if (dump_file && (dump_flags & TDF_DETAILS))
3202 25 : fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3203 : m_store_info.length ());
3204 :
3205 494918 : store_immediate_info *info;
3206 494918 : unsigned int i, ignore = 0;
3207 494918 : unsigned int first_earlier = 0;
3208 494918 : unsigned int end_earlier = 0;
3209 :
3210 : /* Order the stores by the bitposition they write to. */
3211 494918 : m_store_info.qsort (sort_by_bitpos);
3212 :
3213 494918 : info = m_store_info[0];
3214 494918 : merged_store_group *merged_store = new merged_store_group (info);
3215 494918 : if (dump_file && (dump_flags & TDF_DETAILS))
3216 25 : fputs ("New store group\n", dump_file);
3217 :
3218 2122043 : FOR_EACH_VEC_ELT (m_store_info, i, info)
3219 : {
3220 1627125 : unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3221 :
3222 1627125 : if (i <= ignore)
3223 576678 : goto done;
3224 :
3225 : while (first_earlier < end_earlier
3226 1306304 : && (m_store_info[first_earlier]->bitpos
3227 266443 : + m_store_info[first_earlier]->bitsize
3228 266443 : <= merged_store->start))
3229 255857 : first_earlier++;
3230 :
3231 : /* First try to handle group of stores like:
3232 : p[0] = data >> 24;
3233 : p[1] = data >> 16;
3234 : p[2] = data >> 8;
3235 : p[3] = data;
3236 : using the bswap framework. */
3237 1050447 : if (info->bitpos == merged_store->start + merged_store->width
3238 681153 : && merged_store->stores.length () == 1
3239 369965 : && merged_store->stores[0]->ins_stmt != NULL
3240 114639 : && info->lp_nr == merged_store->lp_nr
3241 1165086 : && info->ins_stmt != NULL)
3242 : {
3243 : unsigned int try_size;
3244 305029 : for (try_size = 64; try_size >= 16; try_size >>= 1)
3245 229245 : if (try_coalesce_bswap (merged_store, i - 1, try_size,
3246 : first_earlier))
3247 : break;
3248 :
3249 76618 : if (try_size >= 16)
3250 : {
3251 834 : ignore = i + merged_store->stores.length () - 1;
3252 834 : m_merged_store_groups.safe_push (merged_store);
3253 834 : if (ignore < m_store_info.length ())
3254 : {
3255 324 : merged_store = new merged_store_group (m_store_info[ignore]);
3256 324 : end_earlier = ignore;
3257 : }
3258 : else
3259 510 : merged_store = NULL;
3260 834 : goto done;
3261 : }
3262 : }
3263 :
3264 1049613 : new_bitregion_start
3265 1049613 : = MIN (merged_store->bitregion_start, info->bitregion_start);
3266 1049613 : new_bitregion_end
3267 1049613 : = MAX (merged_store->bitregion_end, info->bitregion_end);
3268 :
3269 1049613 : if (info->order >= merged_store->first_nonmergeable_order
3270 1047800 : || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3271 1047800 : > (unsigned) param_store_merging_max_size))
3272 : ;
3273 :
3274 : /* |---store 1---|
3275 : |---store 2---|
3276 : Overlapping stores. */
3277 1047720 : else if (IN_RANGE (info->bitpos, merged_store->start,
3278 : merged_store->start + merged_store->width - 1)
3279 : /* |---store 1---||---store 2---|
3280 : Handle also the consecutive INTEGER_CST stores case here,
3281 : as we have here the code to deal with overlaps. */
3282 1047720 : || (info->bitregion_start <= merged_store->bitregion_end
3283 687702 : && info->rhs_code == INTEGER_CST
3284 525551 : && merged_store->only_constants
3285 491973 : && merged_store->can_be_merged_into (info)))
3286 : {
3287 : /* Only allow overlapping stores of constants. */
3288 601094 : if (info->rhs_code == INTEGER_CST
3289 594030 : && merged_store->only_constants
3290 593955 : && info->lp_nr == merged_store->lp_nr)
3291 : {
3292 593943 : unsigned int first_order
3293 593943 : = MIN (merged_store->first_order, info->order);
3294 593943 : unsigned int last_order
3295 593943 : = MAX (merged_store->last_order, info->order);
3296 593943 : unsigned HOST_WIDE_INT end
3297 593943 : = MAX (merged_store->start + merged_store->width,
3298 : info->bitpos + info->bitsize);
3299 593943 : if (check_no_overlap (m_store_info, i, true, first_order,
3300 : last_order, merged_store->start, end,
3301 : first_earlier, end_earlier))
3302 : {
3303 : /* check_no_overlap call above made sure there are no
3304 : overlapping stores with non-INTEGER_CST rhs_code
3305 : in between the first and last of the stores we've
3306 : just merged. If there are any INTEGER_CST rhs_code
3307 : stores in between, we need to merge_overlapping them
3308 : even if in the sort_by_bitpos order there are other
3309 : overlapping stores in between. Keep those stores as is.
3310 : Example:
3311 : MEM[(int *)p_28] = 0;
3312 : MEM[(char *)p_28 + 3B] = 1;
3313 : MEM[(char *)p_28 + 1B] = 2;
3314 : MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3315 : We can't merge the zero store with the store of two and
3316 : not merge anything else, because the store of one is
3317 : in the original order in between those two, but in
3318 : store_by_bitpos order it comes after the last store that
3319 : we can't merge with them. We can merge the first 3 stores
3320 : and keep the last store as is though. */
3321 593919 : unsigned int len = m_store_info.length ();
3322 593919 : unsigned int try_order = last_order;
3323 593919 : unsigned int first_nonmergeable_order;
3324 593919 : unsigned int k;
3325 593919 : bool last_iter = false;
3326 593919 : int attempts = 0;
3327 616561 : do
3328 : {
3329 616561 : unsigned int max_order = 0;
3330 616561 : unsigned int min_order = first_order;
3331 616561 : unsigned first_nonmergeable_int_order = ~0U;
3332 616561 : unsigned HOST_WIDE_INT this_end = end;
3333 616561 : unsigned HOST_WIDE_INT this_bitregion_start
3334 : = new_bitregion_start;
3335 616561 : unsigned HOST_WIDE_INT this_bitregion_end
3336 : = new_bitregion_end;
3337 616561 : k = i;
3338 616561 : first_nonmergeable_order = ~0U;
3339 745272 : for (unsigned int j = i + 1; j < len; ++j)
3340 : {
3341 455957 : store_immediate_info *info2 = m_store_info[j];
3342 455957 : if (info2->bitpos >= this_end)
3343 : break;
3344 128714 : if (info2->order < try_order)
3345 : {
3346 80961 : if (info2->rhs_code != INTEGER_CST
3347 80958 : || info2->lp_nr != merged_store->lp_nr)
3348 : {
3349 : /* Normally check_no_overlap makes sure this
3350 : doesn't happen, but if end grows below,
3351 : then we need to process more stores than
3352 : check_no_overlap verified. Example:
3353 : MEM[(int *)p_5] = 0;
3354 : MEM[(short *)p_5 + 3B] = 1;
3355 : MEM[(char *)p_5 + 4B] = _9;
3356 : MEM[(char *)p_5 + 2B] = 2; */
3357 : k = 0;
3358 : break;
3359 : }
3360 80958 : if (info2->bitregion_start
3361 : < this_bitregion_start)
3362 : this_bitregion_start = info2->bitregion_start;
3363 80958 : if (info2->bitregion_end
3364 : > this_bitregion_end)
3365 : this_bitregion_end = info2->bitregion_end;
3366 80958 : if (((this_bitregion_end - this_bitregion_start
3367 80958 : + 1) / BITS_PER_UNIT)
3368 : > (unsigned) param_store_merging_max_size)
3369 : {
3370 : k = 0;
3371 : break;
3372 : }
3373 80958 : k = j;
3374 80958 : min_order = MIN (min_order, info2->order);
3375 80958 : this_end = MAX (this_end,
3376 : info2->bitpos + info2->bitsize);
3377 : }
3378 47753 : else if (info2->rhs_code == INTEGER_CST
3379 44945 : && info2->lp_nr == merged_store->lp_nr
3380 44945 : && !last_iter)
3381 : {
3382 44945 : max_order = MAX (max_order, info2->order + 1);
3383 44945 : first_nonmergeable_int_order
3384 44945 : = MIN (first_nonmergeable_int_order,
3385 : info2->order);
3386 : }
3387 : else
3388 2808 : first_nonmergeable_order
3389 2808 : = MIN (first_nonmergeable_order, info2->order);
3390 : }
3391 616561 : if (k > i
3392 616561 : && !check_no_overlap (m_store_info, len - 1, true,
3393 : min_order, try_order,
3394 : merged_store->start, this_end,
3395 : first_earlier, end_earlier))
3396 : k = 0;
3397 616561 : if (k == 0)
3398 : {
3399 3 : if (last_order == try_order)
3400 : break;
3401 : /* If this failed, but only because we grew
3402 : try_order, retry with the last working one,
3403 : so that we merge at least something. */
3404 0 : try_order = last_order;
3405 0 : last_iter = true;
3406 0 : continue;
3407 : }
3408 616558 : last_order = try_order;
3409 : /* Retry with a larger try_order to see if we could
3410 : merge some further INTEGER_CST stores. */
3411 616558 : if (max_order
3412 616558 : && (first_nonmergeable_int_order
3413 616558 : < first_nonmergeable_order))
3414 : {
3415 22643 : try_order = MIN (max_order,
3416 : first_nonmergeable_order);
3417 22643 : try_order
3418 22643 : = MIN (try_order,
3419 : merged_store->first_nonmergeable_order);
3420 22643 : if (try_order > last_order && ++attempts < 16)
3421 22642 : continue;
3422 : }
3423 593916 : first_nonmergeable_order
3424 593916 : = MIN (first_nonmergeable_order,
3425 : first_nonmergeable_int_order);
3426 : end = this_end;
3427 : break;
3428 : }
3429 : while (1);
3430 :
3431 593919 : if (k != 0)
3432 : {
3433 593916 : merged_store->merge_overlapping (info);
3434 :
3435 593916 : merged_store->first_nonmergeable_order
3436 593916 : = MIN (merged_store->first_nonmergeable_order,
3437 : first_nonmergeable_order);
3438 :
3439 673976 : for (unsigned int j = i + 1; j <= k; j++)
3440 : {
3441 80060 : store_immediate_info *info2 = m_store_info[j];
3442 80060 : gcc_assert (info2->bitpos < end);
3443 80060 : if (info2->order < last_order)
3444 : {
3445 80002 : gcc_assert (info2->rhs_code == INTEGER_CST);
3446 80002 : if (info != info2)
3447 80002 : merged_store->merge_overlapping (info2);
3448 : }
3449 : /* Other stores are kept and not merged in any
3450 : way. */
3451 : }
3452 593916 : ignore = k;
3453 593916 : goto done;
3454 : }
3455 : }
3456 : }
3457 : }
3458 : /* |---store 1---||---store 2---|
3459 : This store is consecutive to the previous one.
3460 : Merge it into the current store group. There can be gaps in between
3461 : the stores, but there can't be gaps in between bitregions. */
3462 446626 : else if (info->bitregion_start <= merged_store->bitregion_end
3463 446626 : && merged_store->can_be_merged_into (info))
3464 : {
3465 134331 : store_immediate_info *infof = merged_store->stores[0];
3466 :
3467 : /* All the rhs_code ops that take 2 operands are commutative,
3468 : swap the operands if it could make the operands compatible. */
3469 134331 : if (infof->ops[0].base_addr
3470 117284 : && infof->ops[1].base_addr
3471 1656 : && info->ops[0].base_addr
3472 1656 : && info->ops[1].base_addr
3473 1656 : && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3474 : info->bitpos - infof->bitpos)
3475 135493 : && operand_equal_p (info->ops[1].base_addr,
3476 : infof->ops[0].base_addr, 0))
3477 : {
3478 955 : std::swap (info->ops[0], info->ops[1]);
3479 955 : info->ops_swapped_p = true;
3480 : }
3481 134331 : if (check_no_overlap (m_store_info, i, false,
3482 134331 : MIN (merged_store->first_order, info->order),
3483 134331 : MAX (merged_store->last_order, info->order),
3484 : merged_store->start,
3485 134331 : MAX (merged_store->start + merged_store->width,
3486 : info->bitpos + info->bitsize),
3487 : first_earlier, end_earlier))
3488 : {
3489 : /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3490 134329 : if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3491 : {
3492 763 : info->rhs_code = BIT_INSERT_EXPR;
3493 763 : info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3494 763 : info->ops[0].base_addr = NULL_TREE;
3495 : }
3496 133566 : else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3497 : {
3498 8 : for (store_immediate_info *infoj : merged_store->stores)
3499 : {
3500 2 : infoj->rhs_code = BIT_INSERT_EXPR;
3501 2 : infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3502 2 : infoj->ops[0].base_addr = NULL_TREE;
3503 : }
3504 2 : merged_store->bit_insertion = true;
3505 : }
3506 134329 : if ((infof->ops[0].base_addr
3507 117280 : ? compatible_load_p (merged_store, info, base_addr, 0)
3508 17049 : : !info->ops[0].base_addr)
3509 270314 : && (infof->ops[1].base_addr
3510 1656 : ? compatible_load_p (merged_store, info, base_addr, 1)
3511 111565 : : !info->ops[1].base_addr))
3512 : {
3513 113214 : merged_store->merge_into (info);
3514 113214 : goto done;
3515 : }
3516 : }
3517 : }
3518 :
3519 : /* |---store 1---| <gap> |---store 2---|.
3520 : Gap between stores or the rhs not compatible. Start a new group. */
3521 :
3522 : /* Try to apply all the stores recorded for the group to determine
3523 : the bitpattern they write and discard it if that fails.
3524 : This will also reject single-store groups. */
3525 342483 : if (merged_store->apply_stores ())
3526 58274 : m_merged_store_groups.safe_push (merged_store);
3527 : else
3528 284209 : delete merged_store;
3529 :
3530 342483 : merged_store = new merged_store_group (info);
3531 342483 : end_earlier = i;
3532 342483 : if (dump_file && (dump_flags & TDF_DETAILS))
3533 1 : fputs ("New store group\n", dump_file);
3534 :
3535 1627125 : done:
3536 1627125 : if (dump_file && (dump_flags & TDF_DETAILS))
3537 : {
3538 227 : fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3539 : " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3540 : i, info->bitsize, info->bitpos);
3541 227 : print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3542 227 : fputc ('\n', dump_file);
3543 : }
3544 : }
3545 :
3546 : /* Record or discard the last store group. */
3547 494918 : if (merged_store)
3548 : {
3549 494408 : if (merged_store->apply_stores ())
3550 342078 : m_merged_store_groups.safe_push (merged_store);
3551 : else
3552 152330 : delete merged_store;
3553 : }
3554 :
3555 1358522 : gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3556 :
3557 494918 : bool success
3558 494918 : = !m_merged_store_groups.is_empty ()
3559 368686 : && m_merged_store_groups.length () < m_store_info.length ();
3560 :
3561 368686 : if (success && dump_file)
3562 94 : fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3563 : m_merged_store_groups.length ());
3564 :
3565 : return success;
3566 : }
3567 :
3568 : /* Return the type to use for the merged stores or loads described by STMTS.
3569 : This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3570 : otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3571 : of the MEM_REFs if any. */
3572 :
3573 : static tree
3574 92139 : get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3575 : unsigned short *cliquep, unsigned short *basep)
3576 : {
3577 92139 : gimple *stmt;
3578 92139 : unsigned int i;
3579 92139 : tree type = NULL_TREE;
3580 92139 : tree ret = NULL_TREE;
3581 92139 : *cliquep = 0;
3582 92139 : *basep = 0;
3583 :
3584 318043 : FOR_EACH_VEC_ELT (stmts, i, stmt)
3585 : {
3586 225904 : tree ref = is_load ? gimple_assign_rhs1 (stmt)
3587 221287 : : gimple_assign_lhs (stmt);
3588 225904 : tree type1 = reference_alias_ptr_type (ref);
3589 225904 : tree base = get_base_address (ref);
3590 :
3591 225904 : if (i == 0)
3592 : {
3593 92139 : if (TREE_CODE (base) == MEM_REF)
3594 : {
3595 12588 : *cliquep = MR_DEPENDENCE_CLIQUE (base);
3596 12588 : *basep = MR_DEPENDENCE_BASE (base);
3597 : }
3598 92139 : ret = type = type1; /* First ref seeds result and dependence info.  */
3599 92139 : continue;
3600 : }
3601 133765 : if (!alias_ptr_types_compatible_p (type, type1))
3602 82217 : ret = ptr_type_node; /* Mixed alias types: fall back to alias set 0.  */
3603 133765 : if (TREE_CODE (base) != MEM_REF
3604 17320 : || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3605 149649 : || *basep != MR_DEPENDENCE_BASE (base))
3606 : {
3607 117881 : *cliquep = 0; /* Dependence info not uniform across refs: drop it.  */
3608 117881 : *basep = 0;
3609 : }
3610 : }
3611 92139 : return ret;
3612 : }
3613 :
3614 : /* Return the location_t information we can find among the statements
3615 : in STMTS. */
3616 :
3617 : static location_t
3618 92176 : get_location_for_stmts (vec<gimple *> &stmts)
3619 : {
3620 280241 : for (gimple *stmt : stmts)
3621 94303 : if (gimple_has_location (stmt))
3622 90590 : return gimple_location (stmt); /* First stmt carrying a location wins.  */
3623 :
3624 : return UNKNOWN_LOCATION;
3625 : }
3626 :
3627 : /* Used to describe a store resulting from splitting a wide store into smaller
3628 : regularly-sized stores in split_group. */
3629 :
3630 906752 : class split_store
3631 : {
3632 : public:
3633 : unsigned HOST_WIDE_INT bytepos; /* Offset in bytes from the base.  */
3634 : unsigned HOST_WIDE_INT size; /* Size of the split store in bits.  */
3635 : unsigned HOST_WIDE_INT align; /* Known alignment, in bits.  */
3636 : auto_vec<store_immediate_info *> orig_stores; /* Original stores covered by this split store.  */
3637 : /* True if there is a single orig stmt covering the whole split store. */
3638 : bool orig;
3639 : split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3640 : unsigned HOST_WIDE_INT);
3641 : };
3642 :
3643 : /* Simple constructor. */
3644 :
3645 906752 : split_store::split_store (unsigned HOST_WIDE_INT bp,
3646 : unsigned HOST_WIDE_INT sz,
3647 906752 : unsigned HOST_WIDE_INT al)
3648 906752 : : bytepos (bp), size (sz), align (al), orig (false)
3649 : {
3650 906752 : orig_stores.create (0); /* Start empty; constituents are pushed later.  */
3651 0 : }
3652 :
3653 : /* Record all stores in GROUP that write to the region starting at BITPOS and
3654 : is of size BITSIZE. Record infos for such statements in STORES if
3655 : non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3656 : if there is exactly one original store in the range (in that case ignore
3657 : clobber stmts, unless there are only clobber stmts). */
3658 :
3659 : static store_immediate_info *
3660 7198103 : find_constituent_stores (class merged_store_group *group,
3661 : vec<store_immediate_info *> *stores,
3662 : unsigned int *first,
3663 : unsigned HOST_WIDE_INT bitpos,
3664 : unsigned HOST_WIDE_INT bitsize)
3665 : {
3666 7198103 : store_immediate_info *info, *ret = NULL;
3667 7198103 : unsigned int i;
3668 7198103 : bool second = false; /* Set once a second non-clobber store is seen.  */
3669 7198103 : bool update_first = true;
3670 7198103 : unsigned HOST_WIDE_INT end = bitpos + bitsize;
3671 18864241 : for (i = *first; group->stores.iterate (i, &info); ++i)
3672 : {
3673 15796661 : unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3674 15796661 : unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3675 15796661 : if (stmt_end <= bitpos)
3676 : {
3677 : /* BITPOS passed to this function never decreases from within the
3678 : same split_group call, so optimize and don't scan info records
3679 : which are known to end before or at BITPOS next time.
3680 : Only do it if all stores before this one also pass this. */
3681 4436826 : if (update_first)
3682 2045716 : *first = i + 1;
3683 4436826 : continue;
3684 : }
3685 : else
3686 11359835 : update_first = false;
3687 :
3688 : /* The stores in GROUP are ordered by bitposition so if we're past
3689 : the region for this group return early. */
3690 11359835 : if (stmt_start >= end)
3691 : return ret;
3692 :
3693 7599756 : if (gimple_clobber_p (info->stmt)) /* Clobbers are recorded but don't count as a second store.  */
3694 : {
3695 713270 : if (stores)
3696 34858 : stores->safe_push (info);
3697 713270 : if (ret == NULL)
3698 687510 : ret = info;
3699 713270 : continue;
3700 : }
3701 6886486 : if (stores)
3702 : {
3703 1043107 : stores->safe_push (info);
3704 1043107 : if (ret && !gimple_clobber_p (ret->stmt))
3705 : {
3706 : ret = NULL;
3707 : second = true;
3708 : }
3709 : }
3710 5843379 : else if (ret && !gimple_clobber_p (ret->stmt))
3711 : return NULL; /* Second non-clobber store and no STORES vector: no unique original.  */
3712 6417544 : if (!second)
3713 6377922 : ret = info; /* A non-clobber store overrides a remembered clobber.  */
3714 : }
3715 : return ret;
3716 : }
3717 :
3718 : /* Return how many SSA_NAMEs used to compute value to store in the INFO
3719 : store have multiple uses. If any SSA_NAME has multiple uses, also
3720 : count statements needed to compute it. */
3721 :
3722 : static unsigned
3723 1133841 : count_multiple_uses (store_immediate_info *info)
3724 : {
3725 1133841 : gimple *stmt = info->stmt;
3726 1133841 : unsigned ret = 0;
3727 1133841 : switch (info->rhs_code)
3728 : {
3729 : case INTEGER_CST:
3730 : case STRING_CST:
3731 : return 0; /* Constants reference no SSA_NAMEs.  */
3732 6229 : case BIT_AND_EXPR:
3733 6229 : case BIT_IOR_EXPR:
3734 6229 : case BIT_XOR_EXPR:
3735 6229 : if (info->bit_not_p)
3736 : {
3737 65 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3738 : ret = 1; /* Fall through below to return
3739 : the BIT_NOT_EXPR stmt and then
3740 : BIT_{AND,IOR,XOR}_EXPR and anything it
3741 : uses. */
3742 : else
3743 : /* stmt is after this the BIT_NOT_EXPR. */
3744 65 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3745 : }
3746 6229 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3747 : {
3748 26 : ret += 1 + info->ops[0].bit_not_p;
3749 26 : if (info->ops[1].base_addr)
3750 26 : ret += 1 + info->ops[1].bit_not_p;
3751 26 : return ret + 1; /* Both operands plus the BIT_*_EXPR itself.  */
3752 : }
3753 6203 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3754 : /* stmt is now the BIT_*_EXPR. */
3755 6203 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3756 3760 : ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3757 2443 : else if (info->ops[info->ops_swapped_p].bit_not_p)
3758 : {
3759 171 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3760 171 : if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3761 0 : ++ret;
3762 : }
3763 6203 : if (info->ops[1].base_addr == NULL_TREE)
3764 : {
3765 313 : gcc_checking_assert (!info->ops_swapped_p);
3766 : return ret;
3767 : }
3768 5890 : if (!has_single_use (gimple_assign_rhs2 (stmt)))
3769 1772 : ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3770 4118 : else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3771 : {
3772 19 : gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3773 19 : if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3774 0 : ++ret;
3775 : }
3776 : return ret;
3777 253077 : case MEM_REF:
3778 253077 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3779 163894 : return 1 + info->ops[0].bit_not_p;
3780 89183 : else if (info->ops[0].bit_not_p)
3781 : {
3782 124 : stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3783 124 : if (!has_single_use (gimple_assign_rhs1 (stmt)))
3784 : return 1;
3785 : }
3786 : return 0;
3787 13895 : case BIT_INSERT_EXPR:
3788 13895 : return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3789 0 : default:
3790 0 : gcc_unreachable ();
3791 : }
3792 : }
3793 :
3794 : /* Split a merged store described by GROUP by populating the SPLIT_STORES
3795 : vector (if non-NULL) with split_store structs describing the byte offset
3796 : (from the base), the bit size and alignment of each store as well as the
3797 : original statements involved in each such split group.
3798 : This is to separate the splitting strategy from the statement
3799 : building/emission/linking done in output_merged_store.
3800 : Return number of new stores.
3801 : If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3802 : If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3803 : BZERO_FIRST may be true only when the first store covers the whole group
3804 : and clears it; if BZERO_FIRST is true, keep that first store in the set
3805 : unmodified and emit further stores for the overrides only.
3806 : If SPLIT_STORES is NULL, it is just a dry run to count number of
3807 : new stores. */
3808 :
3809 : static unsigned int
3810 1009191 : split_group (merged_store_group *group, bool allow_unaligned_store,
3811 : bool allow_unaligned_load, bool bzero_first,
3812 : vec<split_store *> *split_stores,
3813 : unsigned *total_orig,
3814 : unsigned *total_new)
3815 : {
3816 1009191 : unsigned HOST_WIDE_INT pos = group->bitregion_start;
3817 1009191 : unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3818 1009191 : unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3819 1009191 : unsigned HOST_WIDE_INT group_align = group->align;
3820 1009191 : unsigned HOST_WIDE_INT align_base = group->align_base;
3821 1009191 : unsigned HOST_WIDE_INT group_load_align = group_align;
3822 1009191 : bool any_orig = false; /* Set when any split store reuses an original stmt.  */
3823 :
3824 1009191 : gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3825 :
3826 : /* For bswap framework using sets of stores, all the checking has been done
3827 : earlier in try_coalesce_bswap and the result always needs to be emitted
3828 : as a single store. Likewise for string concatenation. */
3829 1009191 : if (group->stores[0]->rhs_code == LROTATE_EXPR
3830 1007670 : || group->stores[0]->rhs_code == NOP_EXPR
3831 2015880 : || group->string_concatenation)
3832 : {
3833 2703 : gcc_assert (!bzero_first);
3834 2703 : if (total_orig)
3835 : {
3836 : /* Avoid the old/new stmt count heuristics. It should be
3837 : always beneficial. */
3838 901 : total_new[0] = 1;
3839 901 : total_orig[0] = 2;
3840 : }
3841 :
3842 2703 : if (split_stores)
3843 : {
3844 901 : unsigned HOST_WIDE_INT align_bitpos
3845 901 : = (group->start - align_base) & (group_align - 1);
3846 901 : unsigned HOST_WIDE_INT align = group_align;
3847 901 : if (align_bitpos)
3848 120 : align = least_bit_hwi (align_bitpos);
3849 901 : bytepos = group->start / BITS_PER_UNIT;
3850 901 : split_store *store
3851 901 : = new split_store (bytepos, group->width, align);
3852 901 : unsigned int first = 0;
3853 901 : find_constituent_stores (group, &store->orig_stores,
3854 : &first, group->start, group->width);
3855 901 : split_stores->safe_push (store);
3856 : }
3857 :
3858 2703 : return 1; /* Always emitted as exactly one store.  */
3859 : }
3860 :
3861 1006488 : unsigned int ret = 0, first = 0; /* ret counts emitted split stores.  */
3862 1006488 : unsigned HOST_WIDE_INT try_pos = bytepos;
3863 :
3864 1006488 : if (total_orig)
3865 : {
3866 328264 : unsigned int i;
3867 328264 : store_immediate_info *info = group->stores[0];
3868 :
3869 328264 : total_new[0] = 0;
3870 328264 : total_orig[0] = 1; /* The orig store. */
3871 328264 : info = group->stores[0];
3872 328264 : if (info->ops[0].base_addr)
3873 72438 : total_orig[0]++;
3874 328264 : if (info->ops[1].base_addr)
3875 1492 : total_orig[0]++;
3876 328264 : switch (info->rhs_code)
3877 : {
3878 1586 : case BIT_AND_EXPR:
3879 1586 : case BIT_IOR_EXPR:
3880 1586 : case BIT_XOR_EXPR:
3881 1586 : total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3882 1586 : break;
3883 : default:
3884 : break;
3885 : }
3886 328264 : total_orig[0] *= group->stores.length ();
3887 :
3888 1371182 : FOR_EACH_VEC_ELT (group->stores, i, info)
3889 : {
3890 1042918 : total_new[0] += count_multiple_uses (info);
3891 1042918 : total_orig[0] += (info->bit_not_p
3892 1042918 : + info->ops[0].bit_not_p
3893 1042918 : + info->ops[1].bit_not_p);
3894 : }
3895 : }
3896 :
3897 1006488 : if (!allow_unaligned_load)
3898 0 : for (int i = 0; i < 2; ++i)
3899 0 : if (group->load_align[i])
3900 0 : group_load_align = MIN (group_load_align, group->load_align[i]);
3901 :
3902 1006488 : if (bzero_first)
3903 : {
3904 : store_immediate_info *gstore;
3905 24489 : FOR_EACH_VEC_ELT (group->stores, first, gstore)
3906 24489 : if (!gimple_clobber_p (gstore->stmt))
3907 : break;
3908 23457 : ++first; /* Keep the whole-region clearing store as-is; emit overrides only.  */
3909 23457 : ret = 1;
3910 23457 : if (split_stores)
3911 : {
3912 1761 : split_store *store
3913 1761 : = new split_store (bytepos, gstore->bitsize, align_base);
3914 1761 : store->orig_stores.safe_push (gstore);
3915 1761 : store->orig = true;
3916 1761 : any_orig = true;
3917 1761 : split_stores->safe_push (store);
3918 : }
3919 : }
3920 :
3921 5119565 : while (size > 0) /* Greedily carve stores off the front of the region.  */
3922 : {
3923 4113077 : if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3924 1793271 : && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3925 1776952 : || (bzero_first && group->val[try_pos - bytepos] == 0)))
3926 : {
3927 : /* Skip padding bytes. */
3928 612142 : ++try_pos;
3929 612142 : size -= BITS_PER_UNIT;
3930 612142 : continue;
3931 : }
3932 :
3933 3500935 : unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3934 3500935 : unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3935 3500935 : unsigned HOST_WIDE_INT align_bitpos
3936 3500935 : = (try_bitpos - align_base) & (group_align - 1);
3937 3500935 : unsigned HOST_WIDE_INT align = group_align;
3938 3500935 : bool found_orig = false;
3939 3500935 : if (align_bitpos)
3940 932556 : align = least_bit_hwi (align_bitpos);
3941 3500935 : if (!allow_unaligned_store)
3942 2359264 : try_size = MIN (try_size, align);
3943 3500935 : if (!allow_unaligned_load)
3944 : {
3945 : /* If we can't do or don't want to do unaligned stores
3946 : as well as loads, we need to take the loads into account
3947 : as well. */
3948 0 : unsigned HOST_WIDE_INT load_align = group_load_align;
3949 0 : align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3950 0 : if (align_bitpos)
3951 0 : load_align = least_bit_hwi (align_bitpos);
3952 0 : for (int i = 0; i < 2; ++i)
3953 0 : if (group->load_align[i])
3954 : {
3955 0 : align_bitpos
3956 0 : = known_alignment (try_bitpos
3957 0 : - group->stores[0]->bitpos
3958 0 : + group->stores[0]->ops[i].bitpos
3959 0 : - group->load_align_base[i]);
3960 0 : if (align_bitpos & (group_load_align - 1))
3961 : {
3962 0 : unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3963 0 : load_align = MIN (load_align, a);
3964 : }
3965 : }
3966 0 : try_size = MIN (try_size, load_align);
3967 : }
3968 3500935 : store_immediate_info *info
3969 3500935 : = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3970 3500935 : if (info && !gimple_clobber_p (info->stmt))
3971 : {
3972 : /* If there is just one original statement for the range, see if
3973 : we can just reuse the original store which could be even larger
3974 : than try_size. */
3975 2570316 : unsigned HOST_WIDE_INT stmt_end
3976 2570316 : = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3977 2570316 : info = find_constituent_stores (group, NULL, &first, try_bitpos,
3978 : stmt_end - try_bitpos);
3979 2570316 : if (info && info->bitpos >= try_bitpos)
3980 : {
3981 2384712 : store_immediate_info *info2 = NULL;
3982 2384712 : unsigned int first_copy = first;
3983 2384712 : if (info->bitpos > try_bitpos
3984 6279 : && stmt_end - try_bitpos <= try_size)
3985 : {
3986 6037 : info2 = find_constituent_stores (group, NULL, &first_copy,
3987 : try_bitpos,
3988 : info->bitpos - try_bitpos);
3989 6037 : gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3990 : }
3991 2384298 : if (info2 == NULL && stmt_end - try_bitpos < try_size)
3992 : {
3993 431648 : info2 = find_constituent_stores (group, NULL, &first_copy,
3994 : stmt_end,
3995 215824 : (try_bitpos + try_size)
3996 : - stmt_end)
3997 215824 : gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3998 : }
3999 : if (info2 == NULL)
4000 : {
4001 2364033 : try_size = stmt_end - try_bitpos;
4002 2364033 : found_orig = true;
4003 2364033 : goto found;
4004 : }
4005 : }
4006 : }
4007 :
4008 : /* Approximate store bitsize for the case when there are no padding
4009 : bits. */
4010 1222706 : while (try_size > size)
4011 85804 : try_size /= 2;
4012 : /* Now look for whole padding bytes at the end of that bitsize. */
4013 2206823 : for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
4014 2038968 : if (group->mask[try_pos - bytepos + nonmasked - 1]
4015 : != (unsigned char) ~0U
4016 1989212 : && (!bzero_first
4017 1027961 : || group->val[try_pos - bytepos + nonmasked - 1] != 0))
4018 : break;
4019 1136902 : if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
4020 : {
4021 : /* If entire try_size range is padding, skip it. */
4022 598608 : try_pos += try_size / BITS_PER_UNIT;
4023 598608 : size -= try_size;
4024 598608 : continue;
4025 : }
4026 : /* Otherwise try to decrease try_size if second half, last 3 quarters
4027 : etc. are padding. */
4028 538294 : nonmasked *= BITS_PER_UNIT;
4029 549279 : while (nonmasked <= try_size / 2)
4030 : try_size /= 2;
4031 538294 : if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
4032 : {
4033 : /* Now look for whole padding bytes at the start of that bitsize. */
4034 308533 : unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
4035 318165 : for (masked = 0; masked < try_bytesize; ++masked)
4036 318165 : if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
4037 315867 : && (!bzero_first
4038 11104 : || group->val[try_pos - bytepos + masked] != 0))
4039 : break;
4040 308533 : masked *= BITS_PER_UNIT;
4041 308533 : gcc_assert (masked < try_size);
4042 308533 : if (masked >= try_size / 2)
4043 : {
4044 5170 : while (masked >= try_size / 2)
4045 : {
4046 2689 : try_size /= 2;
4047 2689 : try_pos += try_size / BITS_PER_UNIT;
4048 2689 : size -= try_size;
4049 2689 : masked -= try_size;
4050 : }
4051 : /* Need to recompute the alignment, so just retry at the new
4052 : position. */
4053 2481 : continue;
4054 : }
4055 : }
4056 :
4057 229761 : found:
4058 2899846 : ++ret; /* One more split store will be emitted.  */
4059 :
4060 2899846 : if (split_stores)
4061 : {
4062 904090 : split_store *store
4063 904090 : = new split_store (try_pos, try_size, align);
4064 904090 : info = find_constituent_stores (group, &store->orig_stores,
4065 : &first, try_bitpos, try_size);
4066 904090 : if (info
4067 806496 : && !gimple_clobber_p (info->stmt)
4068 806489 : && info->bitpos >= try_bitpos
4069 796982 : && info->bitpos + info->bitsize <= try_bitpos + try_size
4070 1699411 : && (store->orig_stores.length () == 1
4071 31378 : || found_orig
4072 6800 : || (info->bitpos == try_bitpos
4073 6702 : && (info->bitpos + info->bitsize
4074 : == try_bitpos + try_size))))
4075 : {
4076 788521 : store->orig = true; /* Single original stmt covers the range.  */
4077 788521 : any_orig = true;
4078 : }
4079 904090 : split_stores->safe_push (store);
4080 : }
4081 :
4082 2899846 : try_pos += try_size / BITS_PER_UNIT;
4083 2899846 : size -= try_size;
4084 : }
4085 :
4086 1006488 : if (total_orig)
4087 : {
4088 328264 : unsigned int i;
4089 328264 : split_store *store;
4090 : /* If we are reusing some original stores and any of the
4091 : original SSA_NAMEs had multiple uses, we need to subtract
4092 : those now before we add the new ones. */
4093 328264 : if (total_new[0] && any_orig)
4094 : {
4095 133282 : FOR_EACH_VEC_ELT (*split_stores, i, store)
4096 91294 : if (store->orig)
4097 90923 : total_new[0] -= count_multiple_uses (store->orig_stores[0]);
4098 : }
4099 328264 : total_new[0] += ret; /* The new store. */
4100 328264 : store_immediate_info *info = group->stores[0];
4101 328264 : if (info->ops[0].base_addr)
4102 72438 : total_new[0] += ret;
4103 328264 : if (info->ops[1].base_addr)
4104 1492 : total_new[0] += ret;
4105 328264 : switch (info->rhs_code)
4106 : {
4107 1586 : case BIT_AND_EXPR:
4108 1586 : case BIT_IOR_EXPR:
4109 1586 : case BIT_XOR_EXPR:
4110 1586 : total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
4111 1586 : break;
4112 : default:
4113 : break;
4114 : }
4115 1234115 : FOR_EACH_VEC_ELT (*split_stores, i, store)
4116 : {
4117 905851 : unsigned int j;
4118 905851 : bool bit_not_p[3] = { false, false, false };
4119 : /* If all orig_stores have certain bit_not_p set, then
4120 : we'd use a BIT_NOT_EXPR stmt and need to account for it.
4121 : If some orig_stores have certain bit_not_p set, then
4122 : we'd use a BIT_XOR_EXPR with a mask and need to account for
4123 : it. */
4124 1982109 : FOR_EACH_VEC_ELT (store->orig_stores, j, info)
4125 : {
4126 1076258 : if (info->ops[0].bit_not_p)
4127 297 : bit_not_p[0] = true;
4128 1076258 : if (info->ops[1].bit_not_p)
4129 17 : bit_not_p[1] = true;
4130 1076258 : if (info->bit_not_p)
4131 65 : bit_not_p[2] = true;
4132 : }
4133 905851 : total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
4134 : }
4135 :
4136 : }
4137 :
4138 : return ret;
4139 : }
4140 :
4141 : /* Return the operation through which the operand IDX (if < 2) or
4142 : result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
4143 : is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
4144 : the bits should be xored with mask. */
4145 :
4146 : static enum tree_code
4147 2682 : invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
4148 : {
4149 2682 : unsigned int i;
4150 2682 : store_immediate_info *info;
4151 2682 : unsigned int cnt = 0; /* Number of constituent stores requesting inversion.  */
4152 2682 : bool any_paddings = false;
4153 10491 : FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4154 : {
4155 7809 : bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4156 7809 : if (bit_not_p)
4157 : {
4158 89 : ++cnt;
4159 89 : tree lhs = gimple_assign_lhs (info->stmt);
4160 178 : if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4161 178 : && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
4162 : any_paddings = true;
4163 : }
4164 : }
4165 2682 : mask = NULL_TREE;
4166 2682 : if (cnt == 0)
4167 : return NOP_EXPR; /* Nothing to invert.  */
4168 66 : if (cnt == split_store->orig_stores.length () && !any_paddings)
4169 : return BIT_NOT_EXPR; /* Every constituent is inverted: invert the whole value.  */
4170 :
4171 26 : unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
4172 26 : unsigned buf_size = split_store->size / BITS_PER_UNIT;
4173 26 : unsigned char *buf
4174 26 : = XALLOCAVEC (unsigned char, buf_size);
4175 26 : memset (buf, ~0U, buf_size);
4176 106 : FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4177 : {
4178 80 : bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4179 80 : if (!bit_not_p)
4180 27 : continue;
4181 : /* Clear regions with bit_not_p and invert afterwards, rather than
4182 : clear regions with !bit_not_p, so that gaps in between stores aren't
4183 : set in the mask. */
4184 53 : unsigned HOST_WIDE_INT bitsize = info->bitsize;
4185 53 : unsigned HOST_WIDE_INT prec = bitsize;
4186 53 : unsigned int pos_in_buffer = 0;
4187 53 : if (any_paddings)
4188 : {
4189 36 : tree lhs = gimple_assign_lhs (info->stmt);
4190 72 : if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4191 72 : && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4192 36 : prec = TYPE_PRECISION (TREE_TYPE (lhs));
4193 : }
4194 53 : if (info->bitpos < try_bitpos)
4195 : {
4196 0 : gcc_assert (info->bitpos + bitsize > try_bitpos);
4197 0 : if (!BYTES_BIG_ENDIAN)
4198 : {
4199 0 : if (prec <= try_bitpos - info->bitpos)
4200 0 : continue;
4201 0 : prec -= try_bitpos - info->bitpos;
4202 : }
4203 0 : bitsize -= try_bitpos - info->bitpos;
4204 0 : if (BYTES_BIG_ENDIAN && prec > bitsize)
4205 : prec = bitsize;
4206 : }
4207 : else
4208 53 : pos_in_buffer = info->bitpos - try_bitpos;
4209 53 : if (prec < bitsize)
4210 : {
4211 : /* If this is a bool inversion, invert just the least significant
4212 : prec bits rather than all bits of it. */
4213 : if (BYTES_BIG_ENDIAN)
4214 : {
4215 : pos_in_buffer += bitsize - prec;
4216 : if (pos_in_buffer >= split_store->size)
4217 : continue;
4218 : }
4219 : bitsize = prec;
4220 : }
4221 53 : if (pos_in_buffer + bitsize > split_store->size)
4222 0 : bitsize = split_store->size - pos_in_buffer;
4223 53 : unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4224 53 : if (BYTES_BIG_ENDIAN)
4225 : clear_bit_region_be (p, (BITS_PER_UNIT - 1
4226 : - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4227 : else
4228 53 : clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4229 : }
4230 182 : for (unsigned int i = 0; i < buf_size; ++i)
4231 156 : buf[i] = ~buf[i]; /* Flip so the mask has ones exactly over the inverted regions.  */
4232 26 : mask = native_interpret_expr (int_type, buf, buf_size);
4233 26 : return BIT_XOR_EXPR;
4234 : }
4235 :
4236 : /* Given a merged store group GROUP output the widened version of it.
4237 : The store chain is against the base object BASE.
4238 : Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4239 : unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4240 : Make sure that the number of statements output is less than the number of
4241 : original statements. If a better sequence is possible emit it and
4242 : return true. */
4243 :
4244 : bool
4245 401186 : imm_store_chain_info::output_merged_store (merged_store_group *group)
4246 : {
4247 401186 : const unsigned HOST_WIDE_INT start_byte_pos
4248 401186 : = group->bitregion_start / BITS_PER_UNIT;
4249 473207 : unsigned int orig_num_stmts = group->stores.length ();
4250 401186 : if (orig_num_stmts < 2)
4251 : return false;
4252 :
4253 401186 : bool allow_unaligned_store
4254 401186 : = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4255 401186 : bool allow_unaligned_load = allow_unaligned_store;
4256 401186 : bool bzero_first = false;
4257 401186 : store_immediate_info *store;
4258 401186 : unsigned int num_clobber_stmts = 0;
4259 401186 : if (group->stores[0]->rhs_code == INTEGER_CST)
4260 : {
4261 : unsigned int i;
4262 486279 : FOR_EACH_VEC_ELT (group->stores, i, store)
4263 414258 : if (gimple_clobber_p (store->stmt))
4264 162765 : num_clobber_stmts++;
4265 251493 : else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4266 15390 : && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4267 15390 : && group->start == store->bitpos
4268 14966 : && group->width == store->bitsize
4269 10848 : && (group->start % BITS_PER_UNIT) == 0
4270 262341 : && (group->width % BITS_PER_UNIT) == 0)
4271 : {
4272 : bzero_first = true;
4273 : break;
4274 : }
4275 : else
4276 : break;
4277 1161064 : FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4278 837550 : if (gimple_clobber_p (store->stmt))
4279 1915 : num_clobber_stmts++;
4280 323514 : if (num_clobber_stmts == orig_num_stmts)
4281 : return false;
4282 251493 : orig_num_stmts -= num_clobber_stmts;
4283 : }
4284 329165 : if (allow_unaligned_store || bzero_first)
4285 : {
4286 : /* If unaligned stores are allowed, see how many stores we'd emit
4287 : for unaligned and how many stores we'd emit for aligned stores.
4288 : Only use unaligned stores if it allows fewer stores than aligned.
4289 : Similarly, if there is a whole region clear first, prefer expanding
4290 : it together compared to expanding clear first followed by merged
4291 : further stores. */
4292 329165 : unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4293 329165 : int pass_min = 0;
4294 1645825 : for (int pass = 0; pass < 4; ++pass)
4295 : {
4296 1316660 : if (!allow_unaligned_store && (pass & 1) != 0)
4297 0 : continue;
4298 1316660 : if (!bzero_first && (pass & 2) != 0)
4299 636634 : continue;
4300 1360052 : cnt[pass] = split_group (group, (pass & 1) != 0,
4301 680026 : allow_unaligned_load, (pass & 2) != 0,
4302 : NULL, NULL, NULL);
4303 680026 : if (cnt[pass] < cnt[pass_min])
4304 1316660 : pass_min = pass;
4305 : }
4306 329165 : if ((pass_min & 1) == 0)
4307 321916 : allow_unaligned_store = false;
4308 329165 : if ((pass_min & 2) == 0)
4309 327404 : bzero_first = false;
4310 : }
4311 :
4312 329165 : auto_vec<class split_store *, 32> split_stores;
4313 329165 : split_store *split_store;
4314 329165 : unsigned total_orig, total_new, i;
4315 329165 : split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4316 : &split_stores, &total_orig, &total_new);
4317 :
4318 : /* Determine if there is a clobber covering the whole group at the start,
4319 : followed by proposed split stores that cover the whole group. In that
4320 : case, prefer the transformation even if
4321 : split_stores.length () == orig_num_stmts. */
4322 329165 : bool clobber_first = false;
4323 329165 : if (num_clobber_stmts
4324 18904 : && gimple_clobber_p (group->stores[0]->stmt)
4325 18418 : && group->start == group->stores[0]->bitpos
4326 18418 : && group->width == group->stores[0]->bitsize
4327 18228 : && (group->start % BITS_PER_UNIT) == 0
4328 347393 : && (group->width % BITS_PER_UNIT) == 0)
4329 : {
4330 18228 : clobber_first = true;
4331 18228 : unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4332 45645 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4333 32735 : if (split_store->bytepos != pos)
4334 : {
4335 : clobber_first = false;
4336 : break;
4337 : }
4338 : else
4339 27417 : pos += split_store->size / BITS_PER_UNIT;
4340 18228 : if (pos != (group->start + group->width) / BITS_PER_UNIT)
4341 324234 : clobber_first = false;
4342 : }
4343 :
4344 658330 : if (split_stores.length () >= orig_num_stmts + clobber_first)
4345 : {
4346 :
4347 : /* We didn't manage to reduce the number of statements. Bail out. */
4348 243514 : if (dump_file && (dump_flags & TDF_DETAILS))
4349 2 : fprintf (dump_file, "Exceeded original number of stmts (%u)."
4350 : " Not profitable to emit new sequence.\n",
4351 : orig_num_stmts);
4352 849911 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4353 1212794 : delete split_store;
4354 : return false;
4355 : }
4356 85651 : if (total_orig <= total_new)
4357 : {
4358 : /* If number of estimated new statements is above estimated original
4359 : statements, bail out too. */
4360 1801 : if (dump_file && (dump_flags & TDF_DETAILS))
4361 0 : fprintf (dump_file, "Estimated number of original stmts (%u)"
4362 : " not larger than estimated number of new"
4363 : " stmts (%u).\n",
4364 : total_orig, total_new);
4365 3800 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4366 3998 : delete split_store;
4367 : return false;
4368 : }
4369 83850 : if (group->stores[0]->rhs_code == INTEGER_CST)
4370 : {
4371 163233 : bool all_orig = true;
4372 163233 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4373 159449 : if (!split_store->orig)
4374 : {
4375 : all_orig = false;
4376 : break;
4377 : }
4378 78062 : if (all_orig)
4379 : {
4380 : unsigned int cnt = split_stores.length ();
4381 : store_immediate_info *store;
4382 15512 : FOR_EACH_VEC_ELT (group->stores, i, store)
4383 11728 : if (gimple_clobber_p (store->stmt))
4384 3802 : ++cnt;
4385 : /* Punt if we wouldn't make any real changes, i.e. keep all
4386 : orig stmts + all clobbers. */
4387 3784 : if (cnt == group->stores.length ())
4388 : {
4389 3774 : if (dump_file && (dump_flags & TDF_DETAILS))
4390 0 : fprintf (dump_file, "Exceeded original number of stmts (%u)."
4391 : " Not profitable to emit new sequence.\n",
4392 : orig_num_stmts);
4393 11679 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4394 15810 : delete split_store;
4395 249089 : return false;
4396 : }
4397 : }
4398 : }
4399 :
4400 80076 : gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4401 80076 : gimple_seq seq = NULL;
4402 80076 : tree last_vdef, new_vuse;
4403 80076 : last_vdef = gimple_vdef (group->last_stmt);
4404 80076 : new_vuse = gimple_vuse (group->last_stmt);
4405 80076 : tree bswap_res = NULL_TREE;
4406 :
4407 : /* Clobbers are not removed. */
4408 80076 : if (gimple_clobber_p (group->last_stmt))
4409 : {
4410 7 : new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4411 7 : gimple_set_vdef (group->last_stmt, new_vuse);
4412 : }
4413 :
4414 80076 : if (group->stores[0]->rhs_code == LROTATE_EXPR
4415 80076 : || group->stores[0]->rhs_code == NOP_EXPR)
4416 : {
4417 834 : tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4418 834 : gimple *ins_stmt = group->stores[0]->ins_stmt;
4419 834 : struct symbolic_number *n = &group->stores[0]->n;
4420 834 : bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4421 :
4422 834 : switch (n->range)
4423 : {
4424 420 : case 16:
4425 420 : load_type = bswap_type = uint16_type_node;
4426 420 : break;
4427 219 : case 32:
4428 219 : load_type = uint32_type_node;
4429 219 : if (bswap)
4430 : {
4431 109 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4432 109 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4433 : }
4434 : break;
4435 195 : case 64:
4436 195 : load_type = uint64_type_node;
4437 195 : if (bswap)
4438 : {
4439 72 : fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4440 72 : bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4441 : }
4442 : break;
4443 0 : default:
4444 0 : gcc_unreachable ();
4445 : }
4446 :
4447 : /* If the loads have each vuse of the corresponding store,
4448 : we've checked the aliasing already in try_coalesce_bswap and
4449 : 	 we want to sink the load into seq.  So we need to use new_vuse
4450 : on the load. */
4451 834 : if (n->base_addr)
4452 : {
4453 421 : if (n->vuse == NULL)
4454 : {
4455 73 : n->vuse = new_vuse;
4456 73 : ins_stmt = NULL;
4457 : }
4458 : else
4459 : /* Update vuse in case it has changed by output_merged_stores. */
4460 696 : n->vuse = gimple_vuse (ins_stmt);
4461 : }
4462 834 : bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4463 : bswap_type, load_type, n, bswap,
4464 : ~(uint64_t) 0, 0);
4465 834 : gcc_assert (bswap_res);
4466 : }
4467 :
4468 80076 : gimple *stmt = NULL;
4469 160152 : auto_vec<gimple *, 32> orig_stmts;
4470 80076 : gimple_seq this_seq;
4471 80076 : tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4472 : is_gimple_mem_ref_addr, NULL_TREE);
4473 80076 : gimple_seq_add_seq_without_update (&seq, this_seq);
4474 :
4475 80076 : tree load_addr[2] = { NULL_TREE, NULL_TREE };
4476 80076 : gimple_seq load_seq[2] = { NULL, NULL };
4477 80076 : gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4478 240228 : for (int j = 0; j < 2; ++j)
4479 : {
4480 160152 : store_operand_info &op = group->stores[0]->ops[j];
4481 160152 : if (op.base_addr == NULL_TREE)
4482 158672 : continue;
4483 :
4484 1480 : store_immediate_info *infol = group->stores.last ();
4485 4440 : if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4486 : {
4487 : /* We can't pick the location randomly; while we've verified
4488 : all the loads have the same vuse, they can be still in different
4489 : basic blocks and we need to pick the one from the last bb:
4490 : int x = q[0];
4491 : if (x == N) return;
4492 : int y = q[1];
4493 : p[0] = x;
4494 : p[1] = y;
4495 : otherwise if we put the wider load at the q[0] load, we might
4496 : segfault if q[1] is not mapped. */
4497 620 : basic_block bb = gimple_bb (op.stmt);
4498 620 : gimple *ostmt = op.stmt;
4499 620 : store_immediate_info *info;
4500 3606 : FOR_EACH_VEC_ELT (group->stores, i, info)
4501 : {
4502 2986 : gimple *tstmt = info->ops[j].stmt;
4503 2986 : basic_block tbb = gimple_bb (tstmt);
4504 2986 : if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4505 : {
4506 2966 : ostmt = tstmt;
4507 2966 : bb = tbb;
4508 : }
4509 : }
4510 620 : load_gsi[j] = gsi_for_stmt (ostmt);
4511 620 : load_addr[j]
4512 620 : = force_gimple_operand_1 (unshare_expr (op.base_addr),
4513 : &load_seq[j], is_gimple_mem_ref_addr,
4514 : NULL_TREE);
4515 : }
4516 860 : else if (operand_equal_p (base_addr, op.base_addr, 0))
4517 40 : load_addr[j] = addr;
4518 : else
4519 : {
4520 820 : load_addr[j]
4521 820 : = force_gimple_operand_1 (unshare_expr (op.base_addr),
4522 : &this_seq, is_gimple_mem_ref_addr,
4523 : NULL_TREE);
4524 820 : gimple_seq_add_seq_without_update (&seq, this_seq);
4525 : }
4526 : }
4527 :
4528 370527 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4529 : {
4530 290451 : const unsigned HOST_WIDE_INT try_size = split_store->size;
4531 290451 : const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4532 290451 : const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4533 290451 : const unsigned HOST_WIDE_INT try_align = split_store->align;
4534 290451 : const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4535 290451 : tree dest, src;
4536 290451 : location_t loc;
4537 :
4538 290451 : if (split_store->orig)
4539 : {
4540 : /* If there is just a single non-clobber constituent store
4541 : which covers the whole area, just reuse the lhs and rhs. */
4542 205993 : gimple *orig_stmt = NULL;
4543 : store_immediate_info *store;
4544 : unsigned int j;
4545 205993 : FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4546 205993 : if (!gimple_clobber_p (store->stmt))
4547 : {
4548 : orig_stmt = store->stmt;
4549 : break;
4550 : }
4551 200123 : dest = gimple_assign_lhs (orig_stmt);
4552 200123 : src = gimple_assign_rhs1 (orig_stmt);
4553 200123 : loc = gimple_location (orig_stmt);
4554 : }
4555 : else
4556 : {
4557 : store_immediate_info *info;
4558 : unsigned short clique, base;
4559 : unsigned int k;
4560 311615 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4561 221287 : orig_stmts.safe_push (info->stmt);
4562 90328 : tree offset_type
4563 90328 : = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4564 90328 : tree dest_type;
4565 90328 : loc = get_location_for_stmts (orig_stmts);
4566 90328 : orig_stmts.truncate (0);
4567 :
4568 90328 : if (group->string_concatenation)
4569 67 : dest_type
4570 67 : = build_array_type_nelts (char_type_node,
4571 67 : try_size / BITS_PER_UNIT);
4572 : else
4573 : {
4574 90261 : dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4575 90261 : dest_type = build_aligned_type (dest_type, try_align);
4576 : }
4577 90328 : dest = fold_build2 (MEM_REF, dest_type, addr,
4578 : build_int_cst (offset_type, try_pos));
4579 90328 : if (TREE_CODE (dest) == MEM_REF)
4580 : {
4581 90328 : MR_DEPENDENCE_CLIQUE (dest) = clique;
4582 90328 : MR_DEPENDENCE_BASE (dest) = base;
4583 : }
4584 :
4585 90328 : tree mask;
4586 90328 : if (bswap_res || group->string_concatenation)
4587 901 : mask = integer_zero_node;
4588 : else
4589 89427 : mask = native_interpret_expr (dest_type,
4590 89427 : group->mask + try_offset,
4591 89427 : group->buf_size);
4592 :
4593 90328 : tree ops[2];
4594 180693 : for (int j = 0;
4595 361275 : j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4596 : ++j)
4597 : {
4598 90365 : store_operand_info &op = split_store->orig_stores[0]->ops[j];
4599 90365 : if (bswap_res)
4600 834 : ops[j] = bswap_res;
4601 89531 : else if (group->string_concatenation)
4602 : {
4603 134 : ops[j] = build_string (try_size / BITS_PER_UNIT,
4604 67 : (const char *) group->val + try_offset);
4605 67 : TREE_TYPE (ops[j]) = dest_type;
4606 : }
4607 89464 : else if (op.base_addr)
4608 : {
4609 6428 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4610 4617 : orig_stmts.safe_push (info->ops[j].stmt);
4611 :
4612 1811 : offset_type = get_alias_type_for_stmts (orig_stmts, true,
4613 : &clique, &base);
4614 1811 : location_t load_loc = get_location_for_stmts (orig_stmts);
4615 1811 : orig_stmts.truncate (0);
4616 :
4617 1811 : unsigned HOST_WIDE_INT load_align = group->load_align[j];
4618 1811 : unsigned HOST_WIDE_INT align_bitpos
4619 1811 : = known_alignment (try_bitpos
4620 1811 : - split_store->orig_stores[0]->bitpos
4621 1811 : + op.bitpos);
4622 1811 : if (align_bitpos & (load_align - 1))
4623 664 : load_align = least_bit_hwi (align_bitpos);
4624 :
4625 1811 : tree load_int_type
4626 1811 : = build_nonstandard_integer_type (try_size, UNSIGNED);
4627 1811 : load_int_type
4628 1811 : = build_aligned_type (load_int_type, load_align);
4629 :
4630 1811 : poly_uint64 load_pos
4631 1811 : = exact_div (try_bitpos
4632 1811 : - split_store->orig_stores[0]->bitpos
4633 1811 : + op.bitpos,
4634 : BITS_PER_UNIT);
4635 1811 : ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4636 : build_int_cst (offset_type, load_pos));
4637 1811 : if (TREE_CODE (ops[j]) == MEM_REF)
4638 : {
4639 1811 : MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4640 1811 : MR_DEPENDENCE_BASE (ops[j]) = base;
4641 : }
4642 1811 : if (!integer_zerop (mask))
4643 : {
4644 : /* The load might load some bits (that will be masked
4645 : off later on) uninitialized, avoid -W*uninitialized
4646 : warnings in that case. */
4647 141 : suppress_warning (ops[j], OPT_Wuninitialized);
4648 : }
4649 :
4650 1811 : stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4651 1811 : gimple_set_location (stmt, load_loc);
4652 1811 : if (gsi_bb (load_gsi[j]))
4653 : {
4654 1672 : gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4655 836 : gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4656 : }
4657 : else
4658 : {
4659 975 : gimple_set_vuse (stmt, new_vuse);
4660 975 : gimple_seq_add_stmt_without_update (&seq, stmt);
4661 : }
4662 1811 : ops[j] = gimple_assign_lhs (stmt);
4663 1811 : tree xor_mask;
4664 1811 : enum tree_code inv_op
4665 1811 : = invert_op (split_store, j, dest_type, xor_mask);
4666 1811 : if (inv_op != NOP_EXPR)
4667 : {
4668 29 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4669 : inv_op, ops[j], xor_mask);
4670 29 : gimple_set_location (stmt, load_loc);
4671 29 : ops[j] = gimple_assign_lhs (stmt);
4672 :
4673 29 : if (gsi_bb (load_gsi[j]))
4674 7 : gimple_seq_add_stmt_without_update (&load_seq[j],
4675 : stmt);
4676 : else
4677 22 : gimple_seq_add_stmt_without_update (&seq, stmt);
4678 : }
4679 : }
4680 : else
4681 87653 : ops[j] = native_interpret_expr (dest_type,
4682 87653 : group->val + try_offset,
4683 87653 : group->buf_size);
4684 : }
4685 :
4686 90328 : switch (split_store->orig_stores[0]->rhs_code)
4687 : {
4688 : case BIT_AND_EXPR:
4689 : case BIT_IOR_EXPR:
4690 : case BIT_XOR_EXPR:
4691 185 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4692 : {
4693 148 : tree rhs1 = gimple_assign_rhs1 (info->stmt);
4694 148 : orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4695 : }
4696 37 : location_t bit_loc;
4697 37 : bit_loc = get_location_for_stmts (orig_stmts);
4698 37 : orig_stmts.truncate (0);
4699 :
4700 37 : stmt
4701 37 : = gimple_build_assign (make_ssa_name (dest_type),
4702 37 : split_store->orig_stores[0]->rhs_code,
4703 : ops[0], ops[1]);
4704 37 : gimple_set_location (stmt, bit_loc);
4705 : /* If there is just one load and there is a separate
4706 : load_seq[0], emit the bitwise op right after it. */
4707 37 : if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4708 0 : gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4709 : /* Otherwise, if at least one load is in seq, we need to
4710 : emit the bitwise op right before the store. If there
4711 : are two loads and are emitted somewhere else, it would
4712 : be better to emit the bitwise op as early as possible;
4713 : we don't track where that would be possible right now
4714 : though. */
4715 : else
4716 37 : gimple_seq_add_stmt_without_update (&seq, stmt);
4717 37 : src = gimple_assign_lhs (stmt);
4718 37 : tree xor_mask;
4719 37 : enum tree_code inv_op;
4720 37 : inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4721 37 : if (inv_op != NOP_EXPR)
4722 : {
4723 2 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4724 : inv_op, src, xor_mask);
4725 2 : gimple_set_location (stmt, bit_loc);
4726 2 : if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4727 0 : gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4728 : else
4729 2 : gimple_seq_add_stmt_without_update (&seq, stmt);
4730 2 : src = gimple_assign_lhs (stmt);
4731 : }
4732 : break;
4733 834 : case LROTATE_EXPR:
4734 834 : case NOP_EXPR:
4735 834 : src = ops[0];
4736 834 : if (!is_gimple_val (src))
4737 : {
4738 0 : stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4739 : src);
4740 0 : gimple_seq_add_stmt_without_update (&seq, stmt);
4741 0 : src = gimple_assign_lhs (stmt);
4742 : }
4743 834 : if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4744 : {
4745 86 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4746 : NOP_EXPR, src);
4747 86 : gimple_seq_add_stmt_without_update (&seq, stmt);
4748 86 : src = gimple_assign_lhs (stmt);
4749 : }
4750 834 : inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4751 834 : if (inv_op != NOP_EXPR)
4752 : {
4753 2 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4754 : inv_op, src, xor_mask);
4755 2 : gimple_set_location (stmt, loc);
4756 2 : gimple_seq_add_stmt_without_update (&seq, stmt);
4757 2 : src = gimple_assign_lhs (stmt);
4758 : }
4759 : break;
4760 89457 : default:
4761 89457 : src = ops[0];
4762 89457 : break;
4763 : }
4764 :
4765 : /* If bit insertion is required, we use the source as an accumulator
4766 : into which the successive bit-field values are manually inserted.
4767 : FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4768 90328 : if (group->bit_insertion)
4769 26253 : FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4770 20919 : if (info->rhs_code == BIT_INSERT_EXPR
4771 11298 : && info->bitpos < try_bitpos + try_size
4772 11298 : && info->bitpos + info->bitsize > try_bitpos)
4773 : {
4774 : /* Mask, truncate, convert to final type, shift and ior into
4775 : the accumulator. Note that every step can be a no-op. */
4776 11298 : const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4777 11298 : const HOST_WIDE_INT end_gap
4778 11298 : = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4779 11298 : tree tem = info->ops[0].val;
4780 11298 : if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4781 : {
4782 0 : const unsigned HOST_WIDE_INT size
4783 0 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4784 0 : tree integer_type
4785 0 : = build_nonstandard_integer_type (size, UNSIGNED);
4786 0 : tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4787 : integer_type, tem);
4788 : }
4789 11298 : if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4790 : {
4791 7671 : tree bitfield_type
4792 7671 : = build_nonstandard_integer_type (info->bitsize,
4793 : UNSIGNED);
4794 7671 : tem = gimple_convert (&seq, loc, bitfield_type, tem);
4795 : }
4796 3627 : else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4797 : {
4798 2577 : wide_int imask
4799 : = wi::mask (info->bitsize, false,
4800 2577 : TYPE_PRECISION (TREE_TYPE (tem)));
4801 7731 : tem = gimple_build (&seq, loc,
4802 2577 : BIT_AND_EXPR, TREE_TYPE (tem), tem,
4803 2577 : wide_int_to_tree (TREE_TYPE (tem),
4804 5154 : imask));
4805 2577 : }
4806 11298 : const HOST_WIDE_INT shift
4807 : = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4808 11298 : if (shift < 0)
4809 4 : tem = gimple_build (&seq, loc,
4810 4 : RSHIFT_EXPR, TREE_TYPE (tem), tem,
4811 4 : build_int_cst (NULL_TREE, -shift));
4812 11298 : tem = gimple_convert (&seq, loc, dest_type, tem);
4813 11298 : if (shift > 0)
4814 8592 : tem = gimple_build (&seq, loc,
4815 : LSHIFT_EXPR, dest_type, tem,
4816 8592 : build_int_cst (NULL_TREE, shift));
4817 11298 : src = gimple_build (&seq, loc,
4818 : BIT_IOR_EXPR, dest_type, tem, src);
4819 : }
4820 :
4821 90328 : if (!integer_zerop (mask))
4822 : {
4823 3992 : tree tem = make_ssa_name (dest_type);
4824 3992 : tree load_src = unshare_expr (dest);
4825 : /* The load might load some or all bits uninitialized,
4826 : avoid -W*uninitialized warnings in that case.
4827 : As optimization, it would be nice if all the bits are
4828 : provably uninitialized (no stores at all yet or previous
4829 : store a CLOBBER) we'd optimize away the load and replace
4830 : it e.g. with 0. */
4831 3992 : suppress_warning (load_src, OPT_Wuninitialized);
4832 3992 : stmt = gimple_build_assign (tem, load_src);
4833 3992 : gimple_set_location (stmt, loc);
4834 3992 : gimple_set_vuse (stmt, new_vuse);
4835 3992 : gimple_seq_add_stmt_without_update (&seq, stmt);
4836 :
4837 : /* FIXME: If there is a single chunk of zero bits in mask,
4838 : perhaps use BIT_INSERT_EXPR instead? */
4839 3992 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4840 : BIT_AND_EXPR, tem, mask);
4841 3992 : gimple_set_location (stmt, loc);
4842 3992 : gimple_seq_add_stmt_without_update (&seq, stmt);
4843 3992 : tem = gimple_assign_lhs (stmt);
4844 :
4845 3992 : if (TREE_CODE (src) == INTEGER_CST)
4846 832 : src = wide_int_to_tree (dest_type,
4847 832 : wi::bit_and_not (wi::to_wide (src),
4848 1664 : wi::to_wide (mask)));
4849 : else
4850 : {
4851 3160 : tree nmask
4852 3160 : = wide_int_to_tree (dest_type,
4853 3160 : wi::bit_not (wi::to_wide (mask)));
4854 3160 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4855 : BIT_AND_EXPR, src, nmask);
4856 3160 : gimple_set_location (stmt, loc);
4857 3160 : gimple_seq_add_stmt_without_update (&seq, stmt);
4858 3160 : src = gimple_assign_lhs (stmt);
4859 : }
4860 3992 : stmt = gimple_build_assign (make_ssa_name (dest_type),
4861 : BIT_IOR_EXPR, tem, src);
4862 3992 : gimple_set_location (stmt, loc);
4863 3992 : gimple_seq_add_stmt_without_update (&seq, stmt);
4864 3992 : src = gimple_assign_lhs (stmt);
4865 : }
4866 : }
4867 :
4868 290451 : stmt = gimple_build_assign (dest, src);
4869 290451 : gimple_set_location (stmt, loc);
4870 290451 : gimple_set_vuse (stmt, new_vuse);
4871 290451 : gimple_seq_add_stmt_without_update (&seq, stmt);
4872 :
4873 290451 : if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4874 53 : add_stmt_to_eh_lp (stmt, group->lp_nr);
4875 :
4876 290451 : tree new_vdef;
4877 580902 : if (i < split_stores.length () - 1)
4878 210375 : new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4879 : else
4880 : new_vdef = last_vdef;
4881 :
4882 290451 : gimple_set_vdef (stmt, new_vdef);
4883 290451 : SSA_NAME_DEF_STMT (new_vdef) = stmt;
4884 290451 : new_vuse = new_vdef;
4885 : }
4886 :
4887 370527 : FOR_EACH_VEC_ELT (split_stores, i, split_store)
4888 580902 : delete split_store;
4889 :
4890 80076 : gcc_assert (seq);
4891 80076 : if (dump_file)
4892 : {
4893 184 : fprintf (dump_file,
4894 : "New sequence of %u stores to replace old one of %u stores\n",
4895 : split_stores.length (), orig_num_stmts);
4896 92 : if (dump_flags & TDF_DETAILS)
4897 23 : print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4898 : }
4899 :
4900 80076 : if (gimple_clobber_p (group->last_stmt))
4901 7 : update_stmt (group->last_stmt);
4902 :
4903 80076 : if (group->lp_nr > 0)
4904 : {
4905 : /* We're going to insert a sequence of (potentially) throwing stores
4906 : into an active EH region. This means that we're going to create
4907 : new basic blocks with EH edges pointing to the post landing pad
4908 : and, therefore, to have to update its PHI nodes, if any. For the
4909 : virtual PHI node, we're going to use the VDEFs created above, but
4910 : for the other nodes, we need to record the original reaching defs. */
4911 46 : eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4912 46 : basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4913 46 : basic_block last_bb = gimple_bb (group->last_stmt);
4914 46 : edge last_edge = find_edge (last_bb, lp_bb);
4915 46 : auto_vec<tree, 16> last_defs;
4916 46 : gphi_iterator gpi;
4917 98 : for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4918 : {
4919 52 : gphi *phi = gpi.phi ();
4920 52 : tree last_def;
4921 104 : if (virtual_operand_p (gimple_phi_result (phi)))
4922 46 : last_def = NULL_TREE;
4923 : else
4924 6 : last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4925 52 : last_defs.safe_push (last_def);
4926 : }
4927 :
4928 : /* Do the insertion. Then, if new basic blocks have been created in the
4929 : 	 process, rewind the chain of VDEFs created above to walk the new basic
4930 : blocks and update the corresponding arguments of the PHI nodes. */
4931 46 : update_modified_stmts (seq);
4932 46 : if (gimple_find_sub_bbs (seq, &last_gsi))
4933 99 : while (last_vdef != gimple_vuse (group->last_stmt))
4934 : {
4935 53 : gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4936 53 : if (stmt_could_throw_p (cfun, stmt))
4937 : {
4938 53 : edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4939 53 : unsigned int i;
4940 53 : for (gpi = gsi_start_phis (lp_bb), i = 0;
4941 112 : !gsi_end_p (gpi);
4942 59 : gsi_next (&gpi), i++)
4943 : {
4944 59 : gphi *phi = gpi.phi ();
4945 59 : tree new_def;
4946 118 : if (virtual_operand_p (gimple_phi_result (phi)))
4947 : new_def = last_vdef;
4948 : else
4949 6 : new_def = last_defs[i];
4950 59 : add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4951 : }
4952 : }
4953 152 : last_vdef = gimple_vuse (stmt);
4954 : }
4955 46 : }
4956 : else
4957 80030 : gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4958 :
4959 240228 : for (int j = 0; j < 2; ++j)
4960 160152 : if (load_seq[j])
4961 620 : gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4962 :
4963 80076 : return true;
4964 329165 : }
4965 :
4966 : /* Process the merged_store_group objects created in the coalescing phase.
4967 : The stores are all against the base object BASE.
4968 : Try to output the widened stores and delete the original statements if
4969 : successful. Return true iff any changes were made. */
4970 :
4971 : bool
4972 368686 : imm_store_chain_info::output_merged_stores ()
4973 : {
4974 368686 : unsigned int i;
4975 368686 : merged_store_group *merged_store;
4976 368686 : bool ret = false;
4977 769872 : FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4978 : {
4979 401186 : if (dbg_cnt (store_merging)
4980 401186 : && output_merged_store (merged_store))
4981 : {
4982 : unsigned int j;
4983 : store_immediate_info *store;
4984 896280 : FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4985 : {
4986 415018 : gimple *stmt = store->stmt;
4987 415018 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4988 : /* Don't remove clobbers, they are still useful even if
4989 : everything is overwritten afterwards. */
4990 415018 : if (gimple_clobber_p (stmt))
4991 2148 : continue;
4992 412870 : gsi_remove (&gsi, true);
4993 412870 : if (store->lp_nr)
4994 165 : remove_stmt_from_eh_lp (stmt);
4995 412870 : if (stmt != merged_store->last_stmt)
4996 : {
4997 332801 : unlink_stmt_vdef (stmt);
4998 332801 : release_defs (stmt);
4999 : }
5000 : }
5001 : ret = true;
5002 : }
5003 : }
5004 368686 : if (ret && dump_file)
5005 92 : fprintf (dump_file, "Merging successful!\n");
5006 :
5007 368686 : return ret;
5008 : }
5009 :
5010 : /* Coalesce the store_immediate_info objects recorded against the base object
5011 : BASE in the first phase and output them.
5012 : Delete the allocated structures.
5013 : Return true if any changes were made. */
5014 :
5015 : bool
5016 1899336 : imm_store_chain_info::terminate_and_process_chain ()
5017 : {
5018 1899336 : if (dump_file && (dump_flags & TDF_DETAILS))
5019 98 : fprintf (dump_file, "Terminating chain with %u stores\n",
5020 : m_store_info.length ());
5021 : /* Process store chain. */
5022 1899336 : bool ret = false;
5023 1899336 : if (m_store_info.length () > 1)
5024 : {
5025 494918 : ret = coalesce_immediate_stores ();
5026 494918 : if (ret)
5027 368686 : ret = output_merged_stores ();
5028 : }
5029 :
5030 : /* Delete all the entries we allocated ourselves. */
5031 1899336 : store_immediate_info *info;
5032 1899336 : unsigned int i;
5033 4930879 : FOR_EACH_VEC_ELT (m_store_info, i, info)
5034 3031543 : delete info;
5035 :
5036 : merged_store_group *merged_info;
5037 2300522 : FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
5038 401186 : delete merged_info;
5039 :
5040 1899336 : return ret;
5041 : }
5042 :
5043 : /* Return true iff LHS is a destination potentially interesting for
5044 : store merging. In practice these are the codes that get_inner_reference
5045 : can process. */
5046 :
5047 : static bool
5048 9381524 : lhs_valid_for_store_merging_p (tree lhs)
5049 : {
5050 9381524 : if (DECL_P (lhs))
5051 : return true;
5052 :
5053 7214162 : switch (TREE_CODE (lhs))
5054 : {
5055 : case ARRAY_REF:
5056 : case ARRAY_RANGE_REF:
5057 : case BIT_FIELD_REF:
5058 : case COMPONENT_REF:
5059 : case MEM_REF:
5060 : case VIEW_CONVERT_EXPR:
5061 : return true;
5062 : default:
5063 : return false;
5064 : }
5065 : }
5066 :
5067 : /* Return true if the tree RHS is a constant we want to consider
5068 : during store merging. In practice accept all codes that
5069 : native_encode_expr accepts. */
5070 :
5071 : static bool
5072 5024317 : rhs_valid_for_store_merging_p (tree rhs)
5073 : {
5074 5024317 : unsigned HOST_WIDE_INT size;
5075 5024317 : if (TREE_CODE (rhs) == CONSTRUCTOR
5076 928568 : && CONSTRUCTOR_NELTS (rhs) == 0
5077 928568 : && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
5078 5952885 : && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
5079 : return true;
5080 8191498 : return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
5081 4095749 : && native_encode_expr (rhs, NULL, size) != 0);
5082 : }
5083 :
5084 : /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
5085 : and return true on success or false on failure. */
5086 :
5087 : static bool
5088 1057717 : adjust_bit_pos (poly_offset_int byte_off,
5089 : poly_int64 *pbitpos,
5090 : poly_uint64 *pbitregion_start,
5091 : poly_uint64 *pbitregion_end)
5092 : {
5093 1057717 : poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
5094 1057717 : bit_off += *pbitpos;
5095 :
5096 1057717 : if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
5097 : {
5098 1046496 : if (maybe_ne (*pbitregion_end, 0U))
5099 : {
5100 7375 : bit_off = byte_off << LOG2_BITS_PER_UNIT;
5101 7375 : bit_off += *pbitregion_start;
5102 7375 : if (bit_off.to_uhwi (pbitregion_start))
5103 : {
5104 7375 : bit_off = byte_off << LOG2_BITS_PER_UNIT;
5105 7375 : bit_off += *pbitregion_end;
5106 7375 : if (!bit_off.to_uhwi (pbitregion_end))
5107 0 : *pbitregion_end = 0;
5108 : }
5109 : else
5110 0 : *pbitregion_end = 0;
5111 : }
5112 1046496 : return true;
5113 : }
5114 : else
5115 11221 : return false;
5116 : }
5117 :
/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL, *PBITPOS should be still valid even for that
   case.  */

static tree
mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
			     poly_uint64 *pbitpos,
			     poly_uint64 *pbitregion_start,
			     poly_uint64 *pbitregion_end)
{
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  machine_mode mode;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree offset;
  /* Decompose MEM into base object + constant/variable offset parts.  */
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
					&unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  /* Punt on zero-size (or possibly negative) accesses.  */
  if (known_le (bitsize, 0))
    return NULL_TREE;

  /* For bit-field stores, compute the enclosing bit region.  Note the
     += 1 below: presumably get_bit_range's end is inclusive and we keep
     an exclusive end — TODO confirm against get_bit_range's contract.  */
  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    {
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))
	bitregion_end += 1;
    }

  /* Punt on reverse storage order accesses.  */
  if (reversep)
    return NULL_TREE;

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    return NULL_TREE;
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
			   &bitregion_start, &bitregion_end))
	return NULL_TREE;
      base_addr = TREE_OPERAND (base_addr, 0);
    }
  /* get_inner_reference returns the base object, get at its
     address now.  */
  else
    {
      if (maybe_lt (bitpos, 0))
	return NULL_TREE;
      base_addr = build_fold_addr_expr (base_addr);
    }

  if (offset)
    {
      /* If the access is variable offset then a base decl has to be
	 address-taken to be able to emit pointer-based stores to it.
	 ??? We might be able to get away with re-using the original
	 base up to the first variable part and then wrapping that inside
	 a BIT_FIELD_REF.  */
      tree base = get_base_address (base_addr);
      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
	return NULL_TREE;

      /* Similarly to above for the base, remove constant from the offset.  */
      if (TREE_CODE (offset) == PLUS_EXPR
	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
			     &bitpos, &bitregion_start, &bitregion_end))
	offset = TREE_OPERAND (offset, 0);

      /* Fold the remaining variable offset into the base address.  */
      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
			  base_addr, offset);
    }

  /* If no bit region was computed above, default to the byte-aligned
     region covering the access itself.  */
  if (known_eq (bitregion_end, 0U))
    {
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    }

  *pbitsize = bitsize;
  *pbitpos = bitpos;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
  return base_addr;
}
5210 :
5211 : /* Return true if STMT is a load that can be used for store merging.
5212 : In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5213 : BITREGION_END are properties of the corresponding store. */
5214 :
static bool
handled_load (gimple *stmt, store_operand_info *op,
	      poly_uint64 bitsize, poly_uint64 bitpos,
	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
{
  if (!is_gimple_assign (stmt))
    return false;
  /* Look through a single BIT_NOT_EXPR wrapping the load and record
     the negation in OP->bit_not_p.  */
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
			   bitregion_start, bitregion_end))
	{
	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
	     been optimized earlier, but if allowed here, would confuse the
	     multiple uses counting.  */
	  if (op->bit_not_p)
	    return false;
	  op->bit_not_p = !op->bit_not_p;
	  return true;
	}
      return false;
    }
  /* Only plain, non-throwing, non-volatile memory loads are usable.  */
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    {
      tree mem = gimple_assign_rhs1 (stmt);
      op->base_addr
	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
				       &op->bitregion_start,
				       &op->bitregion_end);
      /* The load must have the same size as the store, lie a whole number
	 of bytes away from it, and its bit region must cover at least as
	 much on each side of the access as the store's region does.  */
      if (op->base_addr != NULL_TREE
	  && known_eq (op->bitsize, bitsize)
	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
	  && known_ge (op->bitpos - op->bitregion_start,
		       bitpos - bitregion_start)
	  && known_ge (op->bitregion_end - op->bitpos,
		       bitregion_end - bitpos))
	{
	  op->stmt = stmt;
	  op->val = mem;
	  op->bit_not_p = false;
	  return true;
	}
    }
  return false;
}
5265 :
5266 : /* Return the index number of the landing pad for STMT, if any. */
5267 :
5268 : static int
5269 3031543 : lp_nr_for_store (gimple *stmt)
5270 : {
5271 3031543 : if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5272 : return 0;
5273 :
5274 800773 : if (!stmt_could_throw_p (cfun, stmt))
5275 : return 0;
5276 :
5277 78158 : return lookup_stmt_eh_lp (stmt);
5278 : }
5279 :
5280 : /* Record the store STMT for store merging optimization if it can be
5281 : optimized. Return true if any changes were made. */
5282 :
bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
				   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  /* The store is unusable if the destination could not be analyzed, or
     it is wider than the widest integer mode unless the RHS is a
     constant or an empty CONSTRUCTOR (zeroing).  */
  bool invalid = (base_addr == NULL_TREE
		  || (maybe_gt (bitsize,
				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
		      && TREE_CODE (rhs) != INTEGER_CST
		      && (TREE_CODE (rhs) != CONSTRUCTOR
			  || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  /* Classify the RHS: string constant, integer-like constant, plain
     load, bitwise op of loads/constants, byte-swap source, or
     bit-field insertion.  */
  if (invalid)
    ;
  else if (TREE_CODE (rhs) == STRING_CST)
    {
      rhs_code = STRING_CST;
      ops[0].val = rhs;
    }
  else if (rhs_valid_for_store_merging_p (rhs))
    {
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
  else if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
	invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
			     bitregion_start, bitregion_end))
	rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
	{
	  /* Look through a BIT_NOT_EXPR; the negation is recorded in
	     BIT_NOT_P and applied to whatever defines its operand.  */
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
	    {
	      bit_not_p = true;
	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
	    }
	}

      /* Try a bitwise binary operation whose first operand is a load
	 and second operand is a load or a mergeable constant.  */
      if (rhs_code == ERROR_MARK && !invalid)
	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
	  {
	  case BIT_AND_EXPR:
	  case BIT_IOR_EXPR:
	  case BIT_XOR_EXPR:
	    tree rhs1, rhs2;
	    rhs1 = gimple_assign_rhs1 (def_stmt);
	    rhs2 = gimple_assign_rhs2 (def_stmt);
	    invalid = true;
	    if (TREE_CODE (rhs1) != SSA_NAME)
	      break;
	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
	    if (!is_gimple_assign (def_stmt1)
		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
				  bitregion_start, bitregion_end))
	      break;
	    if (rhs_valid_for_store_merging_p (rhs2))
	      ops[1].val = rhs2;
	    else if (TREE_CODE (rhs2) != SSA_NAME)
	      break;
	    else
	      {
		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
		if (!is_gimple_assign (def_stmt2))
		  break;
		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
					bitregion_start, bitregion_end))
		  break;
	      }
	    invalid = false;
	    break;
	  default:
	    invalid = true;
	    break;
	  }

      /* For byte-aligned stores of at most 64 bits, check whether the
	 RHS is a (possibly rotated/swapped) byte permutation that the
	 bswap machinery can describe symbolically.  */
      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
	  && (const_bitsize % BITS_PER_UNIT) == 0
	  && const_bitsize <= 64
	  && multiple_p (bitpos, BITS_PER_UNIT))
	{
	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
	  if (ins_stmt)
	    {
	      uint64_t nn = n.n;
	      /* Reject the symbolic number if any stored byte is
		 unknown or zero-marked.  */
	      for (unsigned HOST_WIDE_INT i = 0;
		   i < const_bitsize;
		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
		if ((nn & MARKER_MASK) == 0
		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
		  {
		    ins_stmt = NULL;
		    break;
		  }
	      if (ins_stmt)
		{
		  if (invalid)
		    {
		      /* LROTATE_EXPR stands for the generic "byte
			 permutation" case handled via N/INS_STMT.  */
		      rhs_code = LROTATE_EXPR;
		      ops[0].base_addr = NULL_TREE;
		      ops[1].base_addr = NULL_TREE;
		    }
		  invalid = false;
		}
	    }
	}

      /* As a last resort, treat a non-byte-aligned store of a small
	 value as a bit-field insertion.  */
      if (invalid
	  && bitsize.is_constant (&const_bitsize)
	  && ((const_bitsize % BITS_PER_UNIT) != 0
	      || !multiple_p (bitpos, BITS_PER_UNIT))
	  && const_bitsize <= MAX_FIXED_MODE_SIZE)
	{
	  /* Bypass a conversion to the bit-field type.  */
	  if (!bit_not_p
	      && is_gimple_assign (def_stmt)
	      && CONVERT_EXPR_CODE_P (rhs_code))
	    {
	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
	      if (TREE_CODE (rhs1) == SSA_NAME
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
		rhs = rhs1;
	    }
	  rhs_code = BIT_INSERT_EXPR;
	  bit_not_p = false;
	  ops[0].val = rhs;
	  ops[0].base_addr = NULL_TREE;
	  ops[1].base_addr = NULL_TREE;
	  invalid = false;
	}
    }
  else
    invalid = true;

  /* Everything recorded in store_immediate_info must be a known
     constant, and the bit region must fit the configured limit.
     Otherwise just let this store terminate aliasing chains.  */
  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end)
      || ((const_bitregion_end - const_bitregion_start + 1) / BITS_PER_UNIT
	  > (unsigned) param_store_merging_max_size))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  /* Append to an existing chain for BASE_ADDR, if any.  */
  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  if (base_addr)
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, ord, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Recording immediate store from stmt:\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
      (*chain_info)->m_store_info.safe_push (info);
      m_n_stores++;
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
	 process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
	  == (unsigned int) param_max_stores_to_merge)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Reached maximum number of statements to merge:\n");
	  ret |= terminate_and_process_chain (*chain_info);
	}
    }
  else
    {
      /* Store aliases any existing chain?  */
      ret |= terminate_all_aliasing_chains (NULL, stmt);

      /* Start a new chain.  */
      class imm_store_chain_info *new_chain
	= new imm_store_chain_info (m_stores_head, base_addr);
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, 0, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      new_chain->m_store_info.safe_push (info);
      m_n_stores++;
      m_stores.put (base_addr, new_chain);
      m_n_chains++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Starting active chain number %u with statement:\n",
		   m_n_chains);
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "The base object is:\n");
	  print_generic_expr (dump_file, base_addr);
	  fprintf (dump_file, "\n");
	}
    }

  /* Prune oldest chains so that after adding the chain or store above
     we're again within the limits set by the params.  */
  if (m_n_chains > (unsigned)param_max_store_chains_to_track
      || m_n_stores > (unsigned)param_max_stores_to_track)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
		 "terminating oldest chain(s).\n", m_n_chains,
		 param_max_store_chains_to_track, m_n_stores,
		 param_max_stores_to_track);
      imm_store_chain_info **e = &m_stores_head;
      unsigned idx = 0;
      unsigned n_stores = 0;
      /* Walk the chain list, keeping chains while under both limits and
	 terminating the remainder (oldest are at the tail).  */
      while (*e)
	{
	  if (idx >= (unsigned)param_max_store_chains_to_track
	      || (n_stores + (*e)->m_store_info.length ()
		  > (unsigned)param_max_stores_to_track))
	    ret |= terminate_and_process_chain (*e);
	  else
	    {
	      n_stores += (*e)->m_store_info.length ();
	      e = &(*e)->next;
	      ++idx;
	    }
	}
    }

  return ret;
}
5541 :
5542 : /* Return true if STMT is a store valid for store merging. */
5543 :
5544 : static bool
5545 35993365 : store_valid_for_store_merging_p (gimple *stmt)
5546 : {
5547 35993365 : return gimple_assign_single_p (stmt)
5548 16412905 : && gimple_vdef (stmt)
5549 9381524 : && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5550 45110275 : && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5551 : }
5552 :
5553 : enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5554 :
5555 : /* Return the status of basic block BB wrt store merging. */
5556 :
static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  unsigned int num_constructors = 0;
  gimple_stmt_iterator gsi;
  edge e;
  gimple *last_stmt = NULL;

  /* Count mergeable stores -- two or more make the block interesting.
     Also look for an integral vector CONSTRUCTOR/VEC_PACK_TRUNC_EXPR of
     16, 32 or 64 bits that could be rewritten as an integer store.
     Either finding ends the scan early.  */
  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
	continue;

      last_stmt = stmt;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
	break;

      if (is_gimple_assign (stmt)
	  && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
	      || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (VECTOR_TYPE_P (TREE_TYPE (rhs))
	      && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
	      && gimple_assign_lhs (stmt) != NULL_TREE)
	    {
	      HOST_WIDE_INT sz
		= int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
	      if (sz == 16 || sz == 32 || sz == 64)
		{
		  num_constructors = 1;
		  break;
		}
	    }
	}
    }

  if (num_statements == 0 && num_constructors == 0)
    return BB_INVALID;

  /* When merging across EH regions, a block that ends in a mergeable
     store and falls through to the next block may keep its chains open
     across the block boundary.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (last_stmt)
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}
5609 :
5610 : /* Entry point for the pass. Go over each basic block recording chains of
5611 : immediate stores. Upon encountering a terminating statement (as defined
5612 : by stmt_terminates_chain_p) process the recorded stores and emit the widened
5613 : variants. */
5614 :
unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      /* Chains left open by a preceding block only survive into BB if
	 BB is interesting and BB is its single successor's extension.  */
      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}

      if (bb_status == BB_INVALID)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
	{
	  gimple *stmt = gsi_stmt (gsi);
	  /* Advance before processing: STMT may be removed below.  */
	  gsi_next (&gsi);

	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      changed |= terminate_and_process_all_chains ();
	      open_chains = false;
	      continue;
	    }

	  /* Try to rewrite an integral vector constructor as a wide
	     integer store before treating the statement normally.  */
	  if (is_gimple_assign (stmt)
	      && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
		  || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR)
	      && maybe_optimize_vector_constructor (stmt))
	    continue;

	  if (store_valid_for_store_merging_p (stmt))
	    changed |= process_store (stmt);
	  else
	    changed |= terminate_all_aliasing_chains (NULL, stmt);
	}

      if (bb_status == BB_EXTENDED_VALID)
	open_chains = true;
      else
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}
5700 :
5701 : } // anon namespace
5702 :
5703 : /* Construct and return a store merging pass object. */
5704 :
5705 : gimple_opt_pass *
5706 285722 : make_pass_store_merging (gcc::context *ctxt)
5707 : {
5708 285722 : return new pass_store_merging (ctxt);
5709 : }
5710 :
5711 : #if CHECKING_P
5712 :
5713 : namespace selftest {
5714 :
5715 : /* Selftests for store merging helpers. */
5716 :
5717 : /* Assert that all elements of the byte arrays X and Y, both of length N
5718 : are equal. */
5719 :
5720 : static void
5721 32 : verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5722 : {
5723 112 : for (unsigned int i = 0; i < n; i++)
5724 : {
5725 80 : if (x[i] != y[i])
5726 : {
5727 0 : fprintf (stderr, "Arrays do not match. X:\n");
5728 0 : dump_char_array (stderr, x, n);
5729 0 : fprintf (stderr, "Y:\n");
5730 0 : dump_char_array (stderr, y, n);
5731 : }
5732 80 : ASSERT_EQ (x[i], y[i]);
5733 : }
5734 32 : }
5735 :
5736 : /* Test shift_bytes_in_array_left and that it carries bits across between
5737 : bytes correctly. */
5738 :
5739 : static void
5740 4 : verify_shift_bytes_in_array_left (void)
5741 : {
5742 : /* byte 1 | byte 0
5743 : 00011111 | 11100000. */
5744 4 : unsigned char orig[2] = { 0xe0, 0x1f };
5745 4 : unsigned char in[2];
5746 4 : memcpy (in, orig, sizeof orig);
5747 :
5748 4 : unsigned char expected[2] = { 0x80, 0x7f };
5749 4 : shift_bytes_in_array_left (in, sizeof (in), 2);
5750 4 : verify_array_eq (in, expected, sizeof (in));
5751 :
5752 4 : memcpy (in, orig, sizeof orig);
5753 4 : memcpy (expected, orig, sizeof orig);
5754 : /* Check that shifting by zero doesn't change anything. */
5755 4 : shift_bytes_in_array_left (in, sizeof (in), 0);
5756 4 : verify_array_eq (in, expected, sizeof (in));
5757 :
5758 4 : }
5759 :
5760 : /* Test shift_bytes_in_array_right and that it carries bits across between
5761 : bytes correctly. */
5762 :
5763 : static void
5764 4 : verify_shift_bytes_in_array_right (void)
5765 : {
5766 : /* byte 1 | byte 0
5767 : 00011111 | 11100000. */
5768 4 : unsigned char orig[2] = { 0x1f, 0xe0};
5769 4 : unsigned char in[2];
5770 4 : memcpy (in, orig, sizeof orig);
5771 4 : unsigned char expected[2] = { 0x07, 0xf8};
5772 4 : shift_bytes_in_array_right (in, sizeof (in), 2);
5773 4 : verify_array_eq (in, expected, sizeof (in));
5774 :
5775 4 : memcpy (in, orig, sizeof orig);
5776 4 : memcpy (expected, orig, sizeof orig);
5777 : /* Check that shifting by zero doesn't change anything. */
5778 4 : shift_bytes_in_array_right (in, sizeof (in), 0);
5779 4 : verify_array_eq (in, expected, sizeof (in));
5780 4 : }
5781 :
5782 : /* Test clear_bit_region that it clears exactly the bits asked and
5783 : nothing more. */
5784 :
5785 : static void
5786 4 : verify_clear_bit_region (void)
5787 : {
5788 : /* Start with all bits set and test clearing various patterns in them. */
5789 4 : unsigned char orig[3] = { 0xff, 0xff, 0xff};
5790 4 : unsigned char in[3];
5791 4 : unsigned char expected[3];
5792 4 : memcpy (in, orig, sizeof in);
5793 :
5794 : /* Check zeroing out all the bits. */
5795 4 : clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5796 4 : expected[0] = expected[1] = expected[2] = 0;
5797 4 : verify_array_eq (in, expected, sizeof in);
5798 :
5799 4 : memcpy (in, orig, sizeof in);
5800 : /* Leave the first and last bits intact. */
5801 4 : clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5802 4 : expected[0] = 0x1;
5803 4 : expected[1] = 0;
5804 4 : expected[2] = 0x80;
5805 4 : verify_array_eq (in, expected, sizeof in);
5806 4 : }
5807 :
5808 : /* Test clear_bit_region_be that it clears exactly the bits asked and
5809 : nothing more. */
5810 :
5811 : static void
5812 4 : verify_clear_bit_region_be (void)
5813 : {
5814 : /* Start with all bits set and test clearing various patterns in them. */
5815 4 : unsigned char orig[3] = { 0xff, 0xff, 0xff};
5816 4 : unsigned char in[3];
5817 4 : unsigned char expected[3];
5818 4 : memcpy (in, orig, sizeof in);
5819 :
5820 : /* Check zeroing out all the bits. */
5821 4 : clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5822 4 : expected[0] = expected[1] = expected[2] = 0;
5823 4 : verify_array_eq (in, expected, sizeof in);
5824 :
5825 4 : memcpy (in, orig, sizeof in);
5826 : /* Leave the first and last bits intact. */
5827 4 : clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5828 4 : expected[0] = 0x80;
5829 4 : expected[1] = 0;
5830 4 : expected[2] = 0x1;
5831 4 : verify_array_eq (in, expected, sizeof in);
5832 4 : }
5833 :
5834 :
5835 : /* Run all of the selftests within this file. */
5836 :
void
store_merging_cc_tests (void)
{
  /* Exercise the byte-array shifting and bit-region clearing helpers
     used when building merged store values.  */
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}
5845 :
5846 : } // namespace selftest
5847 : #endif /* CHECKING_P. */
|