Line data Source code
1 : /* Redundant Extension Elimination pass for the GNU compiler.
2 : Copyright (C) 2010-2026 Free Software Foundation, Inc.
3 : Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4 :
5 : Based on the Redundant Zero-extension elimination pass contributed by
6 : Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 :
8 : This file is part of GCC.
9 :
10 : GCC is free software; you can redistribute it and/or modify it under
11 : the terms of the GNU General Public License as published by the Free
12 : Software Foundation; either version 3, or (at your option) any later
13 : version.
14 :
15 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 : for more details.
19 :
20 : You should have received a copy of the GNU General Public License
21 : along with GCC; see the file COPYING3. If not see
22 : <http://www.gnu.org/licenses/>. */
23 :
24 :
25 : /* Problem Description :
26 : --------------------
27 : This pass is intended to remove redundant extension instructions.
28 : Such instructions appear for different reasons. We expect some of
29 : them due to implicit zero-extension in 64-bit registers after writing
30 : to their lower 32-bit half (e.g. for the x86-64 architecture).
31 : Another possible reason is a type cast which follows a load (for
32 : instance a register restore) and which can be combined into a single
33 : instruction, and for which earlier local passes, e.g. the combiner,
34 : weren't able to optimize.
35 :
36 : How does this pass work ?
37 : --------------------------
38 :
39 : This pass is run after register allocation. Hence, all registers that
40 : this pass deals with are hard registers. This pass first looks for an
41 : extension instruction that could possibly be redundant. Such extension
42 : instructions show up in RTL with the pattern :
43 : (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44 : where x can be any hard register.
45 : Now, this pass tries to eliminate this instruction by merging the
46 : extension with the definitions of register x. For instance, if
47 : one of the definitions of register x was :
48 : (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49 : followed by extension :
50 : (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51 : then the combination converts this into :
52 : (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53 : If all the merged definitions are recognizable assembly instructions,
54 : the extension is effectively eliminated.
55 :
56 : For example, for the x86-64 architecture, implicit zero-extensions
57 : are captured with appropriate patterns in the i386.md file. Hence,
58 : these merged definition can be matched to a single assembly instruction.
59 : The original extension instruction is then deleted if all the
60 : definitions can be merged.
61 :
62 : However, there are cases where the definition instruction cannot be
63 : merged with an extension. Examples are CALL instructions. In such
64 : cases, the original extension is not redundant and this pass does
65 : not delete it.
66 :
67 : Handling conditional moves :
68 : ----------------------------
69 :
70 : Architectures like x86-64 support conditional moves whose semantics for
71 : extension differ from the other instructions. For instance, the
72 : instruction *cmov ebx, eax*
73 : zero-extends eax onto rax only when the move from ebx to eax happens.
74 : Otherwise, eax may not be zero-extended. Consider conditional moves as
75 : RTL instructions of the form
76 : (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77 : This pass tries to merge an extension with a conditional move by
78 : actually merging the definitions of y and z with an extension and then
79 : converting the conditional move into :
80 : (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81 : Since registers y and z are extended, register x will also be extended
82 : after the conditional move. Note that this step has to be done
83 : transitively since the definition of a conditional copy can be
84 : another conditional copy.
85 :
86 : Motivating Example I :
87 : ---------------------
88 : For this program :
89 : **********************************************
90 : bad_code.c
91 :
92 : int mask[1000];
93 :
94 : int foo(unsigned x)
95 : {
96 : if (x < 10)
97 : x = x * 45;
98 : else
99 : x = x * 78;
100 : return mask[x];
101 : }
102 : **********************************************
103 :
104 : $ gcc -O2 bad_code.c
105 : ........
106 : 400315: b8 4e 00 00 00 mov $0x4e,%eax
107 : 40031a: 0f af f8 imul %eax,%edi
108 : 40031d: 89 ff mov %edi,%edi - useless extension
109 : 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
110 : 400326: c3 retq
111 : ......
112 : 400330: ba 2d 00 00 00 mov $0x2d,%edx
113 : 400335: 0f af fa imul %edx,%edi
114 : 400338: 89 ff mov %edi,%edi - useless extension
115 : 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
116 : 400341: c3 retq
117 :
118 : $ gcc -O2 -free bad_code.c
119 : ......
120 : 400315: 6b ff 4e imul $0x4e,%edi,%edi
121 : 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
122 : 40031f: c3 retq
123 : 400320: 6b ff 2d imul $0x2d,%edi,%edi
124 : 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
125 : 40032a: c3 retq
126 :
127 : Motivating Example II :
128 : ---------------------
129 :
130 : Here is an example with a conditional move.
131 :
132 : For this program :
133 : **********************************************
134 :
135 : unsigned long long foo(unsigned x , unsigned y)
136 : {
137 : unsigned z;
138 : if (x > 100)
139 : z = x + y;
140 : else
141 : z = x - y;
142 : return (unsigned long long)(z);
143 : }
144 :
145 : $ gcc -O2 bad_code.c
146 : ............
147 : 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
148 : 400363: 89 f8 mov %edi,%eax
149 : 400365: 29 f0 sub %esi,%eax
150 : 400367: 83 ff 65 cmp $0x65,%edi
151 : 40036a: 0f 43 c2 cmovae %edx,%eax
152 : 40036d: 89 c0 mov %eax,%eax - useless extension
153 : 40036f: c3 retq
154 :
155 : $ gcc -O2 -free bad_code.c
156 : .............
157 : 400360: 89 fa mov %edi,%edx
158 : 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
159 : 400365: 29 f2 sub %esi,%edx
160 : 400367: 83 ff 65 cmp $0x65,%edi
161 : 40036a: 89 d6 mov %edx,%esi
162 : 40036c: 48 0f 42 c6 cmovb %rsi,%rax
163 : 400370: c3 retq
164 :
165 : Motivating Example III :
166 : ---------------------
167 :
168 : Here is an example with a type cast.
169 :
170 : For this program :
171 : **********************************************
172 :
173 : void test(int size, unsigned char *in, unsigned char *out)
174 : {
175 : int i;
176 : unsigned char xr, xg, xy=0;
177 :
178 : for (i = 0; i < size; i++) {
179 : xr = *in++;
180 : xg = *in++;
181 : xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182 : *out++ = xy;
183 : }
184 : }
185 :
186 : $ gcc -O2 bad_code.c
187 : ............
188 : 10: 0f b6 0e movzbl (%rsi),%ecx
189 : 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
190 : 17: 48 83 c6 02 add $0x2,%rsi
191 : 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension
192 : 1e: 0f b6 c0 movzbl %al,%eax - useless extension
193 : 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
194 : 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
195 :
196 : $ gcc -O2 -free bad_code.c
197 : .............
198 : 10: 0f b6 0e movzbl (%rsi),%ecx
199 : 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
200 : 17: 48 83 c6 02 add $0x2,%rsi
201 : 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
202 : 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
203 :
204 : Usefulness :
205 : ----------
206 :
207 : The original redundant zero-extension elimination pass reported reduction
208 : of the dynamic instruction count of a compression benchmark by 2.8% and
209 : improvement of its run time by about 1%.
210 :
211 : The additional performance gain with the enhanced pass is mostly expected
212 : on in-order architectures where redundancy cannot be compensated by out of
213 : order execution. Measurements showed up to 10% performance gain (reduced
214 : run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215 : gain 1%. */
216 :
217 :
218 : #include "config.h"
219 : #include "system.h"
220 : #include "coretypes.h"
221 : #include "backend.h"
222 : #include "target.h"
223 : #include "rtl.h"
224 : #include "tree.h"
225 : #include "df.h"
226 : #include "memmodel.h"
227 : #include "tm_p.h"
228 : #include "optabs.h"
229 : #include "regs.h"
230 : #include "emit-rtl.h"
231 : #include "recog.h"
232 : #include "cfgrtl.h"
233 : #include "expr.h"
234 : #include "tree-pass.h"
235 :
236 : /* This structure represents a candidate for elimination. */
237 :
238 : struct ext_cand
239 : {
240 : /* The expression. */
241 : const_rtx expr;
242 :
243 : /* The kind of extension. */
244 : enum rtx_code code;
245 :
246 : /* The destination mode. */
247 : machine_mode mode;
248 :
249 : /* The instruction where it lives. */
250 : rtx_insn *insn;
251 : };
252 :
253 :
254 : static int max_insn_uid;
255 :
256 : /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */
257 :
258 : static bool
259 206464 : update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
260 : machine_mode old_mode, enum rtx_code code)
261 : {
262 206464 : rtx *loc = ®_NOTES (insn);
263 230306 : while (*loc)
264 : {
265 23842 : enum reg_note kind = REG_NOTE_KIND (*loc);
266 23842 : if (kind == REG_EQUAL || kind == REG_EQUIV)
267 : {
268 23162 : rtx orig_src = XEXP (*loc, 0);
269 : /* Update equivalency constants. Recall that RTL constants are
270 : sign-extended. */
271 23162 : if (GET_CODE (orig_src) == CONST_INT
272 23162 : && HWI_COMPUTABLE_MODE_P (new_mode))
273 : {
274 17314 : if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND)
275 : /* Nothing needed. */;
276 : else
277 : {
278 : /* Zero-extend the negative constant by masking out the
279 : bits outside the source mode. */
280 27 : rtx new_const_int
281 27 : = gen_int_mode (INTVAL (orig_src)
282 27 : & GET_MODE_MASK (old_mode),
283 : new_mode);
284 27 : if (!validate_change (insn, &XEXP (*loc, 0),
285 : new_const_int, true))
286 : return false;
287 : }
288 17314 : loc = &XEXP (*loc, 1);
289 : }
290 : /* Drop all other notes, they assume a wrong mode. */
291 5848 : else if (!validate_change (insn, loc, XEXP (*loc, 1), true))
292 : return false;
293 : }
294 : else
295 680 : loc = &XEXP (*loc, 1);
296 : }
297 : return true;
298 : }
299 :
300 : /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
301 : and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
302 : this code modifies the SET rtx to a new SET rtx that extends the
303 : right hand expression into a register on the left hand side. Note
304 : that multiple assumptions are made about the nature of the set that
305 : needs to be true for this to work and is called from merge_def_and_ext.
306 :
307 : Original :
308 : (set (reg a) (expression))
309 :
310 : Transform :
311 : (set (reg a) (any_extend (expression)))
312 :
313 : Special Cases :
314 : If the expression is a constant or another extension, then directly
315 : assign it to the register. */
316 :
317 : static bool
318 225663 : combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
319 : {
320 225663 : rtx orig_src = SET_SRC (*orig_set);
321 225663 : machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
322 225663 : rtx new_set;
323 225663 : rtx cand_pat = single_set (cand->insn);
324 :
325 : /* If the extension's source/destination registers are not the same
326 : then we need to change the original load to reference the destination
327 : of the extension. Then we need to emit a copy from that destination
328 : to the original destination of the load. */
329 225663 : rtx new_reg;
330 225663 : bool copy_needed
331 225663 : = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
332 225663 : if (copy_needed)
333 55866 : new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
334 : else
335 169797 : new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
336 :
337 : /* Merge constants by directly moving the constant into the register under
338 : some conditions. Recall that RTL constants are sign-extended. */
339 225663 : if (GET_CODE (orig_src) == CONST_INT
340 225663 : && HWI_COMPUTABLE_MODE_P (cand->mode))
341 : {
342 32150 : if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
343 31953 : new_set = gen_rtx_SET (new_reg, orig_src);
344 : else
345 : {
346 : /* Zero-extend the negative constant by masking out the bits outside
347 : the source mode. */
348 197 : rtx new_const_int
349 394 : = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode),
350 197 : GET_MODE (new_reg));
351 197 : new_set = gen_rtx_SET (new_reg, new_const_int);
352 : }
353 : }
354 193513 : else if (GET_MODE (orig_src) == VOIDmode)
355 : {
356 : /* This is mostly due to a call insn that should not be optimized. */
357 : return false;
358 : }
359 173659 : else if (GET_CODE (orig_src) == cand->code)
360 : {
361 : /* Here is a sequence of two extensions. Try to merge them. */
362 1160 : rtx temp_extension
363 1160 : = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
364 1160 : rtx simplified_temp_extension = simplify_rtx (temp_extension);
365 1160 : if (simplified_temp_extension)
366 0 : temp_extension = simplified_temp_extension;
367 1160 : new_set = gen_rtx_SET (new_reg, temp_extension);
368 : }
369 172499 : else if (GET_CODE (orig_src) == IF_THEN_ELSE)
370 : {
371 : /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
372 : in general, IF_THEN_ELSE should not be combined. */
373 : return false;
374 : }
375 : else
376 : {
377 : /* This is the normal case. */
378 172423 : rtx temp_extension
379 172423 : = gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
380 172423 : rtx simplified_temp_extension = simplify_rtx (temp_extension);
381 172423 : if (simplified_temp_extension)
382 1703 : temp_extension = simplified_temp_extension;
383 172423 : new_set = gen_rtx_SET (new_reg, temp_extension);
384 : }
385 :
386 : /* This change is a part of a group of changes. Hence,
387 : validate_change will not try to commit the change. */
388 205733 : if (validate_change (curr_insn, orig_set, new_set, true)
389 205733 : && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode,
390 : cand->code))
391 : {
392 205733 : if (dump_file)
393 : {
394 41 : fprintf (dump_file,
395 : "Tentatively merged extension with definition %s:\n",
396 : (copy_needed) ? "(copy needed)" : "");
397 25 : print_rtl_single (dump_file, curr_insn);
398 : }
399 205733 : return true;
400 : }
401 :
402 : return false;
403 : }
404 :
405 : /* Treat if_then_else insns, where the operands of both branches
406 : are registers, as copies. For instance,
407 : Original :
408 : (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
409 : Transformed :
410 : (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
411 : DEF_INSN is the if_then_else insn. */
412 :
413 : static bool
414 1907 : transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
415 : {
416 1907 : rtx set_insn = PATTERN (def_insn);
417 1907 : rtx srcreg, dstreg, srcreg2;
418 1907 : rtx map_srcreg, map_dstreg, map_srcreg2;
419 1907 : rtx ifexpr;
420 1907 : rtx cond;
421 1907 : rtx new_set;
422 :
423 1907 : gcc_assert (GET_CODE (set_insn) == SET);
424 :
425 1907 : cond = XEXP (SET_SRC (set_insn), 0);
426 1907 : dstreg = SET_DEST (set_insn);
427 1907 : srcreg = XEXP (SET_SRC (set_insn), 1);
428 1907 : srcreg2 = XEXP (SET_SRC (set_insn), 2);
429 : /* If the conditional move already has the right or wider mode,
430 : there is nothing to do. */
431 1907 : if (GET_MODE_UNIT_SIZE (GET_MODE (dstreg))
432 4545 : >= GET_MODE_UNIT_SIZE (cand->mode))
433 : return true;
434 :
435 731 : map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
436 731 : map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
437 731 : map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
438 731 : ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
439 731 : new_set = gen_rtx_SET (map_dstreg, ifexpr);
440 :
441 731 : if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)
442 731 : && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg),
443 : cand->code))
444 : {
445 731 : if (dump_file)
446 : {
447 0 : fprintf (dump_file,
448 : "Mode of conditional move instruction extended:\n");
449 0 : print_rtl_single (dump_file, def_insn);
450 : }
451 731 : return true;
452 : }
453 :
454 : return false;
455 : }
456 :
457 : /* Get all the reaching definitions of an instruction. The definitions are
458 : desired for REG used in INSN. Return the definition list or NULL if a
459 : definition is missing. If DEST is non-NULL, additionally push the INSN
460 : of the definitions onto DEST. */
461 :
462 : static struct df_link *
463 750503 : get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
464 : {
465 750503 : df_ref use;
466 750503 : struct df_link *ref_chain, *ref_link;
467 :
468 760942 : FOR_EACH_INSN_USE (use, insn)
469 : {
470 760942 : if (GET_CODE (DF_REF_REG (use)) == SUBREG)
471 : return NULL;
472 760942 : if (REGNO (DF_REF_REG (use)) == REGNO (reg))
473 : break;
474 : }
475 :
476 750503 : gcc_assert (use != NULL);
477 :
478 750503 : ref_chain = DF_REF_CHAIN (use);
479 :
480 1752691 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
481 : {
482 : /* Problem getting some definition for this instruction. */
483 1030016 : if (ref_link->ref == NULL)
484 : return NULL;
485 1030016 : if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
486 : return NULL;
487 : /* As global regs are assumed to be defined at each function call
488 : dataflow can report a call_insn as being a definition of REG.
489 : But we can't do anything with that in this pass so proceed only
490 : if the instruction really sets REG in a way that can be deduced
491 : from the RTL structure. */
492 1002188 : if (global_regs[REGNO (reg)]
493 1002188 : && !set_of (reg, DF_REF_INSN (ref_link->ref)))
494 : return NULL;
495 : }
496 :
497 722675 : if (dest)
498 870018 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
499 504539 : dest->safe_push (DF_REF_INSN (ref_link->ref));
500 :
501 : return ref_chain;
502 : }
503 :
504 : /* Get all the reaching uses of an instruction. The uses are desired for REG
505 : set in INSN. Return use list or NULL if a use is missing or irregular. */
506 :
507 : static struct df_link *
508 0 : get_uses (rtx_insn *insn, rtx reg)
509 : {
510 0 : df_ref def;
511 0 : struct df_link *ref_chain, *ref_link;
512 :
513 0 : FOR_EACH_INSN_DEF (def, insn)
514 0 : if (REGNO (DF_REF_REG (def)) == REGNO (reg))
515 : break;
516 :
517 0 : gcc_assert (def != NULL);
518 :
519 0 : ref_chain = DF_REF_CHAIN (def);
520 :
521 0 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
522 : {
523 : /* Problem getting some use for this instruction. */
524 0 : if (ref_link->ref == NULL)
525 : return NULL;
526 0 : if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
527 : return NULL;
528 : }
529 :
530 : return ref_chain;
531 : }
532 :
533 : /* Return true if INSN is
534 : (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
535 : and store x1 and x2 in REG_1 and REG_2. */
536 :
537 : static bool
538 503778 : is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
539 : {
540 503778 : rtx expr = single_set (insn);
541 :
542 503778 : if (expr != NULL_RTX
543 494730 : && GET_CODE (expr) == SET
544 494730 : && GET_CODE (SET_DEST (expr)) == REG
545 494318 : && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
546 8061 : && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
547 8029 : && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
548 : {
549 7938 : *reg1 = XEXP (SET_SRC (expr), 1);
550 7938 : *reg2 = XEXP (SET_SRC (expr), 2);
551 7938 : return true;
552 : }
553 :
554 : return false;
555 : }
556 :
557 : enum ext_modified_kind
558 : {
559 : /* The insn hasn't been modified by ree pass yet. */
560 : EXT_MODIFIED_NONE,
561 : /* Changed into zero extension. */
562 : EXT_MODIFIED_ZEXT,
563 : /* Changed into sign extension. */
564 : EXT_MODIFIED_SEXT
565 : };
566 :
567 : struct ext_modified
568 : {
569 : /* Mode from which ree has zero or sign extended the destination. */
570 : ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
571 :
572 : /* Kind of modification of the insn. */
573 : ENUM_BITFIELD(ext_modified_kind) kind : 2;
574 :
575 : unsigned int do_not_reextend : 1;
576 :
577 : /* True if the insn is scheduled to be deleted. */
578 : unsigned int deleted : 1;
579 : };
580 :
581 : /* Vectors used by combine_reaching_defs and its helpers. */
582 963990 : class ext_state
583 : {
584 : public:
585 : /* In order to avoid constant alloc/free, we keep these
586 : 4 vectors live through the entire find_and_remove_re and just
587 : truncate them each time. */
588 : auto_vec<rtx_insn *> defs_list;
589 : auto_vec<rtx_insn *> copies_list;
590 : auto_vec<rtx_insn *> modified_list;
591 : auto_vec<rtx_insn *> work_list;
592 :
593 : /* For instructions that have been successfully modified, this is
594 : the original mode from which the insn is extending and
595 : kind of extension. */
596 : struct ext_modified *modified;
597 : };
598 :
599 : /* Reaching Definitions of the extended register could be conditional copies
600 : or regular definitions. This function separates the two types into two
601 : lists, STATE->DEFS_LIST and STATE->COPIES_LIST. This is necessary because,
602 : if a reaching definition is a conditional copy, merging the extension with
603 : this definition is wrong. Conditional copies are merged by transitively
604 : merging their definitions. The defs_list is populated with all the reaching
605 : definitions of the extension instruction (EXTEND_INSN) which must be merged
606 : with an extension. The copies_list contains all the conditional moves that
607 : will later be extended into a wider mode conditional move if all the merges
608 : are successful. The function returns false upon failure, true upon
609 : success. */
610 :
611 : static bool
612 350383 : make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat,
613 : ext_state *state)
614 : {
615 350383 : rtx src_reg = XEXP (SET_SRC (set_pat), 0);
616 350383 : bool *is_insn_visited;
617 350383 : bool ret = true;
618 :
619 350383 : state->work_list.truncate (0);
620 :
621 : /* Initialize the work list. */
622 350383 : if (!get_defs (extend_insn, src_reg, &state->work_list))
623 : return false;
624 :
625 350383 : is_insn_visited = XCNEWVEC (bool, max_insn_uid);
626 :
627 : /* Perform transitive closure for conditional copies. */
628 1204695 : while (!state->work_list.is_empty ())
629 : {
630 504391 : rtx_insn *def_insn = state->work_list.pop ();
631 504391 : rtx reg1, reg2;
632 :
633 504391 : gcc_assert (INSN_UID (def_insn) < max_insn_uid);
634 :
635 504391 : if (is_insn_visited[INSN_UID (def_insn)])
636 613 : continue;
637 503778 : is_insn_visited[INSN_UID (def_insn)] = true;
638 :
639 503778 : if (is_cond_copy_insn (def_insn, ®1, ®2))
640 : {
641 : /* Push it onto the copy list first. */
642 7938 : state->copies_list.safe_push (def_insn);
643 :
644 : /* Now perform the transitive closure. */
645 7938 : if (!get_defs (def_insn, reg1, &state->work_list)
646 7938 : || !get_defs (def_insn, reg2, &state->work_list))
647 : {
648 462 : ret = false;
649 462 : break;
650 : }
651 : }
652 : else
653 495840 : state->defs_list.safe_push (def_insn);
654 : }
655 :
656 350383 : XDELETEVEC (is_insn_visited);
657 :
658 350383 : return ret;
659 : }
660 :
661 : /* If DEF_INSN has single SET expression with a register
662 : destination, possibly buried inside a PARALLEL, return
663 : the address of the SET expression, else return NULL.
664 : This is similar to single_set, except that single_set
665 : allows multiple SETs when all but one is dead. */
666 : static rtx *
667 404958 : get_sub_rtx (rtx_insn *def_insn)
668 : {
669 404958 : enum rtx_code code = GET_CODE (PATTERN (def_insn));
670 404958 : rtx *sub_rtx = NULL;
671 :
672 404958 : if (code == PARALLEL)
673 : {
674 302653 : for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
675 : {
676 205461 : rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
677 205461 : if (GET_CODE (s_expr) != SET)
678 97188 : continue;
679 108273 : if (!REG_P (SET_DEST (s_expr)))
680 116 : continue;
681 :
682 108157 : if (sub_rtx == NULL)
683 102652 : sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
684 : else
685 : {
686 : /* PARALLEL with multiple SETs. */
687 : return NULL;
688 : }
689 : }
690 : }
691 302261 : else if (code == SET)
692 : {
693 302261 : rtx s_expr = PATTERN (def_insn);
694 302261 : if (REG_P (SET_DEST (s_expr)))
695 302176 : sub_rtx = &PATTERN (def_insn);
696 : }
697 :
698 : return sub_rtx;
699 : }
700 :
701 : /* Merge the DEF_INSN with an extension. Calls combine_set_extension
702 : on the SET pattern. */
703 :
704 : static bool
705 294431 : merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
706 : {
707 294431 : machine_mode ext_src_mode;
708 294431 : rtx *sub_rtx;
709 :
710 294431 : ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
711 294431 : sub_rtx = get_sub_rtx (def_insn);
712 :
713 294431 : if (sub_rtx == NULL)
714 : return false;
715 :
716 290169 : if (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
717 355320 : || ((state->modified[INSN_UID (def_insn)].kind
718 65151 : == (cand->code == ZERO_EXTEND
719 65151 : ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
720 14977 : && state->modified[INSN_UID (def_insn)].mode
721 : == ext_src_mode))
722 : {
723 239981 : if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
724 685695 : >= GET_MODE_UNIT_SIZE (cand->mode))
725 : return true;
726 : /* If def_insn is already scheduled to be deleted, don't attempt
727 : to modify it. */
728 225682 : if (state->modified[INSN_UID (def_insn)].deleted)
729 : return false;
730 225663 : if (combine_set_extension (cand, def_insn, sub_rtx))
731 : {
732 205733 : if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
733 205069 : state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
734 205733 : return true;
735 : }
736 : }
737 :
738 : return false;
739 : }
740 :
741 : /* Given SRC, which should be one or more extensions of a REG, strip
742 : away the extensions and return the REG. */
743 :
744 : static inline rtx
745 422037 : get_extended_src_reg (rtx src)
746 : {
747 844074 : while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
748 422037 : src = XEXP (src, 0);
749 422037 : gcc_assert (REG_P (src));
750 422037 : return src;
751 : }
752 :
753 : /* This function goes through all reaching defs of the source
754 : of the candidate for elimination (CAND) and tries to combine
755 : the extension with the definition instruction. The changes
756 : are made as a group so that even if one definition cannot be
757 : merged, all reaching definitions end up not being merged.
758 : When a conditional copy is encountered, merging is attempted
759 : transitively on its definitions. It returns true upon success
760 : and false upon failure. */
761 :
762 : static bool
763 350383 : combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
764 : {
765 350383 : rtx_insn *def_insn;
766 350383 : bool merge_successful = true;
767 350383 : int i;
768 350383 : int defs_ix;
769 350383 : bool outcome;
770 :
771 350383 : state->defs_list.truncate (0);
772 350383 : state->copies_list.truncate (0);
773 :
774 350383 : outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
775 :
776 350383 : if (!outcome)
777 : return false;
778 :
779 : /* If the destination operand of the extension is a different
780 : register than the source operand, then additional restrictions
781 : are needed. Note we have to handle cases where we have nested
782 : extensions in the source operand.
783 :
784 : Candidate insns are known to be single_sets, via the test in
785 : find_removable_extensions. So we continue to use single_set here
786 : rather than get_sub_rtx. */
787 349921 : rtx set = single_set (cand->insn);
788 349921 : bool copy_needed
789 349921 : = (REGNO (SET_DEST (set)) != REGNO (get_extended_src_reg (SET_SRC (set))));
790 349921 : if (copy_needed)
791 : {
792 : /* Considering transformation of
793 : (set (reg1) (expression))
794 : ...
795 : (set (reg2) (any_extend (reg1)))
796 :
797 : into
798 :
799 : (set (reg2) (any_extend (expression)))
800 : (set (reg1) (reg2))
801 : ... */
802 :
803 : /* In theory we could handle more than one reaching def, it
804 : just makes the code to update the insn stream more complex. */
805 230345 : if (state->defs_list.length () != 1)
806 : return false;
807 :
808 : /* We don't have the structure described above if there are
809 : conditional moves in between the def and the candidate,
810 : and we will not handle them correctly. See PR68194. */
811 129126 : if (state->copies_list.length () > 0)
812 : return false;
813 :
814 : /* We require the candidate not already be modified. It may,
815 : for example have been changed from a (sign_extend (reg))
816 : into (zero_extend (sign_extend (reg))).
817 :
818 : Handling that case shouldn't be terribly difficult, but the code
819 : here and the code to emit copies would need auditing. Until
820 : we see a need, this is the safe thing to do. */
821 129115 : if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
822 : return false;
823 :
824 129114 : machine_mode dst_mode = GET_MODE (SET_DEST (set));
825 129114 : rtx src_reg = get_extended_src_reg (SET_SRC (set));
826 :
827 : /* Ensure we can use the src_reg in dst_mode (needed for
828 : the (set (reg1) (reg2)) insn mentioned above). */
829 129114 : if (!targetm.hard_regno_mode_ok (REGNO (src_reg), dst_mode))
830 : return false;
831 :
832 : /* Ensure the number of hard registers of the copy match. */
833 129095 : if (hard_regno_nregs (REGNO (src_reg), dst_mode) != REG_NREGS (src_reg))
834 : return false;
835 :
836 : /* There's only one reaching def. */
837 129067 : rtx_insn *def_insn = state->defs_list[0];
838 :
839 : /* The defining statement must not have been modified either. */
840 129067 : if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
841 : return false;
842 :
843 : /* The defining statement and candidate insn must be in the same block.
844 : This is merely to keep the test for safety and updating the insn
845 : stream simple. Also ensure that within the block the candidate
846 : follows the defining insn. */
847 124509 : basic_block bb = BLOCK_FOR_INSN (cand->insn);
848 124509 : if (bb != BLOCK_FOR_INSN (def_insn)
849 124509 : || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
850 : return false;
851 :
852 : /* If there is an overlap between the destination of DEF_INSN and
853 : CAND->insn, then this transformation is not safe. Note we have
854 : to test in the widened mode. */
855 85358 : rtx *dest_sub_rtx = get_sub_rtx (def_insn);
856 85358 : if (dest_sub_rtx == NULL)
857 : return false;
858 :
859 167974 : rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (set)),
860 83987 : REGNO (SET_DEST (*dest_sub_rtx)));
861 83987 : if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (set)))
862 : return false;
863 :
864 : /* On RISC machines we must make sure that changing the mode of SRC_REG
865 : as destination register will not affect its reaching uses, which may
866 : read its value in a larger mode because DEF_INSN implicitly sets it
867 : in word mode. */
868 83985 : poly_int64 prec
869 83985 : = GET_MODE_PRECISION (GET_MODE (SET_DEST (*dest_sub_rtx)));
870 83985 : if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
871 : {
872 : struct df_link *uses = get_uses (def_insn, src_reg);
873 : if (!uses)
874 : return false;
875 :
876 : for (df_link *use = uses; use; use = use->next)
877 : if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
878 : GET_MODE (SET_DEST (*dest_sub_rtx)))
879 : && !DEBUG_INSN_P (DF_REF_INSN (use->ref)))
880 : return false;
881 : }
882 :
883 : /* The destination register of the extension insn must not be
884 : used or set between the def_insn and cand->insn exclusive. */
885 83985 : if (reg_used_between_p (SET_DEST (set), def_insn, cand->insn)
886 83985 : || reg_set_between_p (SET_DEST (set), def_insn, cand->insn))
887 11869 : return false;
888 :
889 : /* We must be able to copy between the two registers. Generate,
890 : recognize and verify constraints of the copy. Also fail if this
891 : generated more than one insn.
892 :
893 : This generates garbage since we throw away the insn when we're
894 : done, only to recreate it later if this test was successful.
895 :
896 : Make sure to get the mode from the extension (cand->insn). This
897 : is different than in the code to emit the copy as we have not
898 : modified the defining insn yet. */
899 72116 : start_sequence ();
900 72116 : rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (set)),
901 72116 : REGNO (get_extended_src_reg (SET_SRC (set))));
902 144232 : rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (set)),
903 72116 : REGNO (SET_DEST (set)));
904 72116 : emit_move_insn (new_dst, new_src);
905 :
906 72116 : rtx_insn *insn = end_sequence ();
907 72116 : if (NEXT_INSN (insn))
908 : return false;
909 72116 : if (recog_memoized (insn) == -1)
910 : return false;
911 72116 : extract_insn (insn);
912 72116 : if (!constrain_operands (1, get_preferred_alternatives (insn, bb)))
913 : return false;
914 :
915 72116 : while (REG_P (SET_SRC (*dest_sub_rtx))
916 72116 : && (REGNO (SET_SRC (*dest_sub_rtx)) == REGNO (SET_DEST (set))))
917 : {
918 : /* Considering transformation of
919 : (set (reg2) (expression))
920 : ...
921 : (set (reg1) (reg2))
922 : ...
923 : (set (reg2) (any_extend (reg1)))
924 :
925 : into
926 :
927 : (set (reg2) (any_extend (expression)))
928 : (set (reg1) (reg2))
929 : ... */
930 1312 : struct df_link *defs
931 1312 : = get_defs (def_insn, SET_SRC (*dest_sub_rtx), NULL);
932 1312 : if (defs == NULL || defs->next)
933 : break;
934 :
935 : /* There is only one reaching def. */
936 1141 : rtx_insn *def_insn2 = DF_REF_INSN (defs->ref);
937 :
938 : /* The defining statement must not have been modified either. */
939 1141 : if (state->modified[INSN_UID (def_insn2)].kind != EXT_MODIFIED_NONE)
940 : break;
941 :
942 : /* The def_insn2 and candidate insn must be in the same
943 : block and def_insn follows def_insn2. */
944 1141 : if (bb != BLOCK_FOR_INSN (def_insn2)
945 1141 : || DF_INSN_LUID (def_insn2) > DF_INSN_LUID (def_insn))
946 : break;
947 :
948 919 : rtx *dest_sub_rtx2 = get_sub_rtx (def_insn2);
949 919 : if (dest_sub_rtx2 == NULL)
950 : break;
951 :
952 : /* On RISC machines we must make sure that changing the mode of
953 : SRC_REG as destination register will not affect its reaching
954 : uses, which may read its value in a larger mode because DEF_INSN
955 : implicitly sets it in word mode. */
956 917 : if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
957 : {
958 : struct df_link *uses = get_uses (def_insn2, SET_DEST (set));
959 : if (!uses)
960 : break;
961 :
962 : df_link *use;
963 : rtx dest2 = SET_DEST (*dest_sub_rtx2);
964 : for (use = uses; use; use = use->next)
965 : if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
966 : GET_MODE (dest2))
967 : && !DEBUG_INSN_P (DF_REF_INSN (use->ref)))
968 : break;
969 : if (use)
970 : break;
971 : }
972 :
973 : /* The destination register of the extension insn must not be
974 : used or set between the def_insn2 and def_insn exclusive.
975 : Likewise for the other reg, i.e. check both reg1 and reg2
976 : in the above comment. */
977 917 : if (reg_used_between_p (SET_DEST (set), def_insn2, def_insn)
978 903 : || reg_set_between_p (SET_DEST (set), def_insn2, def_insn)
979 903 : || reg_used_between_p (src_reg, def_insn2, def_insn)
980 1785 : || reg_set_between_p (src_reg, def_insn2, def_insn))
981 : break;
982 :
983 233 : state->defs_list[0] = def_insn2;
984 233 : break;
985 : }
986 : }
987 :
988 : /* If cand->insn has been already modified, update cand->mode to a wider
989 : mode if possible, or punt. */
990 248702 : if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
991 : {
992 13 : machine_mode mode;
993 :
994 13 : if (state->modified[INSN_UID (cand->insn)].kind
995 13 : != (cand->code == ZERO_EXTEND
996 13 : ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
997 9 : || state->modified[INSN_UID (cand->insn)].mode != cand->mode
998 13 : || (set == NULL_RTX))
999 : return false;
1000 9 : mode = GET_MODE (SET_DEST (set));
1001 27 : gcc_assert (GET_MODE_UNIT_SIZE (mode)
1002 : >= GET_MODE_UNIT_SIZE (cand->mode));
1003 9 : cand->mode = mode;
1004 : }
1005 :
1006 248698 : merge_successful = true;
1007 :
1008 : /* Go through the defs vector and try to merge all the definitions
1009 : in this vector. */
1010 248698 : state->modified_list.truncate (0);
1011 717428 : FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
1012 : {
1013 294431 : if (merge_def_and_ext (cand, def_insn, state))
1014 220032 : state->modified_list.safe_push (def_insn);
1015 : else
1016 : {
1017 : merge_successful = false;
1018 : break;
1019 : }
1020 : }
1021 :
1022 : /* Now go through the conditional copies vector and try to merge all
1023 : the copies in this vector. */
1024 248698 : if (merge_successful)
1025 : {
1026 176206 : FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
1027 : {
1028 1907 : if (transform_ifelse (cand, def_insn))
1029 1907 : state->modified_list.safe_push (def_insn);
1030 : else
1031 : {
1032 : merge_successful = false;
1033 : break;
1034 : }
1035 : }
1036 : }
1037 :
1038 174299 : if (merge_successful)
1039 : {
1040 : /* Commit the changes here if possible
1041 : FIXME: It's an all-or-nothing scenario. Even if only one definition
1042 : cannot be merged, we entirely give up. In the future, we should allow
1043 : extensions to be partially eliminated along those paths where the
1044 : definitions could be merged. */
1045 174299 : if (apply_change_group ())
1046 : {
1047 80488 : if (dump_file)
1048 17 : fprintf (dump_file, "All merges were successful.\n");
1049 :
1050 199650 : FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1051 : {
1052 119162 : ext_modified *modified = &state->modified[INSN_UID (def_insn)];
1053 119162 : if (modified->kind == EXT_MODIFIED_NONE)
1054 150407 : modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
1055 : : EXT_MODIFIED_SEXT);
1056 :
1057 119162 : if (copy_needed)
1058 24250 : modified->do_not_reextend = 1;
1059 : }
1060 : return true;
1061 : }
1062 : else
1063 : {
1064 : /* Changes need not be cancelled explicitly as apply_change_group
1065 : does it. Print list of definitions in the dump_file for debug
1066 : purposes. This extension cannot be deleted. */
1067 93811 : if (dump_file)
1068 : {
1069 8 : fprintf (dump_file,
1070 : "Merge cancelled, non-mergeable definitions:\n");
1071 24 : FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1072 8 : print_rtl_single (dump_file, def_insn);
1073 : }
1074 : }
1075 : }
1076 : else
1077 : {
1078 : /* Cancel any changes that have been made so far. */
1079 74399 : cancel_changes (0);
1080 : }
1081 :
1082 : return false;
1083 : }
1084 :
1085 : /* Add an extension pattern that could be eliminated. */
1086 :
1087 : static void
1088 51165596 : add_removable_extension (const_rtx expr, rtx_insn *insn,
1089 : vec<ext_cand> *insn_list,
1090 : unsigned *def_map,
1091 : bitmap init_regs)
1092 : {
1093 51165596 : enum rtx_code code;
1094 51165596 : machine_mode mode;
1095 51165596 : unsigned int idx;
1096 51165596 : rtx src, dest;
1097 :
1098 : /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */
1099 51165596 : if (GET_CODE (expr) != SET)
1100 : return;
1101 :
1102 51165596 : src = SET_SRC (expr);
1103 51165596 : code = GET_CODE (src);
1104 51165596 : dest = SET_DEST (expr);
1105 51165596 : mode = GET_MODE (dest);
1106 :
1107 51165596 : if (REG_P (dest)
1108 34651685 : && (code == SIGN_EXTEND || code == ZERO_EXTEND)
1109 609937 : && REG_P (XEXP (src, 0)))
1110 : {
1111 422108 : rtx reg = XEXP (src, 0);
1112 422108 : struct df_link *defs, *def;
1113 422108 : ext_cand *cand;
1114 :
1115 422108 : if (fixed_regs[REGNO (reg)])
1116 : {
1117 5 : if (dump_file)
1118 : {
1119 0 : fprintf (dump_file, "Cannot eliminate extension:\n");
1120 0 : print_rtl_single (dump_file, insn);
1121 0 : fprintf (dump_file, " because extension on fixed register"
1122 : " isn't supported.\n");
1123 : }
1124 71725 : return;
1125 : }
1126 :
1127 : /* Zero-extension of an undefined value is partly defined (it's
1128 : completely undefined for sign-extension, though). So if there exists
1129 : a path from the entry to this zero-extension that leaves this register
1130 : uninitialized, removing the extension could change the behavior of
1131 : correct programs. So first, check it is not the case. */
1132 422103 : if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
1133 : {
1134 38853 : if (dump_file)
1135 : {
1136 0 : fprintf (dump_file, "Cannot eliminate extension:\n");
1137 0 : print_rtl_single (dump_file, insn);
1138 0 : fprintf (dump_file, " because it can operate on uninitialized"
1139 : " data\n");
1140 : }
1141 38853 : return;
1142 : }
1143 :
1144 : /* Second, make sure we can get all the reaching definitions. */
1145 383250 : defs = get_defs (insn, reg, NULL);
1146 383250 : if (!defs)
1147 : {
1148 27246 : if (dump_file)
1149 : {
1150 2 : fprintf (dump_file, "Cannot eliminate extension:\n");
1151 2 : print_rtl_single (dump_file, insn);
1152 2 : fprintf (dump_file, " because of missing definition(s)\n");
1153 : }
1154 27246 : return;
1155 : }
1156 :
1157 : /* Third, make sure the reaching definitions don't feed another and
1158 : different extension. FIXME: this obviously can be improved. */
1159 844734 : for (def = defs; def; def = def->next)
1160 494351 : if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
1161 83453 : && idx != -1U
1162 83453 : && (cand = &(*insn_list)[idx - 1])
1163 577804 : && cand->code != code)
1164 : {
1165 5458 : if (dump_file)
1166 : {
1167 1 : fprintf (dump_file, "Cannot eliminate extension:\n");
1168 1 : print_rtl_single (dump_file, insn);
1169 1 : fprintf (dump_file, " because of other extension\n");
1170 : }
1171 5458 : return;
1172 : }
1173 : /* For vector mode extensions, ensure that all uses of the
1174 : XEXP (src, 0) register are in insn or debug insns, as unlike
1175 : integral extensions lowpart subreg of the sign/zero extended
1176 : register are not equal to the original register, so we have
1177 : to change all uses or none and the current code isn't able
1178 : to change them all at once in one transaction. */
1179 488893 : else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
1180 : {
1181 1619 : struct df_link *ref_chain, *ref_link;
1182 :
1183 1619 : ref_chain = DF_REF_CHAIN (def->ref);
1184 3233 : for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
1185 : {
1186 1777 : if (ref_link->ref == NULL
1187 1777 : || DF_REF_INSN_INFO (ref_link->ref) == NULL)
1188 : {
1189 : idx = -1U;
1190 : break;
1191 : }
1192 1777 : rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
1193 1777 : if (use_insn != insn && !DEBUG_INSN_P (use_insn))
1194 : {
1195 : idx = -1U;
1196 : break;
1197 : }
1198 : }
1199 :
1200 1619 : if (idx == -1U)
1201 : {
1202 163 : def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1203 163 : if (dump_file)
1204 : {
1205 0 : fprintf (dump_file, "Cannot eliminate extension:\n");
1206 0 : print_rtl_single (dump_file, insn);
1207 0 : fprintf (dump_file,
1208 : " because some vector uses aren't extension\n");
1209 : }
1210 163 : return;
1211 : }
1212 : }
1213 :
1214 : /* Fourth, if the extended version occupies more registers than the
1215 : original and the source of the extension is the same hard register
1216 : as the destination of the extension, then we cannot eliminate
1217 : the extension without deep analysis, so just punt.
1218 :
1219 : We allow this when the registers are different because the
1220 : code in combine_reaching_defs will handle that case correctly. */
1221 350383 : if (hard_regno_nregs (REGNO (dest), mode) != REG_NREGS (reg)
1222 350383 : && reg_overlap_mentioned_p (dest, reg))
1223 : return;
1224 :
1225 : /* Then add the candidate to the list and insert the reaching definitions
1226 : into the definition map. */
1227 350383 : ext_cand e = {expr, code, mode, insn};
1228 350383 : insn_list->safe_push (e);
1229 350383 : idx = insn_list->length ();
1230 :
1231 839029 : for (def = defs; def; def = def->next)
1232 488646 : def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1233 : }
1234 : }
1235 :
1236 : /* Traverse the instruction stream looking for extensions and return the
1237 : list of candidates. */
1238 :
1239 : static vec<ext_cand>
1240 963990 : find_removable_extensions (void)
1241 : {
1242 963990 : vec<ext_cand> insn_list = vNULL;
1243 963990 : basic_block bb;
1244 963990 : rtx_insn *insn;
1245 963990 : rtx set;
1246 963990 : unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
1247 963990 : bitmap_head init, kill, gen, tmp;
1248 :
1249 963990 : bitmap_initialize (&init, NULL);
1250 963990 : bitmap_initialize (&kill, NULL);
1251 963990 : bitmap_initialize (&gen, NULL);
1252 963990 : bitmap_initialize (&tmp, NULL);
1253 :
1254 11375023 : FOR_EACH_BB_FN (bb, cfun)
1255 : {
1256 20822066 : bitmap_copy (&init, DF_MIR_IN (bb));
1257 10411033 : bitmap_clear (&kill);
1258 10411033 : bitmap_clear (&gen);
1259 :
1260 135702721 : FOR_BB_INSNS (bb, insn)
1261 : {
1262 125291688 : if (NONDEBUG_INSN_P (insn))
1263 : {
1264 54880475 : set = single_set (insn);
1265 54880475 : if (set != NULL_RTX)
1266 51165596 : add_removable_extension (set, insn, &insn_list, def_map,
1267 : &init);
1268 54880475 : df_mir_simulate_one_insn (bb, insn, &kill, &gen);
1269 54880475 : bitmap_ior_and_compl (&tmp, &gen, &init, &kill);
1270 54880475 : bitmap_copy (&init, &tmp);
1271 : }
1272 : }
1273 : }
1274 :
1275 963990 : XDELETEVEC (def_map);
1276 :
1277 963990 : return insn_list;
1278 : }
1279 :
1280 : /* This is the main function that checks the insn stream for redundant
1281 : extensions and tries to remove them if possible. */
1282 :
1283 : static void
1284 963990 : find_and_remove_re (void)
1285 : {
1286 963990 : ext_cand *curr_cand;
1287 963990 : rtx_insn *curr_insn = NULL;
1288 963990 : int num_re_opportunities = 0, num_realized = 0, i;
1289 963990 : vec<ext_cand> reinsn_list;
1290 963990 : auto_vec<rtx_insn *> reinsn_del_list;
1291 963990 : auto_vec<rtx_insn *> reinsn_copy_list;
1292 :
1293 : /* Construct DU chain to get all reaching definitions of each
1294 : extension instruction. */
1295 963990 : df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
1296 963990 : df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
1297 963990 : df_mir_add_problem ();
1298 963990 : df_analyze ();
1299 963990 : df_set_flags (DF_DEFER_INSN_RESCAN);
1300 :
1301 963990 : max_insn_uid = get_max_uid ();
1302 963990 : reinsn_list = find_removable_extensions ();
1303 :
1304 963990 : ext_state state;
1305 963990 : if (reinsn_list.is_empty ())
1306 846431 : state.modified = NULL;
1307 : else
1308 117559 : state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
1309 :
1310 1314373 : FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
1311 : {
1312 350383 : num_re_opportunities++;
1313 :
1314 : /* Try to combine the extension with the definition. */
1315 350383 : if (dump_file)
1316 : {
1317 37 : fprintf (dump_file, "Trying to eliminate extension:\n");
1318 37 : print_rtl_single (dump_file, curr_cand->insn);
1319 : }
1320 :
1321 350383 : if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
1322 : {
1323 80488 : if (dump_file)
1324 17 : fprintf (dump_file, "Eliminated the extension.\n");
1325 80488 : num_realized++;
1326 : /* If the RHS of the current candidate is not (extend (reg)), then
1327 : we do not allow the optimization of extensions where
1328 : the source and destination registers do not match. Thus
1329 : checking REG_P here is correct. */
1330 80488 : rtx set = single_set (curr_cand->insn);
1331 80488 : if (REG_P (XEXP (SET_SRC (set), 0))
1332 80488 : && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
1333 : {
1334 24250 : reinsn_copy_list.safe_push (curr_cand->insn);
1335 24250 : reinsn_copy_list.safe_push (state.defs_list[0]);
1336 : }
1337 80488 : reinsn_del_list.safe_push (curr_cand->insn);
1338 80488 : state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
1339 : }
1340 : }
1341 :
1342 : /* The copy list contains pairs of insns which describe copies we
1343 : need to insert into the INSN stream.
1344 :
1345 : The first insn in each pair is the extension insn, from which
1346 : we derive the source and destination of the copy.
1347 :
1348 : The second insn in each pair is the memory reference where the
1349 : extension will ultimately happen. We emit the new copy
1350 : immediately after this insn.
1351 :
1352 : It may first appear that the arguments for the copy are reversed.
1353 : Remember that the memory reference will be changed to refer to the
1354 : destination of the extention. So we're actually emitting a copy
1355 : from the new destination to the old destination. */
1356 1026847 : for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
1357 : {
1358 24250 : rtx_insn *curr_insn = reinsn_copy_list[i];
1359 24250 : rtx_insn *def_insn = reinsn_copy_list[i + 1];
1360 :
1361 : /* Use the mode of the destination of the defining insn
1362 : for the mode of the copy. This is necessary if the
1363 : defining insn was used to eliminate a second extension
1364 : that was wider than the first. */
1365 24250 : rtx sub_rtx = *get_sub_rtx (def_insn);
1366 24250 : rtx set = single_set (curr_insn);
1367 48500 : rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1368 24250 : REGNO (XEXP (SET_SRC (set), 0)));
1369 48500 : rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1370 24250 : REGNO (SET_DEST (set)));
1371 24250 : rtx new_set = gen_rtx_SET (new_dst, new_src);
1372 24250 : emit_insn_after (new_set, def_insn);
1373 : }
1374 :
1375 : /* Delete all useless extensions here in one sweep. */
1376 1044478 : FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
1377 80488 : delete_insn (curr_insn);
1378 :
1379 963990 : reinsn_list.release ();
1380 963990 : XDELETEVEC (state.modified);
1381 :
1382 963990 : if (dump_file && num_re_opportunities > 0)
1383 21 : fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
1384 : num_re_opportunities, num_realized);
1385 963990 : }
1386 :
1387 : /* Find and remove redundant extensions. */
1388 :
1389 : static unsigned int
1390 963990 : rest_of_handle_ree (void)
1391 : {
1392 0 : find_and_remove_re ();
1393 963990 : return 0;
1394 : }
1395 :
1396 : namespace {
1397 :
1398 : const pass_data pass_data_ree =
1399 : {
1400 : RTL_PASS, /* type */
1401 : "ree", /* name */
1402 : OPTGROUP_NONE, /* optinfo_flags */
1403 : TV_REE, /* tv_id */
1404 : 0, /* properties_required */
1405 : 0, /* properties_provided */
1406 : 0, /* properties_destroyed */
1407 : 0, /* todo_flags_start */
1408 : TODO_df_finish, /* todo_flags_finish */
1409 : };
1410 :
1411 : class pass_ree : public rtl_opt_pass
1412 : {
1413 : public:
1414 285722 : pass_ree (gcc::context *ctxt)
1415 571444 : : rtl_opt_pass (pass_data_ree, ctxt)
1416 : {}
1417 :
1418 : /* opt_pass methods: */
1419 1471370 : bool gate (function *) final override { return (optimize > 0 && flag_ree); }
1420 963990 : unsigned int execute (function *) final override
1421 : {
1422 963990 : return rest_of_handle_ree ();
1423 : }
1424 :
1425 : }; // class pass_ree
1426 :
1427 : } // anon namespace
1428 :
1429 : rtl_opt_pass *
1430 285722 : make_pass_ree (gcc::context *ctxt)
1431 : {
1432 285722 : return new pass_ree (ctxt);
1433 : }
|