Line data Source code
1 : /* Subroutines used for code generation on IA-32.
2 : Copyright (C) 1988-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #define INCLUDE_STRING
21 : #define IN_TARGET_CODE 1
22 :
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "memmodel.h"
30 : #include "gimple.h"
31 : #include "cfghooks.h"
32 : #include "cfgloop.h"
33 : #include "df.h"
34 : #include "tm_p.h"
35 : #include "stringpool.h"
36 : #include "expmed.h"
37 : #include "optabs.h"
38 : #include "regs.h"
39 : #include "emit-rtl.h"
40 : #include "recog.h"
41 : #include "cgraph.h"
42 : #include "diagnostic.h"
43 : #include "cfgbuild.h"
44 : #include "alias.h"
45 : #include "fold-const.h"
46 : #include "attribs.h"
47 : #include "calls.h"
48 : #include "stor-layout.h"
49 : #include "varasm.h"
50 : #include "output.h"
51 : #include "insn-attr.h"
52 : #include "flags.h"
53 : #include "except.h"
54 : #include "explow.h"
55 : #include "expr.h"
56 : #include "cfgrtl.h"
57 : #include "common/common-target.h"
58 : #include "langhooks.h"
59 : #include "reload.h"
60 : #include "gimplify.h"
61 : #include "dwarf2.h"
62 : #include "tm-constrs.h"
63 : #include "cselib.h"
64 : #include "sched-int.h"
65 : #include "opts.h"
66 : #include "tree-pass.h"
67 : #include "context.h"
68 : #include "pass_manager.h"
69 : #include "target-globals.h"
70 : #include "gimple-iterator.h"
71 : #include "gimple-fold.h"
72 : #include "tree-vectorizer.h"
73 : #include "shrink-wrap.h"
74 : #include "builtins.h"
75 : #include "rtl-iter.h"
76 : #include "tree-iterator.h"
77 : #include "dbgcnt.h"
78 : #include "case-cfn-macros.h"
79 : #include "dojump.h"
80 : #include "fold-const-call.h"
81 : #include "tree-vrp.h"
82 : #include "tree-ssanames.h"
83 : #include "selftest.h"
84 : #include "selftest-rtl.h"
85 : #include "print-rtl.h"
86 : #include "intl.h"
87 : #include "ifcvt.h"
88 : #include "symbol-summary.h"
89 : #include "sreal.h"
90 : #include "ipa-cp.h"
91 : #include "ipa-prop.h"
92 : #include "ipa-fnsummary.h"
93 : #include "wide-int-bitmask.h"
94 : #include "tree-vector-builder.h"
95 : #include "debug.h"
96 : #include "dwarf2out.h"
97 : #include "i386-options.h"
98 : #include "i386-builtins.h"
99 : #include "i386-expand.h"
100 : #include "i386-features.h"
101 : #include "function-abi.h"
102 : #include "rtl-error.h"
103 : #include "gimple-pretty-print.h"
104 :
105 : /* This file should be included last. */
106 : #include "target-def.h"
107 :
/* Forward declarations of two file-local helpers. The bool parameter of
   ix86_emit_restore_reg_using_pop defaults to false when omitted. */
108 : static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
109 : static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
110 :
111 :
/* Use -1 for CHECK_STACK_LIMIT unless it has already been defined before
   this point. */
112 : #ifndef CHECK_STACK_LIMIT
113 : #define CHECK_STACK_LIMIT (-1)
114 : #endif
115 :
116 : /* Return index of given mode in mult and division cost tables:
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, any other mode -> 4.
   MODE appears four times in the expansion, so the argument should be an
   expression without side effects. */
117 : #define MODE_INDEX(mode) \
118 : ((mode) == QImode ? 0 \
119 : : (mode) == HImode ? 1 \
120 : : (mode) == SImode ? 2 \
121 : : (mode) == DImode ? 3 \
122 : : 4)
123 :
124 :
125 : /* Set by -mtune. Starts out as NULL. */
126 : const struct processor_costs *ix86_tune_cost = NULL;
127 :
128 : /* Set by -mtune or -Os. Starts out as NULL. */
129 : const struct processor_costs *ix86_cost = NULL;
130 :
131 : /* In case the average insn count for single function invocation is
132 : lower than this constant, emit fast (but longer) prologue and
133 : epilogue code. */
134 : #define FAST_PROLOGUE_INSN_COUNT 20
135 :
136 : /* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The contents come from the QI_REGISTER_NAMES, QI_HIGH_REGISTER_NAMES
   and HI_REGISTER_NAMES macros, which are defined outside this part of
   the file. */
137 : static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
138 : static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
139 : static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
140 :
141 : /* Array of the smallest class containing reg number REGNO, indexed by
142 : REGNO. Used by REGNO_REG_CLASS in i386.h.
   The initializer supplies 92 entries. They are grouped in hard register
   order, and each group is labelled by the comment that precedes it. */
143 :
144 : enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
145 : {
146 : /* ax, dx, cx, bx */
147 : AREG, DREG, CREG, BREG,
148 : /* si, di, bp, sp */
149 : SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
150 : /* FP registers */
151 : FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
152 : FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
153 : /* arg pointer, flags, fpsr, frame */
154 : NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
155 : /* SSE registers */
156 : SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
157 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
158 : /* MMX registers */
159 : MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
160 : MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 : /* REX registers */
162 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 : /* SSE REX registers */
165 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
166 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 : /* AVX-512 SSE registers */
168 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 : /* Mask registers. */
173 : ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
174 : MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 : /* REX2 registers */
176 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
180 : };
181 :
182 : /* The "default" register map used in 32bit mode.
   One entry per hard register, in the same group order as regclass_map.
   Registers the 32-bit map does not describe are given as
   IGNORED_DWARF_REGNUM or INVALID_REGNUM.

   NOTE(review): only 76 initializers are listed, whereas the 64-bit map
   below lists 92. The trailing 16 elements (the REX2 integer registers)
   are therefore implicitly zero rather than INVALID_REGNUM. This is
   presumably harmless, because ix86_conditional_register_usage removes
   those registers from accessible_reg_set unless both TARGET_APX_EGPR and
   TARGET_64BIT hold - confirm. */
183 :
184 : unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
185 : {
186 : /* general regs */
187 : 0, 2, 1, 3, 6, 7, 4, 5,
188 : /* fp regs */
189 : 12, 13, 14, 15, 16, 17, 18, 19,
190 : /* arg, flags, fpsr, frame */
191 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
192 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 : /* SSE */
194 : 21, 22, 23, 24, 25, 26, 27, 28,
195 : /* MMX */
196 : 29, 30, 31, 32, 33, 34, 35, 36,
197 : /* extended integer registers */
198 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 : /* extended sse registers */
201 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 : /* AVX-512 registers 16-23 */
204 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
205 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 : /* AVX-512 registers 24-31 */
207 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
208 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 : /* Mask registers */
210 : 93, 94, 95, 96, 97, 98, 99, 100
211 : };
212 :
213 : /* The "default" register map used in 64bit mode.
   One entry per hard register, in the same group order as regclass_map.
   All 92 entries are spelled out, including the sixteen REX2 integer
   registers at the end (130-145). */
214 :
215 : unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
216 : {
217 : /* general regs */
218 : 0, 1, 2, 3, 4, 5, 6, 7,
219 : /* fp regs */
220 : 33, 34, 35, 36, 37, 38, 39, 40,
221 : /* arg, flags, fpsr, frame */
222 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
223 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 : /* SSE */
225 : 17, 18, 19, 20, 21, 22, 23, 24,
226 : /* MMX */
227 : 41, 42, 43, 44, 45, 46, 47, 48,
228 : /* extended integer registers */
229 : 8, 9, 10, 11, 12, 13, 14, 15,
230 : /* extended SSE registers */
231 : 25, 26, 27, 28, 29, 30, 31, 32,
232 : /* AVX-512 registers 16-23 */
233 : 67, 68, 69, 70, 71, 72, 73, 74,
234 : /* AVX-512 registers 24-31 */
235 : 75, 76, 77, 78, 79, 80, 81, 82,
236 : /* Mask registers */
237 : 118, 119, 120, 121, 122, 123, 124, 125,
238 : /* rex2 extend integer registers */
239 : 130, 131, 132, 133, 134, 135, 136, 137,
240 : 138, 139, 140, 141, 142, 143, 144, 145
241 : };
242 :
243 : /* Define the register numbers to be used in Dwarf debugging information.
244 : The SVR4 reference port C compiler uses the following register numbers
245 : in its Dwarf output code:
246 : 0 for %eax (gcc regno = 0)
247 : 1 for %ecx (gcc regno = 2)
248 : 2 for %edx (gcc regno = 1)
249 : 3 for %ebx (gcc regno = 3)
250 : 4 for %esp (gcc regno = 7)
251 : 5 for %ebp (gcc regno = 6)
252 : 6 for %esi (gcc regno = 4)
253 : 7 for %edi (gcc regno = 5)
254 : The following three DWARF register numbers are never generated by
255 : the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 : believed these numbers have these meanings.
257 : 8 for %eip (no gcc equivalent)
258 : 9 for %eflags (gcc regno = 17)
259 : 10 for %trapno (no gcc equivalent)
260 : It is not at all clear how we should number the FP stack registers
261 : for the x86 architecture. If the version of SDB on x86/svr4 were
262 : a bit less brain dead with respect to floating-point then we would
263 : have a precedent to follow with respect to DWARF register numbers
264 : for x86 FP registers, but the SDB on x86/svr4 was so completely
265 : broken with respect to FP registers that it is hardly worth thinking
266 : of it as something to strive for compatibility with.
267 : The version of x86/svr4 SDB I had does (partially)
268 : seem to believe that DWARF register number 11 is associated with
269 : the x86 register %st(0), but that's about all. Higher DWARF
270 : register numbers don't seem to be associated with anything in
271 : particular, and even for DWARF regno 11, SDB only seemed to under-
272 : stand that it should say that a variable lives in %st(0) (when
273 : asked via an `=' command) if we said it was in DWARF regno 11,
274 : but SDB still printed garbage when asked for the value of the
275 : variable in question (via a `/' command).
276 : (Also note that the labels SDB printed for various FP stack regs
277 : when doing an `x' command were all wrong.)
278 : Note that these problems generally don't affect the native SVR4
279 : C compiler because it doesn't allow the use of -O with -g and
280 : because when it is *not* optimizing, it allocates a memory
281 : location for each floating-point variable, and the memory
282 : location is what gets described in the DWARF AT_location
283 : attribute for the variable in question.
284 : Regardless of the severe mental illness of the x86/svr4 SDB, we
285 : do something sensible here and we use the following DWARF
286 : register numbers. Note that these are all stack-top-relative
287 : numbers.
288 : 11 for %st(0) (gcc regno = 8)
289 : 12 for %st(1) (gcc regno = 9)
290 : 13 for %st(2) (gcc regno = 10)
291 : 14 for %st(3) (gcc regno = 11)
292 : 15 for %st(4) (gcc regno = 12)
293 : 16 for %st(5) (gcc regno = 13)
294 : 17 for %st(6) (gcc regno = 14)
295 : 18 for %st(7) (gcc regno = 15)
296 : */
/* One entry per hard register, in gcc regno order. The general, FP and
   flags entries match the numbering spelled out in the comment above:
   regno 6 (%ebp) -> 5, regno 7 (%esp) -> 4, regno 17 (flags) -> 9, and
   %st(0)-%st(7) -> 11-18.

   NOTE(review): as in debugger_register_map, only 76 initializers are
   listed, so the trailing 16 elements (the REX2 integer registers) are
   implicitly zero rather than INVALID_REGNUM - confirm that they can never
   be looked up through this map. */
297 : unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
298 : {
299 : /* general regs */
300 : 0, 2, 1, 3, 6, 7, 5, 4,
301 : /* fp regs */
302 : 11, 12, 13, 14, 15, 16, 17, 18,
303 : /* arg, flags, fpsr, frame */
304 : IGNORED_DWARF_REGNUM, 9,
305 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
306 : /* SSE registers */
307 : 21, 22, 23, 24, 25, 26, 27, 28,
308 : /* MMX registers */
309 : 29, 30, 31, 32, 33, 34, 35, 36,
310 : /* extended integer registers */
311 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 : /* extended sse registers */
314 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
315 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 : /* AVX-512 registers 16-23 */
317 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
318 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 : /* AVX-512 registers 24-31 */
320 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
321 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 : /* Mask registers */
323 : 93, 94, 95, 96, 97, 98, 99, 100
324 : };
325 :
326 : /* Define parameter passing and return registers. */
327 :
/* Six integer argument registers, in the order listed. NOTE(review):
   presumably the default 64-bit convention, as opposed to the ms_abi and
   preserve_none tables that follow - confirm at the users. */
328 : static int const x86_64_int_parameter_registers[6] =
329 : {
330 : DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
331 : };
332 :
/* Four integer argument registers for the ms_abi variant. */
333 : static int const x86_64_ms_abi_int_parameter_registers[4] =
334 : {
335 : CX_REG, DX_REG, R8_REG, R9_REG
336 : };
337 :
338 : /* Similar to Clang's preserve_none function parameter passing.
339 : NB: Use DI_REG and SI_REG, see ix86_function_arg_regno_p. */
340 :
341 : static int const x86_64_preserve_none_int_parameter_registers[6] =
342 : {
343 : R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
344 : };
345 :
/* Two integer return registers, in the order listed. */
346 : static int const x86_64_int_return_registers[2] =
347 : {
348 : AX_REG, DX_REG
349 : };
350 :
351 : /* Define the structure for the machine field in struct function. */
352 :
/* One node of a chain linked through NEXT; each node pairs an rtx (RTL)
   with a MODE and N value.
   NOTE(review): presumably MODE and N act as the lookup key for a cached
   stack slot - confirm against the users of this type, which are not
   visible in this part of the file. */
353 : struct GTY(()) stack_local_entry {
354 : unsigned short mode;
355 : unsigned short n;
356 : rtx rtl;
357 : struct stack_local_entry *next;
358 : };
359 :
/* The globals in this group have no initializer here, so they start out
   zero-initialized. NOTE(review): presumably they are filled in during
   option processing elsewhere - confirm. */

360 : /* Which cpu are we scheduling for. */
361 : enum attr_cpu ix86_schedule;
362 :
363 : /* Which cpu are we optimizing for. */
364 : enum processor_type ix86_tune;
365 :
366 : /* Which instruction set architecture to use. */
367 : enum processor_type ix86_arch;
368 :
369 : /* True if processor has SSE prefetch instruction. */
370 : unsigned char ix86_prefetch_sse;
371 :
372 : /* Preferred alignment for stack boundary in bits. */
373 : unsigned int ix86_preferred_stack_boundary;
374 :
375 : /* Alignment for incoming stack boundary in bits specified at
376 : command line. */
377 : unsigned int ix86_user_incoming_stack_boundary;
378 :
379 : /* Default alignment for incoming stack boundary in bits. */
380 : unsigned int ix86_default_incoming_stack_boundary;
381 :
382 : /* Alignment for incoming stack boundary in bits. */
383 : unsigned int ix86_incoming_stack_boundary;
384 :
385 : /* True if there is no direct access to extern symbols. */
386 : bool ix86_has_no_direct_extern_access;
387 :
388 : /* Calling abi specific va_list type nodes. */
389 : tree sysv_va_list_type_node;
390 : tree ms_va_list_type_node;
391 :
392 : /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. The buffer holds 16
   bytes. NOTE(review): internal_label_prefix_len presumably holds the
   length of the string stored in it - confirm where both are set. */
393 : char internal_label_prefix[16];
394 : int internal_label_prefix_len;
395 :
396 : /* Fence to use after loop using movnt. */
397 : tree x86_mfence;
398 :
399 : /* Register class used for passing given 64bit part of the argument.
400 : These represent classes as documented by the PS ABI, with the exception
401 : of SSESF, SSEDF classes, that are basically SSE class, just gcc will
402 : use SF or DFmode move instead of DImode to avoid reformatting penalties.
403 :
404 : Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
405 : whenever possible (upper half does contain padding). */
/* Twelve classes, numbered from 0 (X86_64_NO_CLASS) in declaration order.
   NOTE(review): X86_64_SSEHF_CLASS sits next to the SSESF/SSEDF classes
   described above and is presumably their HFmode counterpart - confirm. */
406 : enum x86_64_reg_class
407 : {
408 : X86_64_NO_CLASS,
409 : X86_64_INTEGER_CLASS,
410 : X86_64_INTEGERSI_CLASS,
411 : X86_64_SSE_CLASS,
412 : X86_64_SSEHF_CLASS,
413 : X86_64_SSESF_CLASS,
414 : X86_64_SSEDF_CLASS,
415 : X86_64_SSEUP_CLASS,
416 : X86_64_X87_CLASS,
417 : X86_64_X87UP_CLASS,
418 : X86_64_COMPLEX_X87_CLASS,
419 : X86_64_MEMORY_CLASS
420 : };
421 :
/* NOTE(review): presumably the largest number of x86_64_reg_class values
   used to describe a single argument - confirm at the uses. */
422 : #define MAX_CLASSES 8
423 :
424 : /* Table of constants used by fldpi, fldln2, etc.... The table has five
   slots. NOTE(review): ext_80387_constants_init presumably records
   whether the table has been filled in yet - confirm at the initializer. */
425 : static REAL_VALUE_TYPE ext_80387_constants_table [5];
426 : static bool ext_80387_constants_init;
427 :
428 :
/* Forward declarations of file-local functions. Of these, only
   ix86_can_inline_p is defined within this part of the file. */
429 : static rtx ix86_function_value (const_tree, const_tree, bool);
430 : static bool ix86_function_value_regno_p (const unsigned int);
431 : static unsigned int ix86_function_arg_boundary (machine_mode,
432 : const_tree);
433 : static rtx ix86_static_chain (const_tree, bool);
434 : static int ix86_function_regparm (const_tree, const_tree);
435 : static void ix86_compute_frame_layout (void);
436 : static tree ix86_canonical_va_list_type (tree);
437 : static unsigned int split_stack_prologue_scratch_regno (void);
438 : static bool i386_asm_output_addr_const_extra (FILE *, rtx);
439 :
440 : static bool ix86_can_inline_p (tree, tree);
441 : static unsigned int ix86_minimum_incoming_stack_boundary (bool);
442 :
/* Sixteen condition-code enumerators, numbered 0 (X86_CCO) through
   15 (X86_CCNLE) in declaration order. */
443 : typedef enum ix86_flags_cc
444 : {
445 : X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
446 : X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
447 : X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
448 : X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
449 : } ix86_cc;
450 :
/* Sixteen "{dfv=...}" strings, laid out four per row like the
   enumerators above. NOTE(review): presumably indexed by an ix86_cc
   value - confirm at the use, which is outside this part of the file. */
451 : static const char *ix86_ccmp_dfv_mapping[] =
452 : {
453 : "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
454 : "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
455 : "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
456 : "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
457 : };
458 :
459 :
460 : /* Whether -mtune= or -march= were specified.
   NOTE(review): the two names have opposite senses. ix86_tune_defaulted
   presumably is nonzero when -mtune= was NOT given, while
   ix86_arch_specified presumably is nonzero when -march= WAS given -
   confirm where they are set. */
461 : int ix86_tune_defaulted;
462 : int ix86_arch_specified;
463 :
464 : /* Return true if a red-zone is in use. We can't use red-zone when
465 : there are local indirect jumps, like "indirect_jump" or "tablejump",
466 : which jumps to another place in the function, since "call" in the
467 : indirect thunk pushes the return address onto stack, destroying
468 : red-zone.
469 :
470 : NB: Don't use red-zone for functions with no_caller_saved_registers
471 : and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
472 : for 31 GPRs or 15 GPRs + 16 XMM registers.
473 :
474 : TODO: If we can reserve the first 2 WORDs, for PUSH and, another
475 : for CALL, in red-zone, we can allow local indirect jumps with
476 : indirect thunk. */
477 :
/* The red zone is reported as in use only when all of these hold:
   - TARGET_RED_ZONE is set and TARGET_64BIT_MS_ABI is not;
   - the function is not TYPE_NO_CALLER_SAVED_REGISTERS, or both
     TARGET_APX_EGPR and TARGET_SSE are off;
   - the function has no local indirect jump, or its indirect_branch_type
     is indirect_branch_keep.
   cfun->machine is read without a null check once the first two tests
   pass, so cfun must be set by then. */
478 : bool
479 9933591 : ix86_using_red_zone (void)
480 : {
481 9933591 : return (TARGET_RED_ZONE
482 8991252 : && !TARGET_64BIT_MS_ABI
483 8688543 : && ((!TARGET_APX_EGPR && !TARGET_SSE)
484 8665538 : || (cfun->machine->call_saved_registers
485 8665538 : != TYPE_NO_CALLER_SAVED_REGISTERS))
486 18622073 : && (!cfun->machine->has_local_indirect_jump
487 47318 : || cfun->machine->indirect_branch_type == indirect_branch_keep));
488 : }
489 :
490 : /* Return true if profiling code should be emitted before the
491 : prologue, which is the case exactly when flag_fentry is nonzero.
492 : Note: For x86 with "hotfix" it is sorried. */
493 : static bool
494 4516396 : ix86_profile_before_prologue (void)
495 : {
496 4516396 : return flag_fentry != 0;
497 : }
498 :
499 : /* Update register usage after having seen the compiler flags. */
500 :
/* Adjusts three pieces of global register state to match the current
   target flags: call_used_regs, accessible_reg_set, and the
   CLOBBERED_REGS entry of reg_class_contents. When cfun is set, the
   current function's call_saved_registers kind is taken into account as
   well. */
501 : static void
502 841208 : ix86_conditional_register_usage (void)
503 : {
504 841208 : int i, c_mask;
505 :
506 : /* If there are no caller-saved registers, preserve all registers,
507 : except fixed_regs and registers used for function return value
508 : since aggregate_value_p checks call_used_regs[regno] on return
509 : value. */
510 841208 : if (cfun
511 69212 : && (cfun->machine->call_saved_registers
512 69212 : == TYPE_NO_CALLER_SAVED_REGISTERS))
513 462489 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
514 457516 : if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
515 422283 : call_used_regs[i] = 0;
516 :
517 : /* For 32-bit targets, disable the REX registers. */
518 841208 : if (! TARGET_64BIT)
519 : {
520 134550 : for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
521 119600 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
522 134550 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
523 119600 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
524 254150 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
525 239200 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
526 : }
527 :
528 : /* See the definition of CALL_USED_REGISTERS in i386.h. */
529 841208 : c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
530 :
/* CLOBBERED_REGS is rebuilt from scratch by the loop below. */
531 841208 : CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
532 :
533 78232344 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
534 : {
535 : /* Set/reset conditionally defined registers from
536 : CALL_USED_REGISTERS initializer. Values above 1 are treated as bit
   masks: they are tested against c_mask and collapsed to 0 or 1. */
537 77391136 : if (call_used_regs[i] > 1)
538 13379705 : call_used_regs[i] = !!(call_used_regs[i] & c_mask);
539 :
540 : /* Calculate registers of CLOBBERED_REGS register set
541 : as call used registers from GENERAL_REGS register set. */
542 77391136 : if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
543 77391136 : && call_used_regs[i])
544 23428097 : SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
545 : }
546 :
547 : /* If MMX is disabled, disable the registers. */
548 841208 : if (! TARGET_MMX)
549 400526 : accessible_reg_set &= ~reg_class_contents[MMX_REGS];
550 :
551 : /* If SSE is disabled, disable the registers. */
552 841208 : if (! TARGET_SSE)
553 394552 : accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
554 :
555 : /* If the FPU is disabled, disable the registers. */
556 841208 : if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
557 395772 : accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
558 :
559 : /* If AVX512F is disabled, disable the registers. This clears the same
   FIRST_EXT_REX_SSE_REG..LAST_EXT_REX_SSE_REG range as the 32-bit case
   above, and additionally removes every register in ALL_MASK_REGS. */
560 841208 : if (! TARGET_AVX512F)
561 : {
562 10204131 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
563 9603888 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
564 :
565 1200486 : accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
566 : }
567 :
568 : /* If APX is disabled, disable the registers. The REX2 registers stay
   accessible only when both TARGET_APX_EGPR and TARGET_64BIT hold. */
569 841208 : if (! (TARGET_APX_EGPR && TARGET_64BIT))
570 : {
571 14289095 : for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
572 13448560 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
573 : }
574 841208 : }
575 :
576 : /* Canonicalize a comparison from one we don't have to one we do have. */
577 :
/* CODE, OP0 and OP1 are updated in place. Nothing is changed when
   OP0_PRESERVE_VALUE is true, because each of the three rewrites below is
   guarded by !op0_preserve_value. At most one rewrite is applied per
   call: each one returns as soon as it has fired. */
578 : static void
579 23954601 : ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
580 : bool op0_preserve_value)
581 : {
582 : /* The order of operands in x87 ficom compare is forced by combine in
583 : simplify_comparison () function. Float operator is treated as RTX_OBJ
584 : with a precedence over other operators and is always put in the first
585 : place. Swap condition and operands to match ficom instruction. */
586 23954601 : if (!op0_preserve_value
587 23156822 : && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
588 : {
589 14 : enum rtx_code scode = swap_condition ((enum rtx_code) *code);
590 :
591 : /* We are called only for compares that are split to SAHF instruction.
592 : Ensure that we have setcc/jcc insn for the swapped condition. If
   there is none, fall through to the remaining checks unchanged. */
593 14 : if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
594 : {
595 6 : std::swap (*op0, *op1);
596 6 : *code = (int) scode;
597 6 : return;
598 : }
599 : }
600 :
601 : /* SUB (a, b) underflows precisely when a < b. Convert
602 : (compare (minus (a b)) a) to (compare (a b))
603 : to match *sub<mode>_3 pattern. */
604 23156816 : if (!op0_preserve_value
605 23156816 : && (*code == GTU || *code == LEU)
606 1791497 : && GET_CODE (*op0) == MINUS
607 77766 : && rtx_equal_p (XEXP (*op0, 0), *op1))
608 : {
/* OP1 is overwritten first because both operands are read from the
   original MINUS expression, which the second assignment replaces. */
609 487 : *op1 = XEXP (*op0, 1);
610 487 : *op0 = XEXP (*op0, 0);
611 487 : *code = (int) swap_condition ((enum rtx_code) *code);
612 487 : return;
613 : }
614 :
615 : /* Swap operands of GTU comparison to canonicalize
616 : addcarry/subborrow comparison. */
617 23954108 : if (!op0_preserve_value
618 23156329 : && *code == GTU
619 820375 : && GET_CODE (*op0) == PLUS
620 322268 : && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
621 44135 : && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
622 23994141 : && GET_CODE (*op1) == ZERO_EXTEND)
623 : {
624 36734 : std::swap (*op0, *op1);
625 36734 : *code = (int) swap_condition ((enum rtx_code) *code);
626 36734 : return;
627 : }
628 : }
629 :
630 : /* Hook to determine if one function can safely inline another. */
631 :
/* Returns true when CALLEE may be inlined into CALLER. The target option
   node of each decl is compared, with target_option_default_node standing
   in when a decl has none. Identical nodes are accepted immediately.
   Otherwise the checks below run in order and the first one that matches
   decides:
   1. callee ISA flags (either flag word) not a subset of the caller's
      -> false;
   2. target_flags differ -> false; for an always_inline callee only the
      bits outside always_inline_safe_mask are compared;
   3. fpmath differs and the callee may use FP expressions, or no summary
      is available to tell -> false;
   4. callee arch string is "x86-64" and tune string is "generic" -> true;
   5. for callees that are not always_inline: arch, tune or branch_cost
      differs -> false;
   6. anything else -> true. */
632 : static bool
633 9635341 : ix86_can_inline_p (tree caller, tree callee)
634 : {
635 9635341 : tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
636 9635341 : tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
637 :
638 : /* Differences in these flags can be tolerated for always_inline
639 : callees. Let's hope the user knows what they are doing. */
640 9635341 : unsigned HOST_WIDE_INT always_inline_safe_mask
641 : = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
642 : | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
643 : | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
644 : | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
645 : | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
646 : | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
647 : | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
648 :
649 :
650 9635341 : if (!callee_tree)
651 9038303 : callee_tree = target_option_default_node;
652 9635341 : if (!caller_tree)
653 9038368 : caller_tree = target_option_default_node;
654 9635341 : if (callee_tree == caller_tree)
655 : return true;
656 :
657 5292 : struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
658 5292 : struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
659 5292 : bool ret = false;
/* A callee counts as always_inline only if it both disregards inline
   limits and carries the "always_inline" attribute. */
660 5292 : bool always_inline
661 5292 : = (DECL_DISREGARD_INLINE_LIMITS (callee)
662 9939 : && lookup_attribute ("always_inline",
663 4647 : DECL_ATTRIBUTES (callee)));
664 :
665 : /* If callee only uses GPRs, ignore MASK_80387. */
666 5292 : if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
667 1030 : always_inline_safe_mask |= MASK_80387;
668 :
669 5292 : cgraph_node *callee_node = cgraph_node::get (callee);
670 : /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
671 : function can inline a SSE2 function but a SSE2 function can't inline
672 : a SSE4 function. */
673 5292 : if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
674 : != callee_opts->x_ix86_isa_flags)
675 5056 : || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
676 : != callee_opts->x_ix86_isa_flags2))
677 : ret = false;
678 :
679 : /* See if we have the same non-isa options. */
680 5019 : else if ((!always_inline
681 388 : && caller_opts->x_target_flags != callee_opts->x_target_flags)
682 4975 : || (caller_opts->x_target_flags & ~always_inline_safe_mask)
683 4975 : != (callee_opts->x_target_flags & ~always_inline_safe_mask))
684 : ret = false;
685 :
686 4975 : else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
687 : /* If the callee doesn't use FP expressions differences in
688 : ix86_fpmath can be ignored. We are called from FEs
689 : for multi-versioning call optimization, so beware of
690 : ipa_fn_summaries not available. */
691 1247 : && (! ipa_fn_summaries
692 1247 : || ipa_fn_summaries->get (callee_node) == NULL
693 1247 : || ipa_fn_summaries->get (callee_node)->fp_expressions))
694 : ret = false;
695 :
696 : /* At this point we cannot identify whether arch or tune setting
697 : comes from target attribute or not. So the most conservative way
698 : is to allow the callee that uses default arch and tune string to
699 : be inlined. */
700 4701 : else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
701 1430 : && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
702 : ret = true;
703 :
704 : /* See if arch, tune, etc. are the same. As previous ISA flags already
705 : checks if callee's ISA is subset of caller's, do not block
706 : always_inline attribute for callee even if it has a different arch. */
707 3279 : else if (!always_inline && caller_opts->arch != callee_opts->arch)
708 : ret = false;
709 :
710 15 : else if (!always_inline && caller_opts->tune != callee_opts->tune)
711 : ret = false;
712 :
713 3279 : else if (!always_inline
714 15 : && caller_opts->branch_cost != callee_opts->branch_cost)
715 : ret = false;
716 :
717 : else
718 9634750 : ret = true;
719 :
720 : return ret;
721 : }
722 :
723 : /* Return true if this goes in large data/bss. */
724 :
/* EXP may be NULL_TREE, in which case the answer is false. The answer is
   also false unless ix86_cmodel is one of CM_MEDIUM, CM_MEDIUM_PIC,
   CM_LARGE or CM_LARGE_PIC. For a variable with an explicit section name,
   the name alone decides. Otherwise the size of EXP's type is compared
   with ix86_section_threshold. */
725 : static bool
726 78359778 : ix86_in_large_data_p (tree exp)
727 : {
728 78359778 : if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
729 78359540 : && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
730 : return false;
731 :
732 1147 : if (exp == NULL_TREE)
733 : return false;
734 :
735 : /* Functions are never large data. */
736 1147 : if (TREE_CODE (exp) == FUNCTION_DECL)
737 : return false;
738 :
739 : /* Automatic variables are never large data. */
740 279 : if (VAR_P (exp) && !is_global_var (exp))
741 : return false;
742 :
743 279 : if (VAR_P (exp) && DECL_SECTION_NAME (exp))
744 : {
/* Only the .ldata, .lbss and .gnu.linkonce.lb. name families count as
   large here. Any other explicit section name yields false, whatever the
   size of the variable; .lrodata, for example, is not in this list. */
745 51 : const char *section = DECL_SECTION_NAME (exp);
746 51 : if (strcmp (section, ".ldata") == 0
747 51 : || startswith (section, ".ldata.")
748 51 : || strcmp (section, ".lbss") == 0
749 51 : || startswith (section, ".lbss.")
750 99 : || startswith (section, ".gnu.linkonce.lb."))
751 : return true;
752 : return false;
753 : }
754 : else
755 : {
756 228 : HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
757 :
758 : /* If this is an incomplete type with size 0, then we can't put it
759 : in data because it might be too big when completed. Also,
760 : int_size_in_bytes returns -1 if size can vary or is larger than
761 : an integer in which case also it is safer to assume that it goes in
762 : large data. */
763 228 : if (size <= 0 || size > ix86_section_threshold)
764 : return true;
765 : }
766 :
/* Reached only from the size branch, when the size is positive and does
   not exceed ix86_section_threshold. */
767 : return false;
768 : }
769 :
770 : /* i386-specific section flag to mark large sections. It is an alias
   for the SECTION_MACH_DEP flag. */
771 : #define SECTION_LARGE SECTION_MACH_DEP
772 :
773 : /* Switch to the appropriate section for output of DECL.
774 : DECL is either a `VAR_DECL' node or a constant of some sort.
775 : RELOC indicates whether forming the initial value of DECL requires
776 : link-time relocations. */
777 :
/* When ix86_in_large_data_p accepts DECL, a large-data section name is
   chosen from DECL's section category: .ldata*, .lbss or .lrodata.
   Everything else is handed to default_elf_select_section, including
   large data in the text and TLS categories, for which no name is chosen.
   ALIGN is used only by that fallback. */
778 : ATTRIBUTE_UNUSED static section *
779 1667108 : x86_64_elf_select_section (tree decl, int reloc,
780 : unsigned HOST_WIDE_INT align)
781 : {
782 1667108 : if (ix86_in_large_data_p (decl))
783 : {
784 6 : const char *sname = NULL;
/* FLAGS matters only on the non-DECL path below, where get_section is
   called directly. */
785 6 : unsigned int flags = SECTION_WRITE | SECTION_LARGE;
786 6 : switch (categorize_decl_for_section (decl, reloc))
787 : {
788 1 : case SECCAT_DATA:
789 1 : sname = ".ldata";
790 1 : break;
791 0 : case SECCAT_DATA_REL:
792 0 : sname = ".ldata.rel";
793 0 : break;
794 0 : case SECCAT_DATA_REL_LOCAL:
795 0 : sname = ".ldata.rel.local";
796 0 : break;
797 0 : case SECCAT_DATA_REL_RO:
798 0 : sname = ".ldata.rel.ro";
799 0 : break;
800 0 : case SECCAT_DATA_REL_RO_LOCAL:
801 0 : sname = ".ldata.rel.ro.local";
802 0 : break;
803 0 : case SECCAT_BSS:
804 0 : sname = ".lbss";
805 0 : flags |= SECTION_BSS;
806 0 : break;
807 : case SECCAT_RODATA:
808 : case SECCAT_RODATA_MERGE_STR:
809 : case SECCAT_RODATA_MERGE_STR_INIT:
810 : case SECCAT_RODATA_MERGE_CONST:
811 : sname = ".lrodata";
812 : flags &= ~SECTION_WRITE;
813 : break;
/* The small-data categories are treated as impossible for large data. */
814 0 : case SECCAT_SRODATA:
815 0 : case SECCAT_SDATA:
816 0 : case SECCAT_SBSS:
817 0 : gcc_unreachable ();
818 : case SECCAT_TEXT:
819 : case SECCAT_TDATA:
820 : case SECCAT_TBSS:
821 : /* We don't split these for medium model. Place them into
822 : default sections and hope for best. */
823 : break;
824 : }
825 1 : if (sname)
826 : {
827 : /* We might get called with string constants, but get_named_section
828 : doesn't like them as they are not DECLs. Also, we need to set
829 : flags in that case. */
830 6 : if (!DECL_P (decl))
831 3 : return get_section (sname, flags, NULL);
832 3 : return get_named_section (decl, sname, reloc);
833 : }
834 : }
835 1667102 : return default_elf_select_section (decl, reloc, align);
836 : }
837 :
838 : /* Select a set of attributes for section NAME based on the properties
839 : of DECL and whether or not RELOC indicates that DECL's initializer
840 : might contain runtime relocations. */
841 :
842 : static unsigned int ATTRIBUTE_UNUSED
843 64885167 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
844 : {
845 64885167 : unsigned int flags = default_section_type_flags (decl, name, reloc);
846 :
847 64885167 : if (ix86_in_large_data_p (decl))
848 10 : flags |= SECTION_LARGE;
849 :
850 64885167 : if (decl == NULL_TREE
851 375 : && (strcmp (name, ".ldata.rel.ro") == 0
852 375 : || strcmp (name, ".ldata.rel.ro.local") == 0))
853 0 : flags |= SECTION_RELRO;
854 :
855 64885167 : if (strcmp (name, ".lbss") == 0
856 64885163 : || startswith (name, ".lbss.")
857 129770327 : || startswith (name, ".gnu.linkonce.lb."))
858 : {
859 7 : flags |= SECTION_BSS;
860 : /* Clear SECTION_NOTYPE so .lbss etc. are marked @nobits in
861 : default_elf_asm_named_section. */
862 7 : flags &= ~SECTION_NOTYPE;
863 : }
864 :
865 64885167 : return flags;
866 : }
867 :
868 : /* Build up a unique section name, expressed as a
869 : STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
870 : RELOC indicates whether the initial value of DECL requires
871 : link-time relocations. */
872 :
873 : static void ATTRIBUTE_UNUSED
874 1799939 : x86_64_elf_unique_section (tree decl, int reloc)
875 : {
876 1799939 : if (ix86_in_large_data_p (decl))
877 : {
878 3 : const char *prefix = NULL;
879 : /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
880 3 : bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
881 :
882 3 : switch (categorize_decl_for_section (decl, reloc))
883 : {
884 0 : case SECCAT_DATA:
885 0 : case SECCAT_DATA_REL:
886 0 : case SECCAT_DATA_REL_LOCAL:
887 0 : case SECCAT_DATA_REL_RO:
888 0 : case SECCAT_DATA_REL_RO_LOCAL:
889 0 : prefix = one_only ? ".ld" : ".ldata";
890 : break;
891 3 : case SECCAT_BSS:
892 3 : prefix = one_only ? ".lb" : ".lbss";
893 : break;
894 : case SECCAT_RODATA:
895 : case SECCAT_RODATA_MERGE_STR:
896 : case SECCAT_RODATA_MERGE_STR_INIT:
897 : case SECCAT_RODATA_MERGE_CONST:
898 : prefix = one_only ? ".lr" : ".lrodata";
899 : break;
900 0 : case SECCAT_SRODATA:
901 0 : case SECCAT_SDATA:
902 0 : case SECCAT_SBSS:
903 0 : gcc_unreachable ();
904 : case SECCAT_TEXT:
905 : case SECCAT_TDATA:
906 : case SECCAT_TBSS:
907 : /* We don't split these for medium model. Place them into
908 : default sections and hope for the best. */
909 : break;
910 : }
911 3 : if (prefix)
912 : {
913 3 : const char *name, *linkonce;
914 3 : char *string;
915 :
916 3 : name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
917 3 : name = targetm.strip_name_encoding (name);
918 :
919 : /* If we're using one_only, then there needs to be a .gnu.linkonce
920 : prefix to the section name. */
921 3 : linkonce = one_only ? ".gnu.linkonce" : "";
922 :
923 3 : string = ACONCAT ((linkonce, prefix, ".", name, NULL));
924 :
925 3 : set_decl_section_name (decl, string);
926 3 : return;
927 : }
928 : }
929 1799936 : default_unique_section (decl, reloc);
930 : }
931 :
932 : /* Return true if TYPE has no_callee_saved_registers or preserve_none
933 : attribute. */
934 :
935 : bool
936 7551079 : ix86_type_no_callee_saved_registers_p (const_tree type)
937 : {
938 15102158 : return (lookup_attribute ("no_callee_saved_registers",
939 7551079 : TYPE_ATTRIBUTES (type)) != NULL
940 15102025 : || lookup_attribute ("preserve_none",
941 7550946 : TYPE_ATTRIBUTES (type)) != NULL);
942 : }
943 :
944 : #ifdef COMMON_ASM_OP
945 :
946 : #ifndef LARGECOMM_SECTION_ASM_OP
947 : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
948 : #endif
949 :
950 : /* This says how to output assembler code to declare an
951 : uninitialized external linkage data object.
952 :
953 : For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
954 : large objects. */
955 : void
956 172936 : x86_elf_aligned_decl_common (FILE *file, tree decl,
957 : const char *name, unsigned HOST_WIDE_INT size,
958 : unsigned align)
959 : {
960 172936 : if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
961 172930 : || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
962 7 : && size > (unsigned int)ix86_section_threshold)
963 : {
964 1 : switch_to_section (get_named_section (decl, ".lbss", 0));
965 1 : fputs (LARGECOMM_SECTION_ASM_OP, file);
966 : }
967 : else
968 172935 : fputs (COMMON_ASM_OP, file);
969 172936 : assemble_name (file, name);
970 172936 : fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
971 : size, align / BITS_PER_UNIT);
972 172936 : }
973 : #endif
974 :
975 : /* Utility function for targets to use in implementing
976 : ASM_OUTPUT_ALIGNED_BSS. */
977 :
978 : void
979 769881 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
980 : unsigned HOST_WIDE_INT size, unsigned align)
981 : {
982 769881 : if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
983 769871 : || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
984 42 : && size > (unsigned int)ix86_section_threshold)
985 3 : switch_to_section (get_named_section (decl, ".lbss", 0));
986 : else
987 769878 : switch_to_section (bss_section);
988 925781 : ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
989 : #ifdef ASM_DECLARE_OBJECT_NAME
990 769881 : last_assemble_variable_decl = decl;
991 769881 : ASM_DECLARE_OBJECT_NAME (file, name, decl);
992 : #else
993 : /* Standard thing is just output label for the object. */
994 : ASM_OUTPUT_LABEL (file, name);
995 : #endif /* ASM_DECLARE_OBJECT_NAME */
996 769881 : ASM_OUTPUT_SKIP (file, size ? size : 1);
997 769881 : }
998 :
999 : /* Decide whether we must probe the stack before any space allocation
1000 : on this target. It's essentially TARGET_STACK_PROBE except when
1001 : -fstack-check causes the stack to be already probed differently. */
1002 :
1003 : bool
1004 871471 : ix86_target_stack_probe (void)
1005 : {
1006 : /* Do not probe the stack twice if static stack checking is enabled. */
1007 871471 : if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
1008 : return false;
1009 :
1010 871471 : return TARGET_STACK_PROBE;
1011 : }
1012 :
1013 : /* Decide whether we can make a sibling call to a function. DECL is the
1014 : declaration of the function being targeted by the call and EXP is the
1015 : CALL_EXPR representing the call. */
1016 :
1017 : static bool
1018 135517 : ix86_function_ok_for_sibcall (tree decl, tree exp)
1019 : {
1020 135517 : tree type, decl_or_type;
1021 135517 : rtx a, b;
1022 135517 : bool bind_global = decl && !targetm.binds_local_p (decl);
1023 :
1024 135517 : if (ix86_function_naked (current_function_decl))
1025 : return false;
1026 :
1027 : /* Sibling call isn't OK if there are no caller-saved registers
1028 : since all registers must be preserved before return. */
1029 135515 : if (cfun->machine->call_saved_registers
1030 135515 : == TYPE_NO_CALLER_SAVED_REGISTERS)
1031 : return false;
1032 :
1033 : /* If we are generating position-independent code, we cannot sibcall
1034 : optimize direct calls to global functions, as the PLT requires
1035 : %ebx be live. (Darwin does not have a PLT.) */
1036 135486 : if (!TARGET_MACHO
1037 135486 : && !TARGET_64BIT
1038 11331 : && flag_pic
1039 8405 : && flag_plt
1040 8405 : && bind_global)
1041 : return false;
1042 :
1043 : /* If we need to align the outgoing stack, then sibcalling would
1044 : unalign the stack, which may break the called function. */
1045 130839 : if (ix86_minimum_incoming_stack_boundary (true)
1046 130839 : < PREFERRED_STACK_BOUNDARY)
1047 : return false;
1048 :
1049 130258 : if (decl)
1050 : {
1051 119385 : decl_or_type = decl;
1052 119385 : type = TREE_TYPE (decl);
1053 : }
1054 : else
1055 : {
1056 : /* We're looking at the CALL_EXPR, we need the type of the function. */
1057 10873 : type = CALL_EXPR_FN (exp); /* pointer expression */
1058 10873 : type = TREE_TYPE (type); /* pointer type */
1059 10873 : type = TREE_TYPE (type); /* function type */
1060 10873 : decl_or_type = type;
1061 : }
1062 :
1063 : /* Sibling call isn't OK if callee has no callee-saved registers
1064 : and the calling function has callee-saved registers. */
1065 130258 : if ((cfun->machine->call_saved_registers
1066 130258 : != TYPE_NO_CALLEE_SAVED_REGISTERS)
1067 130258 : && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
1068 130258 : && ix86_type_no_callee_saved_registers_p (type))
1069 : return false;
1070 :
1071 : /* If outgoing reg parm stack space changes, we cannot do sibcall. */
1072 130242 : if ((OUTGOING_REG_PARM_STACK_SPACE (type)
1073 130242 : != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
1074 259738 : || (REG_PARM_STACK_SPACE (decl_or_type)
1075 129496 : != REG_PARM_STACK_SPACE (current_function_decl)))
1076 : {
1077 746 : maybe_complain_about_tail_call (exp,
1078 : "inconsistent size of stack space"
1079 : " allocated for arguments which are"
1080 : " passed in registers");
1081 746 : return false;
1082 : }
1083 :
1084 : /* Check that the return value locations are the same. For example, if
1085 : we are returning floats on the 80387 register stack, we cannot
1086 : make a sibcall from a function that doesn't return a float to a
1087 : function that does or, conversely, from a function that does return
1088 : a float to a function that doesn't; the necessary stack adjustment
1089 : would not be executed. This is also the place we notice
1090 : differences in the return value ABI. Note that it is ok for one
1091 : of the functions to have void return type as long as the return
1092 : value of the other is passed in a register. */
1093 129496 : a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1094 129496 : b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1095 129496 : cfun->decl, false);
1096 129496 : if (STACK_REG_P (a) || STACK_REG_P (b))
1097 : {
1098 1020 : if (!rtx_equal_p (a, b))
1099 : return false;
1100 : }
1101 128476 : else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1102 : ;
1103 23786 : else if (!rtx_equal_p (a, b))
1104 : return false;
1105 :
1106 129115 : if (TARGET_64BIT)
1107 : {
1108 : /* The SYSV ABI has more call-clobbered registers;
1109 : disallow sibcalls from MS to SYSV. */
1110 122431 : if (cfun->machine->call_abi == MS_ABI
1111 122431 : && ix86_function_type_abi (type) == SYSV_ABI)
1112 : return false;
1113 : }
1114 : else
1115 : {
1116 : /* If this call is indirect, we'll need to be able to use a
1117 : call-clobbered register for the address of the target function.
1118 : Make sure that all such registers are not used for passing
1119 : parameters. Note that DLLIMPORT functions and calls to global
1120 : functions via GOT slots are indirect. */
1121 6684 : if (!decl
1122 4770 : || (bind_global && flag_pic && !flag_plt)
1123 : || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1124 4770 : || flag_force_indirect_call)
1125 : {
1126 : /* Check if regparm >= 3 since arg_reg_available is set to
1127 : false if regparm == 0. If regparm is 1 or 2, there is
1128 : always a call-clobbered register available.
1129 :
1130 : ??? The symbol indirect call doesn't need a call-clobbered
1131 : register. But we don't know if this is a symbol indirect
1132 : call or not here. */
1133 1914 : if (ix86_function_regparm (type, decl) >= 3
1134 1914 : && !cfun->machine->arg_reg_available)
1135 : return false;
1136 : }
1137 : }
1138 :
1139 129115 : if (decl && ix86_use_pseudo_pic_reg ())
1140 : {
1141 : /* When PIC register is used, it must be restored after ifunc
1142 : function returns. */
1143 2059 : cgraph_node *node = cgraph_node::get (decl);
1144 2059 : if (node && node->ifunc_resolver)
1145 : return false;
1146 : }
1147 :
1148 : /* Disable sibcall if callee has indirect_return attribute and
1149 : caller doesn't since callee will return to the caller's caller
1150 : via an indirect jump. */
1151 129115 : if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1152 : == (CF_RETURN | CF_BRANCH))
1153 50032 : && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1154 129119 : && !lookup_attribute ("indirect_return",
1155 4 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1156 : return false;
1157 :
1158 : /* Otherwise okay. That also includes certain types of indirect calls. */
1159 : return true;
1160 : }
1161 :
1162 : /* This function determines from TYPE the calling-convention. */
1163 :
1164 : unsigned int
1165 6280580 : ix86_get_callcvt (const_tree type)
1166 : {
1167 6280580 : unsigned int ret = 0;
1168 6280580 : bool is_stdarg;
1169 6280580 : tree attrs;
1170 :
1171 6280580 : if (TARGET_64BIT)
1172 : return IX86_CALLCVT_CDECL;
1173 :
1174 3270782 : attrs = TYPE_ATTRIBUTES (type);
1175 3270782 : if (attrs != NULL_TREE)
1176 : {
1177 67754 : if (lookup_attribute ("cdecl", attrs))
1178 : ret |= IX86_CALLCVT_CDECL;
1179 67754 : else if (lookup_attribute ("stdcall", attrs))
1180 : ret |= IX86_CALLCVT_STDCALL;
1181 67754 : else if (lookup_attribute ("fastcall", attrs))
1182 : ret |= IX86_CALLCVT_FASTCALL;
1183 67745 : else if (lookup_attribute ("thiscall", attrs))
1184 : ret |= IX86_CALLCVT_THISCALL;
1185 :
1186 : /* Regparm isn't allowed for thiscall and fastcall. */
1187 : if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1188 : {
1189 67745 : if (lookup_attribute ("regparm", attrs))
1190 15834 : ret |= IX86_CALLCVT_REGPARM;
1191 67745 : if (lookup_attribute ("sseregparm", attrs))
1192 0 : ret |= IX86_CALLCVT_SSEREGPARM;
1193 : }
1194 :
1195 67754 : if (IX86_BASE_CALLCVT(ret) != 0)
1196 9 : return ret;
1197 : }
1198 :
1199 3270773 : is_stdarg = stdarg_p (type);
1200 3270773 : if (TARGET_RTD && !is_stdarg)
1201 0 : return IX86_CALLCVT_STDCALL | ret;
1202 :
1203 3270773 : if (ret != 0
1204 3270773 : || is_stdarg
1205 3245909 : || TREE_CODE (type) != METHOD_TYPE
1206 3407406 : || ix86_function_type_abi (type) != MS_ABI)
1207 3270773 : return IX86_CALLCVT_CDECL | ret;
1208 :
1209 : return IX86_CALLCVT_THISCALL;
1210 : }
1211 :
1212 : /* Return 0 if the attributes for two types are incompatible, 1 if they
1213 : are compatible, and 2 if they are nearly compatible (which causes a
1214 : warning to be generated). */
1215 :
1216 : static int
1217 1522424 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
1218 : {
1219 1522424 : unsigned int ccvt1, ccvt2;
1220 :
1221 1522424 : if (TREE_CODE (type1) != FUNCTION_TYPE
1222 1522424 : && TREE_CODE (type1) != METHOD_TYPE)
1223 : return 1;
1224 :
1225 1516029 : ccvt1 = ix86_get_callcvt (type1);
1226 1516029 : ccvt2 = ix86_get_callcvt (type2);
1227 1516029 : if (ccvt1 != ccvt2)
1228 : return 0;
1229 3009918 : if (ix86_function_regparm (type1, NULL)
1230 1504959 : != ix86_function_regparm (type2, NULL))
1231 : return 0;
1232 :
1233 1467164 : if (ix86_type_no_callee_saved_registers_p (type1)
1234 733582 : != ix86_type_no_callee_saved_registers_p (type2))
1235 : return 0;
1236 :
1237 : /* The preserve_none attribute uses a different calling convention
1238 : only for 64-bit. */
1239 733454 : if (TARGET_64BIT
1240 1466848 : && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
1241 733394 : != lookup_attribute ("preserve_none",
1242 733394 : TYPE_ATTRIBUTES (type2))))
1243 : return 0;
1244 :
1245 : return 1;
1246 : }
1247 :
1248 : /* Return the regparm value for a function with the indicated TYPE and DECL.
1249 : DECL may be NULL when calling function indirectly
1250 : or considering a libcall. */
1251 :
1252 : static int
1253 4281768 : ix86_function_regparm (const_tree type, const_tree decl)
1254 : {
1255 4281768 : tree attr;
1256 4281768 : int regparm;
1257 4281768 : unsigned int ccvt;
1258 :
1259 4281768 : if (TARGET_64BIT)
1260 3009798 : return (ix86_function_type_abi (type) == SYSV_ABI
1261 3009798 : ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1262 1271970 : ccvt = ix86_get_callcvt (type);
1263 1271970 : regparm = ix86_regparm;
1264 :
1265 1271970 : if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1266 : {
1267 2020 : attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1268 2020 : if (attr)
1269 : {
1270 2020 : regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1271 2020 : return regparm;
1272 : }
1273 : }
1274 1269950 : else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1275 : return 2;
1276 1269950 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1277 : return 1;
1278 :
1279 : /* Use register calling convention for local functions when possible. */
1280 1269950 : if (decl
1281 1205654 : && TREE_CODE (decl) == FUNCTION_DECL)
1282 : {
1283 1195571 : cgraph_node *target = cgraph_node::get (decl);
1284 1195571 : if (target)
1285 1188065 : target = target->function_symbol ();
1286 :
1287 : /* Caller and callee must agree on the calling convention, so
1288 : checking just the current optimize setting here would mean that with
1289 : __attribute__((optimize (...))) caller could use regparm convention
1290 : and callee not, or vice versa. Instead look at whether the callee
1291 : is optimized or not. */
1292 1188065 : if (target && opt_for_fn (target->decl, optimize)
1293 2375238 : && !(profile_flag && !flag_fentry))
1294 : {
1295 1187173 : if (target->local && target->can_change_signature)
1296 : {
1297 140264 : int local_regparm, globals = 0, regno;
1298 :
1299 : /* Make sure no regparm register is taken by a
1300 : fixed register variable. */
1301 140264 : for (local_regparm = 0; local_regparm < REGPARM_MAX;
1302 : local_regparm++)
1303 105198 : if (fixed_regs[local_regparm])
1304 : break;
1305 :
1306 : /* We don't want to use regparm(3) for nested functions as
1307 : these use a static chain pointer in the third argument. */
1308 35066 : if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1309 : local_regparm = 2;
1310 :
1311 : /* Save a register for the split stack. */
1312 35066 : if (flag_split_stack)
1313 : {
1314 20696 : if (local_regparm == 3)
1315 : local_regparm = 2;
1316 707 : else if (local_regparm == 2
1317 707 : && DECL_STATIC_CHAIN (target->decl))
1318 : local_regparm = 1;
1319 : }
1320 :
1321 : /* Each fixed register usage increases register pressure,
1322 : so fewer registers should be used for argument passing.
1323 : This functionality can be overridden by an explicit
1324 : regparm value. */
1325 245462 : for (regno = AX_REG; regno <= DI_REG; regno++)
1326 210396 : if (fixed_regs[regno])
1327 0 : globals++;
1328 :
1329 35066 : local_regparm
1330 35066 : = globals < local_regparm ? local_regparm - globals : 0;
1331 :
1332 35066 : if (local_regparm > regparm)
1333 4281768 : regparm = local_regparm;
1334 : }
1335 : }
1336 : }
1337 :
1338 : return regparm;
1339 : }
1340 :
1341 : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1342 : DFmode (2) arguments in SSE registers for a function with the
1343 : indicated TYPE and DECL. DECL may be NULL when calling function
1344 : indirectly or considering a libcall. Return -1 if any FP parameter
1345 : should be rejected by error. This is used in situations where we imply the SSE
1346 : calling convention but the function is called from another function with
1347 : SSE disabled. Otherwise return 0. */
1348 :
1349 : static int
1350 1077632 : ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1351 : {
1352 1077632 : gcc_assert (!TARGET_64BIT);
1353 :
1354 : /* Use SSE registers to pass SFmode and DFmode arguments if requested
1355 : by the sseregparm attribute. */
1356 1077632 : if (TARGET_SSEREGPARM
1357 1077632 : || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1358 : {
1359 0 : if (!TARGET_SSE)
1360 : {
1361 0 : if (warn)
1362 : {
1363 0 : if (decl)
1364 0 : error ("calling %qD with attribute sseregparm without "
1365 : "SSE/SSE2 enabled", decl);
1366 : else
1367 0 : error ("calling %qT with attribute sseregparm without "
1368 : "SSE/SSE2 enabled", type);
1369 : }
1370 0 : return 0;
1371 : }
1372 :
1373 : return 2;
1374 : }
1375 :
1376 1077632 : if (!decl)
1377 : return 0;
1378 :
1379 979041 : cgraph_node *target = cgraph_node::get (decl);
1380 979041 : if (target)
1381 971542 : target = target->function_symbol ();
1382 :
1383 : /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1384 : (and DFmode for SSE2) arguments in SSE registers. */
1385 971542 : if (target
1386 : /* TARGET_SSE_MATH */
1387 971542 : && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1388 1296 : && opt_for_fn (target->decl, optimize)
1389 972838 : && !(profile_flag && !flag_fentry))
1390 : {
1391 1296 : if (target->local && target->can_change_signature)
1392 : {
1393 : /* Refuse to produce wrong code when local function with SSE enabled
1394 : is called from SSE disabled function.
1395 : FIXME: We need a way to detect these cases cross-ltrans partition
1396 : and avoid using SSE calling conventions on local functions called
1397 : from function with SSE disabled. For now at least delay the
1398 : warning until we know we are going to produce wrong code.
1399 : See PR66047 */
1400 0 : if (!TARGET_SSE && warn)
1401 : return -1;
1402 0 : return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1403 0 : ->x_ix86_isa_flags) ? 2 : 1;
1404 : }
1405 : }
1406 :
1407 : return 0;
1408 : }
1409 :
1410 : /* Return true if EAX is live at the start of the function. Used by
1411 : ix86_expand_prologue to determine if we need special help before
1412 : calling allocate_stack_worker. */
1413 :
1414 : static bool
1415 7090 : ix86_eax_live_at_start_p (void)
1416 : {
1417 : /* Cheat. Don't bother working forward from ix86_function_regparm
1418 : to the function type to whether an actual argument is located in
1419 : eax. Instead just look at cfg info, which is still close enough
1420 : to correct at this point. This gives false positives for broken
1421 : functions that might use uninitialized data that happens to be
1422 : allocated in eax, but who cares? */
1423 7090 : return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1424 : }
1425 :
1426 : static bool
1427 159978 : ix86_keep_aggregate_return_pointer (tree fntype)
1428 : {
1429 159978 : tree attr;
1430 :
1431 159978 : if (!TARGET_64BIT)
1432 : {
1433 159978 : attr = lookup_attribute ("callee_pop_aggregate_return",
1434 159978 : TYPE_ATTRIBUTES (fntype));
1435 159978 : if (attr)
1436 0 : return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1437 :
1438 : /* For 32-bit MS-ABI the default is to keep aggregate
1439 : return pointer. */
1440 159978 : if (ix86_function_type_abi (fntype) == MS_ABI)
1441 : return true;
1442 : }
1443 : return KEEP_AGGREGATE_RETURN_POINTER != 0;
1444 : }
1445 :
1446 : /* Value is the number of bytes of arguments automatically
1447 : popped when returning from a subroutine call.
1448 : FUNDECL is the declaration node of the function (as a tree),
1449 : FUNTYPE is the data type of the function (as a tree),
1450 : or for a library call it is an identifier node for the subroutine name.
1451 : SIZE is the number of bytes of arguments passed on the stack.
1452 :
1453 : On the 80386, the RTD insn may be used to pop them if the number
1454 : of args is fixed, but if the number is variable then the caller
1455 : must pop them all. RTD can't be used for library calls now
1456 : because the library is compiled with the Unix compiler.
1457 : Use of RTD is a selectable option, since it is incompatible with
1458 : standard Unix calling sequences. If the option is not selected,
1459 : the caller must always pop the args.
1460 :
1461 : The attribute stdcall is equivalent to RTD on a per module basis. */
1462 :
1463 : static poly_int64
1464 7568092 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1465 : {
1466 7568092 : unsigned int ccvt;
1467 :
1468 : /* None of the 64-bit ABIs pop arguments. */
1469 7568092 : if (TARGET_64BIT)
1470 6694480 : return 0;
1471 :
1472 873612 : ccvt = ix86_get_callcvt (funtype);
1473 :
1474 873612 : if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1475 : | IX86_CALLCVT_THISCALL)) != 0
1476 873612 : && ! stdarg_p (funtype))
1477 3 : return size;
1478 :
1479 : /* Lose any fake structure return argument if it is passed on the stack. */
1480 873609 : if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1481 873609 : && !ix86_keep_aggregate_return_pointer (funtype))
1482 : {
1483 159978 : int nregs = ix86_function_regparm (funtype, fundecl);
1484 159978 : if (nregs == 0)
1485 459066 : return GET_MODE_SIZE (Pmode);
1486 : }
1487 :
1488 720587 : return 0;
1489 : }
1490 :
1491 : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1492 :
1493 : static bool
1494 9980433 : ix86_legitimate_combined_insn (rtx_insn *insn)
1495 : {
1496 9980433 : int i;
1497 :
1498 : /* Check operand constraints in case hard registers were propagated
1499 : into insn pattern. This check prevents combine pass from
1500 : generating insn patterns with invalid hard register operands.
1501 : These invalid insns can eventually confuse reload to error out
1502 : with a spill failure. See also PRs 46829 and 46843. */
1503 :
1504 9980433 : gcc_assert (INSN_CODE (insn) >= 0);
1505 :
1506 9980433 : extract_insn (insn);
1507 9980433 : preprocess_constraints (insn);
1508 :
1509 9980433 : int n_operands = recog_data.n_operands;
1510 9980433 : int n_alternatives = recog_data.n_alternatives;
1511 34138056 : for (i = 0; i < n_operands; i++)
1512 : {
1513 24161138 : rtx op = recog_data.operand[i];
1514 24161138 : machine_mode mode = GET_MODE (op);
1515 24161138 : const operand_alternative *op_alt;
1516 24161138 : int offset = 0;
1517 24161138 : bool win;
1518 24161138 : int j;
1519 :
1520 : /* A unary operator may be accepted by the predicate, but it
1521 : is irrelevant for matching constraints. */
1522 24161138 : if (UNARY_P (op))
1523 48422 : op = XEXP (op, 0);
1524 :
1525 24161138 : if (SUBREG_P (op))
1526 : {
1527 875049 : if (REG_P (SUBREG_REG (op))
1528 875049 : && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1529 54 : offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1530 54 : GET_MODE (SUBREG_REG (op)),
1531 54 : SUBREG_BYTE (op),
1532 54 : GET_MODE (op));
1533 875049 : op = SUBREG_REG (op);
1534 : }
1535 :
1536 24161138 : if (!(REG_P (op) && HARD_REGISTER_P (op)))
1537 23863146 : continue;
1538 :
1539 297992 : op_alt = recog_op_alt;
1540 :
1541 : /* Operand has no constraints, anything is OK. */
1542 297992 : win = !n_alternatives;
1543 :
1544 297992 : alternative_mask preferred = get_preferred_alternatives (insn);
1545 823977 : for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1546 : {
1547 522389 : if (!TEST_BIT (preferred, j))
1548 140690 : continue;
1549 381699 : if (op_alt[i].anything_ok
1550 203051 : || (op_alt[i].matches != -1
1551 33735 : && operands_match_p
1552 33735 : (recog_data.operand[i],
1553 33735 : recog_data.operand[op_alt[i].matches]))
1554 580627 : || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1555 : {
1556 : win = true;
1557 : break;
1558 : }
1559 : }
1560 :
1561 297992 : if (!win)
1562 : return false;
1563 : }
1564 :
1565 : return true;
1566 : }
1567 :
1568 : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1569 :
1570 : static unsigned HOST_WIDE_INT
1571 4814 : ix86_asan_shadow_offset (void)
1572 : {
1573 4814 : return SUBTARGET_SHADOW_OFFSET;
1574 : }
1575 :
1576 : /* Argument support functions. */
1577 :
1578 : /* Return true when register may be used to pass function parameters. */
1579 : bool
1580 1483838402 : ix86_function_arg_regno_p (int regno)
1581 : {
1582 1483838402 : int i;
1583 1483838402 : enum calling_abi call_abi;
1584 1483838402 : const int *parm_regs;
1585 :
1586 1480391343 : if (TARGET_SSE && SSE_REGNO_P (regno)
1587 2454078416 : && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1588 : return true;
1589 :
1590 1364010278 : if (!TARGET_64BIT)
1591 129246272 : return (regno < REGPARM_MAX
1592 129246272 : || (TARGET_MMX && MMX_REGNO_P (regno)
1593 11616272 : && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1594 :
1595 : /* TODO: The function should depend on current function ABI but
1596 : builtins.cc would need updating then. Therefore we use the
1597 : default ABI. */
1598 1234764006 : call_abi = ix86_cfun_abi ();
1599 :
1600 : /* RAX is used as hidden argument to va_arg functions. */
1601 1234764006 : if (call_abi == SYSV_ABI && regno == AX_REG)
1602 : return true;
1603 :
1604 1220494877 : if (cfun
1605 1220494545 : && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
1606 : parm_regs = x86_64_preserve_none_int_parameter_registers;
1607 1220475953 : else if (call_abi == MS_ABI)
1608 : parm_regs = x86_64_ms_abi_int_parameter_registers;
1609 : else
1610 1184480401 : parm_regs = x86_64_int_parameter_registers;
1611 :
1612 16334981790 : for (i = 0; i < (call_abi == MS_ABI
1613 8167490895 : ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1614 7034293727 : if (regno == parm_regs[i])
1615 : return true;
1616 : return false;
1617 : }
1618 :
1619 : /* Return true if we do not know how to pass ARG solely in registers. */
1620 :
1621 : static bool
1622 403722770 : ix86_must_pass_in_stack (const function_arg_info &arg)
1623 : {
1624 403722770 : if (must_pass_in_stack_var_size_or_pad (arg))
1625 : return true;
1626 :
1627 : /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1628 : The layout_type routine is crafty and tries to trick us into passing
1629 : currently unsupported vector types on the stack by using TImode. */
1630 1772560 : return (!TARGET_64BIT && arg.mode == TImode
1631 403722733 : && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1632 : }
1633 :
1634 : /* Return the size, in bytes, of the area reserved for arguments passed
1635 : in registers for the function represented by FNDECL, depending on the
1636 : ABI it uses. */
1637 : int
1638 10654422 : ix86_reg_parm_stack_space (const_tree fndecl)
1639 : {
1640 10654422 : enum calling_abi call_abi = SYSV_ABI;
1641 10654422 : if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1642 10344022 : call_abi = ix86_function_abi (fndecl);
1643 : else
1644 310400 : call_abi = ix86_function_type_abi (fndecl);
1645 10654422 : if (TARGET_64BIT && call_abi == MS_ABI)
1646 119312 : return 32;
1647 : return 0;
1648 : }
1649 :
1650 : /* We add this as a workaround in order to use libc_has_function
1651 : hook in i386.md. */
1652 : bool
1653 0 : ix86_libc_has_function (enum function_class fn_class)
1654 : {
1655 0 : return targetm.libc_has_function (fn_class, NULL_TREE);
1656 : }
1657 :
1658 : /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
1659 : specifying the call ABI used. */
1660 : enum calling_abi
1661 439153382 : ix86_function_type_abi (const_tree fntype)
1662 : {
1663 439153382 : enum calling_abi abi = ix86_abi;
1664 :
1665 439153382 : if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1666 : return abi;
1667 :
1668 17527157 : if (abi == SYSV_ABI
1669 17527157 : && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1670 : {
1671 2628110 : static int warned;
1672 2628110 : if (TARGET_X32 && !warned)
1673 : {
1674 1 : error ("X32 does not support %<ms_abi%> attribute");
1675 1 : warned = 1;
1676 : }
1677 :
1678 : abi = MS_ABI;
1679 : }
1680 14899047 : else if (abi == MS_ABI
1681 14899047 : && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1682 : abi = SYSV_ABI;
1683 :
1684 : return abi;
1685 : }
1686 :
1687 : enum calling_abi
1688 217225685 : ix86_function_abi (const_tree fndecl)
1689 : {
1690 217225685 : return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1691 : }
1692 :
1693 : /* Return SYSV_ABI or MS_ABI, depending on cfun,
1694 : specifying the call ABI used. */
1695 : enum calling_abi
1696 2086117287 : ix86_cfun_abi (void)
1697 : {
1698 2086117287 : return cfun ? cfun->machine->call_abi : ix86_abi;
1699 : }
1700 :
1701 : bool
1702 5015351 : ix86_function_ms_hook_prologue (const_tree fn)
1703 : {
1704 5015351 : if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1705 : {
1706 8 : if (decl_function_context (fn) != NULL_TREE)
1707 0 : error_at (DECL_SOURCE_LOCATION (fn),
1708 : "%<ms_hook_prologue%> attribute is not compatible "
1709 : "with nested function");
1710 : else
1711 : return true;
1712 : }
1713 : return false;
1714 : }
1715 :
1716 : bool
1717 115320726 : ix86_function_naked (const_tree fn)
1718 : {
1719 115320726 : if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1720 : return true;
1721 :
1722 : return false;
1723 : }
1724 :
1725 : /* Write the extra assembler code needed to declare a function properly. */
1726 :
1727 : void
1728 1557440 : ix86_asm_output_function_label (FILE *out_file, const char *fname,
1729 : tree decl)
1730 : {
1731 1557440 : bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1732 :
1733 1557440 : if (cfun)
1734 1553834 : cfun->machine->function_label_emitted = true;
1735 :
1736 1557440 : if (is_ms_hook)
1737 : {
1738 2 : int i, filler_count = (TARGET_64BIT ? 32 : 16);
1739 2 : unsigned int filler_cc = 0xcccccccc;
1740 :
1741 18 : for (i = 0; i < filler_count; i += 4)
1742 16 : fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1743 : }
1744 :
1745 : #ifdef SUBTARGET_ASM_UNWIND_INIT
1746 : SUBTARGET_ASM_UNWIND_INIT (out_file);
1747 : #endif
1748 :
1749 1557440 : assemble_function_label_raw (out_file, fname);
1750 :
1751 : /* Output magic byte marker, if hot-patch attribute is set. */
1752 1557440 : if (is_ms_hook)
1753 : {
1754 2 : if (TARGET_64BIT)
1755 : {
1756 : /* leaq [%rsp + 0], %rsp */
1757 2 : fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1758 : out_file);
1759 : }
1760 : else
1761 : {
1762 : /* movl.s %edi, %edi
1763 : push %ebp
1764 : movl.s %esp, %ebp */
1765 0 : fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1766 : }
1767 : }
1768 1557440 : }
1769 :
1770 : /* Output a user-defined label. In AT&T syntax, registers are prefixed
1771 : with %, so labels require no punctuation. In Intel syntax, registers
1772 : are unprefixed, so labels may clash with registers or other operators,
1773 : and require quoting. */
1774 : void
1775 35028410 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1776 : {
1777 35028410 : if (ASSEMBLER_DIALECT == ASM_ATT)
1778 35027309 : fprintf (file, "%s%s", prefix, label);
1779 : else
1780 1101 : fprintf (file, "\"%s%s\"", prefix, label);
1781 35028410 : }
1782 :
1783 : /* Implementation of call abi switching target hook. Specific to FNDECL
1784 : the specific call register sets are set. See also
1785 : ix86_conditional_register_usage for more details. */
1786 : void
1787 196794778 : ix86_call_abi_override (const_tree fndecl)
1788 : {
1789 196794778 : cfun->machine->call_abi = ix86_function_abi (fndecl);
1790 196794778 : }
1791 :
1792 : /* Return 1 if pseudo register should be created and used to hold
1793 : GOT address for PIC code. */
1794 : bool
1795 170527898 : ix86_use_pseudo_pic_reg (void)
1796 : {
1797 170527898 : if ((TARGET_64BIT
1798 159479963 : && (ix86_cmodel == CM_SMALL_PIC
1799 : || TARGET_PECOFF))
1800 164845732 : || !flag_pic)
1801 165729926 : return false;
1802 : return true;
1803 : }
1804 :
1805 : /* Initialize large model PIC register. */
1806 :
1807 : static void
1808 56 : ix86_init_large_pic_reg (unsigned int tmp_regno)
1809 : {
1810 56 : rtx_code_label *label;
1811 56 : rtx tmp_reg;
1812 :
1813 56 : gcc_assert (Pmode == DImode);
1814 56 : label = gen_label_rtx ();
1815 56 : emit_label (label);
1816 56 : LABEL_PRESERVE_P (label) = 1;
1817 56 : tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1818 56 : gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1819 56 : emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1820 : label));
1821 56 : emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1822 56 : emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1823 56 : const char *name = LABEL_NAME (label);
1824 56 : PUT_CODE (label, NOTE);
1825 56 : NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1826 56 : NOTE_DELETED_LABEL_NAME (label) = name;
1827 56 : }
1828 :
1829 : /* Create and initialize PIC register if required. */
1830 : static void
1831 1488370 : ix86_init_pic_reg (void)
1832 : {
1833 1488370 : edge entry_edge;
1834 1488370 : rtx_insn *seq;
1835 :
1836 1488370 : if (!ix86_use_pseudo_pic_reg ())
1837 : return;
1838 :
1839 40470 : start_sequence ();
1840 :
1841 40470 : if (TARGET_64BIT)
1842 : {
1843 69 : if (ix86_cmodel == CM_LARGE_PIC)
1844 53 : ix86_init_large_pic_reg (R11_REG);
1845 : else
1846 16 : emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1847 : }
1848 : else
1849 : {
1850 : /* If there is future mcount call in the function it is more profitable
1851 : to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1852 40401 : rtx reg = crtl->profile
1853 40401 : ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1854 40401 : : pic_offset_table_rtx;
1855 40401 : rtx_insn *insn = emit_insn (gen_set_got (reg));
1856 40401 : RTX_FRAME_RELATED_P (insn) = 1;
1857 40401 : if (crtl->profile)
1858 0 : emit_move_insn (pic_offset_table_rtx, reg);
1859 40401 : add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1860 : }
1861 :
1862 40470 : seq = end_sequence ();
1863 :
1864 40470 : entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1865 40470 : insert_insn_on_edge (seq, entry_edge);
1866 40470 : commit_one_edge_insertion (entry_edge);
1867 : }
1868 :
1869 : /* Initialize a variable CUM of type CUMULATIVE_ARGS
1870 : for a call to a function whose data type is FNTYPE.
1871 : For a library call, FNTYPE is 0. */
1872 :
1873 : void
1874 10378754 : init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1875 : tree fntype, /* tree ptr for function decl */
1876 : rtx libname, /* SYMBOL_REF of library name or 0 */
1877 : tree fndecl,
1878 : int caller)
1879 : {
1880 10378754 : struct cgraph_node *local_info_node = NULL;
1881 10378754 : struct cgraph_node *target = NULL;
1882 :
1883 : /* Set silent_p to false to raise an error for invalid calls when
1884 : expanding function body. */
1885 10378754 : cfun->machine->silent_p = false;
1886 :
1887 10378754 : memset (cum, 0, sizeof (*cum));
1888 :
1889 10378754 : tree preserve_none_type;
1890 10378754 : if (fndecl)
1891 : {
1892 10039654 : target = cgraph_node::get (fndecl);
1893 10039654 : if (target)
1894 : {
1895 9894558 : target = target->function_symbol ();
1896 9894558 : local_info_node = cgraph_node::local_info_node (target->decl);
1897 9894558 : cum->call_abi = ix86_function_abi (target->decl);
1898 9894558 : preserve_none_type = TREE_TYPE (target->decl);
1899 : }
1900 : else
1901 : {
1902 145096 : cum->call_abi = ix86_function_abi (fndecl);
1903 145096 : preserve_none_type = TREE_TYPE (fndecl);
1904 : }
1905 : }
1906 : else
1907 : {
1908 339100 : cum->call_abi = ix86_function_type_abi (fntype);
1909 339100 : preserve_none_type = fntype;
1910 : }
1911 10378754 : cum->preserve_none_abi
1912 10378754 : = (preserve_none_type
1913 20639813 : && (lookup_attribute ("preserve_none",
1914 10261059 : TYPE_ATTRIBUTES (preserve_none_type))
1915 : != nullptr));
1916 :
1917 10378754 : cum->caller = caller;
1918 :
1919 : /* Set up the number of registers to use for passing arguments. */
1920 10378754 : cum->nregs = ix86_regparm;
1921 10378754 : if (TARGET_64BIT)
1922 : {
1923 9342343 : cum->nregs = (cum->call_abi == SYSV_ABI
1924 9342343 : ? X86_64_REGPARM_MAX
1925 : : X86_64_MS_REGPARM_MAX);
1926 : }
1927 10378754 : if (TARGET_SSE)
1928 : {
1929 10369644 : cum->sse_nregs = SSE_REGPARM_MAX;
1930 10369644 : if (TARGET_64BIT)
1931 : {
1932 9333353 : cum->sse_nregs = (cum->call_abi == SYSV_ABI
1933 9333353 : ? X86_64_SSE_REGPARM_MAX
1934 : : X86_64_MS_SSE_REGPARM_MAX);
1935 : }
1936 : }
1937 10378754 : if (TARGET_MMX)
1938 11202389 : cum->mmx_nregs = MMX_REGPARM_MAX;
1939 10378754 : cum->warn_avx512f = true;
1940 10378754 : cum->warn_avx = true;
1941 10378754 : cum->warn_sse = true;
1942 10378754 : cum->warn_mmx = true;
1943 :
1944 : /* Because type might mismatch in between caller and callee, we need to
1945 : use actual type of function for local calls.
1946 : FIXME: cgraph_analyze can be told to actually record if function uses
1947 : va_start so for local functions maybe_vaarg can be made aggressive
1948 : helping K&R code.
1949 : FIXME: once typesytem is fixed, we won't need this code anymore. */
1950 10378754 : if (local_info_node && local_info_node->local
1951 421790 : && local_info_node->can_change_signature)
1952 398574 : fntype = TREE_TYPE (target->decl);
1953 10378754 : cum->stdarg = stdarg_p (fntype);
1954 20757508 : cum->maybe_vaarg = (fntype
1955 10849418 : ? (!prototype_p (fntype) || stdarg_p (fntype))
1956 117695 : : !libname);
1957 :
1958 10378754 : cum->decl = fndecl;
1959 :
1960 10378754 : cum->warn_empty = !warn_abi || cum->stdarg;
1961 10378754 : if (!cum->warn_empty && fntype)
1962 : {
1963 2601309 : function_args_iterator iter;
1964 2601309 : tree argtype;
1965 2601309 : bool seen_empty_type = false;
1966 7215694 : FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1967 : {
1968 7215631 : if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1969 : break;
1970 4634376 : if (TYPE_EMPTY_P (argtype))
1971 : seen_empty_type = true;
1972 4562635 : else if (seen_empty_type)
1973 : {
1974 19991 : cum->warn_empty = true;
1975 19991 : break;
1976 : }
1977 : }
1978 : }
1979 :
1980 10378754 : if (!TARGET_64BIT)
1981 : {
1982 : /* If there are variable arguments, then we won't pass anything
1983 : in registers in 32-bit mode. */
1984 1036411 : if (stdarg_p (fntype))
1985 : {
1986 9087 : cum->nregs = 0;
1987 : /* Since in 32-bit, variable arguments are always passed on
1988 : stack, there is scratch register available for indirect
1989 : sibcall. */
1990 9087 : cfun->machine->arg_reg_available = true;
1991 9087 : cum->sse_nregs = 0;
1992 9087 : cum->mmx_nregs = 0;
1993 9087 : cum->warn_avx512f = false;
1994 9087 : cum->warn_avx = false;
1995 9087 : cum->warn_sse = false;
1996 9087 : cum->warn_mmx = false;
1997 9087 : return;
1998 : }
1999 :
2000 : /* Use ecx and edx registers if function has fastcall attribute,
2001 : else look for regparm information. */
2002 1027324 : if (fntype)
2003 : {
2004 1014138 : unsigned int ccvt = ix86_get_callcvt (fntype);
2005 1014138 : if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
2006 : {
2007 0 : cum->nregs = 1;
2008 0 : cum->fastcall = 1; /* Same first register as in fastcall. */
2009 : }
2010 1014138 : else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
2011 : {
2012 4 : cum->nregs = 2;
2013 4 : cum->fastcall = 1;
2014 : }
2015 : else
2016 1014134 : cum->nregs = ix86_function_regparm (fntype, fndecl);
2017 : }
2018 :
2019 : /* Set up the number of SSE registers used for passing SFmode
2020 : and DFmode arguments. Warn for mismatching ABI. */
2021 1027324 : cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
2022 : }
2023 :
2024 10369667 : cfun->machine->arg_reg_available = (cum->nregs > 0);
2025 : }
2026 :
2027 : /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2028 : But in the case of vector types, it is some vector mode.
2029 :
2030 : When we have only some of our vector isa extensions enabled, then there
2031 : are some modes for which vector_mode_supported_p is false. For these
2032 : modes, the generic vector support in gcc will choose some non-vector mode
2033 : in order to implement the type. By computing the natural mode, we'll
2034 : select the proper ABI location for the operand and not depend on whatever
2035 : the middle-end decides to do with these vector types.
2036 :
2037 : The midde-end can't deal with the vector types > 16 bytes. In this
2038 : case, we return the original mode and warn ABI change if CUM isn't
2039 : NULL.
2040 :
2041 : If INT_RETURN is true, warn ABI change if the vector mode isn't
2042 : available for function return value. */
2043 :
2044 : static machine_mode
2045 228200560 : type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
2046 : bool in_return)
2047 : {
2048 228200560 : machine_mode mode = TYPE_MODE (type);
2049 :
2050 228200560 : if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
2051 : {
2052 467243 : HOST_WIDE_INT size = int_size_in_bytes (type);
2053 467243 : if ((size == 8 || size == 16 || size == 32 || size == 64)
2054 : /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2055 467243 : && TYPE_VECTOR_SUBPARTS (type) > 1)
2056 : {
2057 430671 : machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2058 :
2059 : /* There are no XFmode vector modes ... */
2060 430671 : if (innermode == XFmode)
2061 : return mode;
2062 :
2063 : /* ... and no decimal float vector modes. */
2064 430118 : if (DECIMAL_FLOAT_MODE_P (innermode))
2065 : return mode;
2066 :
2067 429825 : if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
2068 : mode = MIN_MODE_VECTOR_FLOAT;
2069 : else
2070 360005 : mode = MIN_MODE_VECTOR_INT;
2071 :
2072 : /* Get the mode which has this inner mode and number of units. */
2073 9086658 : FOR_EACH_MODE_FROM (mode, mode)
2074 18908281 : if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2075 10251448 : && GET_MODE_INNER (mode) == innermode)
2076 : {
2077 429825 : if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
2078 : {
2079 293564 : static bool warnedavx512f;
2080 293564 : static bool warnedavx512f_ret;
2081 :
2082 293564 : if (cum && cum->warn_avx512f && !warnedavx512f)
2083 : {
2084 1361 : if (warning (OPT_Wpsabi, "AVX512F vector argument "
2085 : "without AVX512F enabled changes the ABI"))
2086 2 : warnedavx512f = true;
2087 : }
2088 292203 : else if (in_return && !warnedavx512f_ret)
2089 : {
2090 283582 : if (warning (OPT_Wpsabi, "AVX512F vector return "
2091 : "without AVX512F enabled changes the ABI"))
2092 4 : warnedavx512f_ret = true;
2093 : }
2094 :
2095 293564 : return TYPE_MODE (type);
2096 : }
2097 136261 : else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
2098 : {
2099 135712 : static bool warnedavx;
2100 135712 : static bool warnedavx_ret;
2101 :
2102 135712 : if (cum && cum->warn_avx && !warnedavx)
2103 : {
2104 770 : if (warning (OPT_Wpsabi, "AVX vector argument "
2105 : "without AVX enabled changes the ABI"))
2106 5 : warnedavx = true;
2107 : }
2108 134942 : else if (in_return && !warnedavx_ret)
2109 : {
2110 120871 : if (warning (OPT_Wpsabi, "AVX vector return "
2111 : "without AVX enabled changes the ABI"))
2112 10 : warnedavx_ret = true;
2113 : }
2114 :
2115 135712 : return TYPE_MODE (type);
2116 : }
2117 549 : else if (((size == 8 && TARGET_64BIT) || size == 16)
2118 546 : && !TARGET_SSE
2119 140 : && !TARGET_IAMCU)
2120 : {
2121 140 : static bool warnedsse;
2122 140 : static bool warnedsse_ret;
2123 :
2124 140 : if (cum && cum->warn_sse && !warnedsse)
2125 : {
2126 19 : if (warning (OPT_Wpsabi, "SSE vector argument "
2127 : "without SSE enabled changes the ABI"))
2128 6 : warnedsse = true;
2129 : }
2130 121 : else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2131 : {
2132 0 : if (warning (OPT_Wpsabi, "SSE vector return "
2133 : "without SSE enabled changes the ABI"))
2134 0 : warnedsse_ret = true;
2135 : }
2136 : }
2137 409 : else if ((size == 8 && !TARGET_64BIT)
2138 0 : && (!cfun
2139 0 : || cfun->machine->func_type == TYPE_NORMAL)
2140 0 : && !TARGET_MMX
2141 0 : && !TARGET_IAMCU)
2142 : {
2143 0 : static bool warnedmmx;
2144 0 : static bool warnedmmx_ret;
2145 :
2146 0 : if (cum && cum->warn_mmx && !warnedmmx)
2147 : {
2148 0 : if (warning (OPT_Wpsabi, "MMX vector argument "
2149 : "without MMX enabled changes the ABI"))
2150 0 : warnedmmx = true;
2151 : }
2152 0 : else if (in_return && !warnedmmx_ret)
2153 : {
2154 0 : if (warning (OPT_Wpsabi, "MMX vector return "
2155 : "without MMX enabled changes the ABI"))
2156 0 : warnedmmx_ret = true;
2157 : }
2158 : }
2159 549 : return mode;
2160 : }
2161 :
2162 0 : gcc_unreachable ();
2163 : }
2164 : }
2165 :
2166 : return mode;
2167 : }
2168 :
2169 : /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2170 : this may not agree with the mode that the type system has chosen for the
2171 : register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2172 : go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2173 :
2174 : static rtx
2175 36477301 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2176 : unsigned int regno)
2177 : {
2178 36477301 : rtx tmp;
2179 :
2180 36477301 : if (orig_mode != BLKmode)
2181 36477273 : tmp = gen_rtx_REG (orig_mode, regno);
2182 : else
2183 : {
2184 28 : tmp = gen_rtx_REG (mode, regno);
2185 28 : tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2186 28 : tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2187 : }
2188 :
2189 36477301 : return tmp;
2190 : }
2191 :
2192 : /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2193 : of this code is to classify each 8bytes of incoming argument by the register
2194 : class and assign registers accordingly. */
2195 :
2196 : /* Return the union class of CLASS1 and CLASS2.
2197 : See the x86-64 PS ABI for details. */
2198 :
2199 : static enum x86_64_reg_class
2200 55167844 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2201 : {
2202 : /* Rule #1: If both classes are equal, this is the resulting class. */
2203 53922334 : if (class1 == class2)
2204 : return class1;
2205 :
2206 : /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2207 : the other class. */
2208 47769811 : if (class1 == X86_64_NO_CLASS)
2209 : return class2;
2210 48595766 : if (class2 == X86_64_NO_CLASS)
2211 : return class1;
2212 :
2213 : /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2214 1657209 : if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2215 : return X86_64_MEMORY_CLASS;
2216 :
2217 : /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2218 1509699 : if ((class1 == X86_64_INTEGERSI_CLASS
2219 189076 : && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2220 1508493 : || (class2 == X86_64_INTEGERSI_CLASS
2221 916644 : && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2222 : return X86_64_INTEGERSI_CLASS;
2223 1504556 : if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2224 384758 : || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2225 : return X86_64_INTEGER_CLASS;
2226 :
2227 : /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2228 : MEMORY is used. */
2229 91086 : if (class1 == X86_64_X87_CLASS
2230 : || class1 == X86_64_X87UP_CLASS
2231 91086 : || class1 == X86_64_COMPLEX_X87_CLASS
2232 : || class2 == X86_64_X87_CLASS
2233 90181 : || class2 == X86_64_X87UP_CLASS
2234 59516 : || class2 == X86_64_COMPLEX_X87_CLASS)
2235 31570 : return X86_64_MEMORY_CLASS;
2236 :
2237 : /* Rule #6: Otherwise class SSE is used. */
2238 : return X86_64_SSE_CLASS;
2239 : }
2240 :
2241 : /* Classify the argument of type TYPE and mode MODE.
2242 : CLASSES will be filled by the register class used to pass each word
2243 : of the operand. The number of words is returned. In case the parameter
2244 : should be passed in memory, 0 is returned. As a special case for zero
2245 : sized containers, classes[0] will be NO_CLASS and 1 is returned.
2246 :
2247 : BIT_OFFSET is used internally for handling records and specifies the
2248 : offset in bits modulo 512 to avoid overflow cases.
2249 :
2250 : See the x86-64 PS ABI for details.
2251 : */
2252 :
2253 : static int
2254 389660458 : classify_argument (machine_mode mode, const_tree type,
2255 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2256 : int &zero_width_bitfields)
2257 : {
2258 389660458 : HOST_WIDE_INT bytes
2259 773161324 : = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2260 389660458 : int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2261 :
2262 : /* Variable sized entities are always passed/returned in memory. */
2263 389660458 : if (bytes < 0)
2264 : return 0;
2265 :
2266 389659259 : if (mode != VOIDmode)
2267 : {
2268 : /* The value of "named" doesn't matter. */
2269 388548682 : function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2270 388548682 : if (targetm.calls.must_pass_in_stack (arg))
2271 37 : return 0;
2272 : }
2273 :
2274 389659222 : if (type && (AGGREGATE_TYPE_P (type)
2275 353818133 : || (BITINT_TYPE_P (type) && words > 1)))
2276 : {
2277 36954181 : int i;
2278 36954181 : tree field;
2279 36954181 : enum x86_64_reg_class subclasses[MAX_CLASSES];
2280 :
2281 : /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2282 36954181 : if (bytes > 64)
2283 : return 0;
2284 :
2285 92998629 : for (i = 0; i < words; i++)
2286 56876046 : classes[i] = X86_64_NO_CLASS;
2287 :
2288 : /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2289 : signalize memory class, so handle it as special case. */
2290 36122583 : if (!words)
2291 : {
2292 82718 : classes[0] = X86_64_NO_CLASS;
2293 82718 : return 1;
2294 : }
2295 :
2296 : /* Classify each field of record and merge classes. */
2297 36039865 : switch (TREE_CODE (type))
2298 : {
2299 34010132 : case RECORD_TYPE:
2300 : /* And now merge the fields of structure. */
2301 916029451 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2302 : {
2303 882538627 : if (TREE_CODE (field) == FIELD_DECL)
2304 : {
2305 50342019 : int num;
2306 :
2307 50342019 : if (TREE_TYPE (field) == error_mark_node)
2308 4 : continue;
2309 :
2310 : /* Bitfields are always classified as integer. Handle them
2311 : early, since later code would consider them to be
2312 : misaligned integers. */
2313 50342015 : if (DECL_BIT_FIELD (field))
2314 : {
2315 1254729 : if (integer_zerop (DECL_SIZE (field)))
2316 : {
2317 12839 : if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2318 8021 : continue;
2319 4818 : if (zero_width_bitfields != 2)
2320 : {
2321 4284 : zero_width_bitfields = 1;
2322 4284 : continue;
2323 : }
2324 : }
2325 1242424 : for (i = (int_bit_position (field)
2326 1242424 : + (bit_offset % 64)) / 8 / 8;
2327 2487934 : i < ((int_bit_position (field) + (bit_offset % 64))
2328 2487934 : + tree_to_shwi (DECL_SIZE (field))
2329 2487934 : + 63) / 8 / 8; i++)
2330 1245510 : classes[i]
2331 2491020 : = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2332 : }
2333 : else
2334 : {
2335 49087286 : int pos;
2336 :
2337 49087286 : type = TREE_TYPE (field);
2338 :
2339 : /* Flexible array member is ignored. */
2340 49087286 : if (TYPE_MODE (type) == BLKmode
2341 651016 : && TREE_CODE (type) == ARRAY_TYPE
2342 168531 : && TYPE_SIZE (type) == NULL_TREE
2343 2007 : && TYPE_DOMAIN (type) != NULL_TREE
2344 49088528 : && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2345 : == NULL_TREE))
2346 : {
2347 1242 : static bool warned;
2348 :
2349 1242 : if (!warned && warn_psabi)
2350 : {
2351 3 : warned = true;
2352 3 : inform (input_location,
2353 : "the ABI of passing struct with"
2354 : " a flexible array member has"
2355 : " changed in GCC 4.4");
2356 : }
2357 1242 : continue;
2358 1242 : }
2359 49086044 : num = classify_argument (TYPE_MODE (type), type,
2360 : subclasses,
2361 49086044 : (int_bit_position (field)
2362 49086044 : + bit_offset) % 512,
2363 : zero_width_bitfields);
2364 49086044 : if (!num)
2365 : return 0;
2366 48566736 : pos = (int_bit_position (field)
2367 48566736 : + (bit_offset % 64)) / 8 / 8;
2368 100524438 : for (i = 0; i < num && (i + pos) < words; i++)
2369 51957702 : classes[i + pos]
2370 51957702 : = merge_classes (subclasses[i], classes[i + pos]);
2371 : }
2372 : }
2373 : }
2374 : break;
2375 :
2376 445240 : case ARRAY_TYPE:
2377 : /* Arrays are handled as small records. */
2378 445240 : {
2379 445240 : int num;
2380 445240 : num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2381 445240 : TREE_TYPE (type), subclasses, bit_offset,
2382 : zero_width_bitfields);
2383 445240 : if (!num)
2384 : return 0;
2385 :
2386 : /* The partial classes are now full classes. */
2387 429371 : if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2388 13868 : subclasses[0] = X86_64_SSE_CLASS;
2389 429371 : if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2390 5126 : subclasses[0] = X86_64_SSE_CLASS;
2391 429371 : if (subclasses[0] == X86_64_INTEGERSI_CLASS
2392 161349 : && !((bit_offset % 64) == 0 && bytes == 4))
2393 130127 : subclasses[0] = X86_64_INTEGER_CLASS;
2394 :
2395 1326278 : for (i = 0; i < words; i++)
2396 896907 : classes[i] = subclasses[i % num];
2397 :
2398 : break;
2399 : }
2400 273008 : case UNION_TYPE:
2401 273008 : case QUAL_UNION_TYPE:
2402 : /* Unions are similar to RECORD_TYPE but offset is always 0.
2403 : */
2404 3038701 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2405 : {
2406 2800958 : if (TREE_CODE (field) == FIELD_DECL)
2407 : {
2408 1237591 : int num;
2409 :
2410 1237591 : if (TREE_TYPE (field) == error_mark_node)
2411 10 : continue;
2412 :
2413 1237581 : num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2414 1237581 : TREE_TYPE (field), subclasses,
2415 : bit_offset, zero_width_bitfields);
2416 1237581 : if (!num)
2417 : return 0;
2418 3166948 : for (i = 0; i < num && i < words; i++)
2419 1964632 : classes[i] = merge_classes (subclasses[i], classes[i]);
2420 : }
2421 : }
2422 : break;
2423 :
2424 1311485 : case BITINT_TYPE:
2425 1311485 : case ENUMERAL_TYPE:
2426 : /* _BitInt(N) for N > 64 is passed as structure containing
2427 : (N + 63) / 64 64-bit elements. */
2428 1311485 : if (words > 2)
2429 : return 0;
2430 75441 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2431 75441 : return 2;
2432 :
2433 0 : default:
2434 0 : gcc_unreachable ();
2435 : }
2436 :
2437 34157938 : if (words > 2)
2438 : {
2439 : /* When size > 16 bytes, if the first one isn't
2440 : X86_64_SSE_CLASS or any other ones aren't
2441 : X86_64_SSEUP_CLASS, everything should be passed in
2442 : memory. */
2443 1661806 : if (classes[0] != X86_64_SSE_CLASS)
2444 : return 0;
2445 :
2446 197316 : for (i = 1; i < words; i++)
2447 179129 : if (classes[i] != X86_64_SSEUP_CLASS)
2448 : return 0;
2449 : }
2450 :
2451 : /* Final merger cleanup. */
2452 76518474 : for (i = 0; i < words; i++)
2453 : {
2454 : /* If one class is MEMORY, everything should be passed in
2455 : memory. */
2456 44037382 : if (classes[i] == X86_64_MEMORY_CLASS)
2457 : return 0;
2458 :
2459 : /* The X86_64_SSEUP_CLASS should be always preceded by
2460 : X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2461 44006525 : if (classes[i] == X86_64_SSEUP_CLASS
2462 207011 : && classes[i - 1] != X86_64_SSE_CLASS
2463 76546 : && classes[i - 1] != X86_64_SSEUP_CLASS)
2464 : {
2465 : /* The first one should never be X86_64_SSEUP_CLASS. */
2466 1916 : gcc_assert (i != 0);
2467 1916 : classes[i] = X86_64_SSE_CLASS;
2468 : }
2469 :
2470 : /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2471 : everything should be passed in memory. */
2472 44006525 : if (classes[i] == X86_64_X87UP_CLASS
2473 178062 : && (classes[i - 1] != X86_64_X87_CLASS))
2474 : {
2475 2370 : static bool warned;
2476 :
2477 : /* The first one should never be X86_64_X87UP_CLASS. */
2478 2370 : gcc_assert (i != 0);
2479 2370 : if (!warned && warn_psabi)
2480 : {
2481 1 : warned = true;
2482 1 : inform (input_location,
2483 : "the ABI of passing union with %<long double%>"
2484 : " has changed in GCC 4.4");
2485 : }
2486 2370 : return 0;
2487 : }
2488 : }
2489 : return words;
2490 : }
2491 :
2492 : /* Compute alignment needed. We align all types to natural boundaries with
2493 : exception of XFmode that is aligned to 64bits. */
2494 352705041 : if (mode != VOIDmode && mode != BLKmode)
2495 : {
2496 351130470 : int mode_alignment = GET_MODE_BITSIZE (mode);
2497 :
2498 351130470 : if (mode == XFmode)
2499 : mode_alignment = 128;
2500 344256065 : else if (mode == XCmode)
2501 548711 : mode_alignment = 256;
2502 351130470 : if (COMPLEX_MODE_P (mode))
2503 2304594 : mode_alignment /= 2;
2504 : /* Misaligned fields are always returned in memory. */
2505 351130470 : if (bit_offset % mode_alignment)
2506 : return 0;
2507 : }
2508 :
2509 : /* for V1xx modes, just use the base mode */
2510 352697408 : if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2511 446184200 : && GET_MODE_UNIT_SIZE (mode) == bytes)
2512 6315 : mode = GET_MODE_INNER (mode);
2513 :
2514 : /* Classification of atomic types. */
2515 352697408 : switch (mode)
2516 : {
2517 207933 : case E_SDmode:
2518 207933 : case E_DDmode:
2519 207933 : classes[0] = X86_64_SSE_CLASS;
2520 207933 : return 1;
2521 98708 : case E_TDmode:
2522 98708 : classes[0] = X86_64_SSE_CLASS;
2523 98708 : classes[1] = X86_64_SSEUP_CLASS;
2524 98708 : return 2;
2525 229723207 : case E_DImode:
2526 229723207 : case E_SImode:
2527 229723207 : case E_HImode:
2528 229723207 : case E_QImode:
2529 229723207 : case E_CSImode:
2530 229723207 : case E_CHImode:
2531 229723207 : case E_CQImode:
2532 229723207 : {
2533 229723207 : int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2534 :
2535 : /* Analyze last 128 bits only. */
2536 229723207 : size = (size - 1) & 0x7f;
2537 :
2538 229723207 : if (size < 32)
2539 : {
2540 102235422 : classes[0] = X86_64_INTEGERSI_CLASS;
2541 102235422 : return 1;
2542 : }
2543 127487785 : else if (size < 64)
2544 : {
2545 116734219 : classes[0] = X86_64_INTEGER_CLASS;
2546 116734219 : return 1;
2547 : }
2548 10753566 : else if (size < 64+32)
2549 : {
2550 3994645 : classes[0] = X86_64_INTEGER_CLASS;
2551 3994645 : classes[1] = X86_64_INTEGERSI_CLASS;
2552 3994645 : return 2;
2553 : }
2554 6758921 : else if (size < 64+64)
2555 : {
2556 6758921 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2557 6758921 : return 2;
2558 : }
2559 : else
2560 : gcc_unreachable ();
2561 : }
2562 2459196 : case E_CDImode:
2563 2459196 : case E_TImode:
2564 2459196 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2565 2459196 : return 2;
2566 0 : case E_COImode:
2567 0 : case E_OImode:
2568 : /* OImode shouldn't be used directly. */
2569 0 : gcc_unreachable ();
2570 : case E_CTImode:
2571 : return 0;
2572 863975 : case E_HFmode:
2573 863975 : case E_BFmode:
2574 863975 : if (!(bit_offset % 64))
2575 861425 : classes[0] = X86_64_SSEHF_CLASS;
2576 : else
2577 2550 : classes[0] = X86_64_SSE_CLASS;
2578 : return 1;
2579 9723457 : case E_SFmode:
2580 9723457 : if (!(bit_offset % 64))
2581 9670278 : classes[0] = X86_64_SSESF_CLASS;
2582 : else
2583 53179 : classes[0] = X86_64_SSE_CLASS;
2584 : return 1;
2585 4258433 : case E_DFmode:
2586 4258433 : classes[0] = X86_64_SSEDF_CLASS;
2587 4258433 : return 1;
2588 6873689 : case E_XFmode:
2589 6873689 : classes[0] = X86_64_X87_CLASS;
2590 6873689 : classes[1] = X86_64_X87UP_CLASS;
2591 6873689 : return 2;
2592 1309162 : case E_TFmode:
2593 1309162 : classes[0] = X86_64_SSE_CLASS;
2594 1309162 : classes[1] = X86_64_SSEUP_CLASS;
2595 1309162 : return 2;
2596 77920 : case E_HCmode:
2597 77920 : case E_BCmode:
2598 77920 : classes[0] = X86_64_SSE_CLASS;
2599 77920 : if (!(bit_offset % 64))
2600 : return 1;
2601 : else
2602 : {
2603 98 : classes[1] = X86_64_SSEHF_CLASS;
2604 98 : return 2;
2605 : }
2606 690840 : case E_SCmode:
2607 690840 : classes[0] = X86_64_SSE_CLASS;
2608 690840 : if (!(bit_offset % 64))
2609 : return 1;
2610 : else
2611 : {
2612 1119 : static bool warned;
2613 :
2614 1119 : if (!warned && warn_psabi)
2615 : {
2616 2 : warned = true;
2617 2 : inform (input_location,
2618 : "the ABI of passing structure with %<complex float%>"
2619 : " member has changed in GCC 4.4");
2620 : }
2621 1119 : classes[1] = X86_64_SSESF_CLASS;
2622 1119 : return 2;
2623 : }
2624 700857 : case E_DCmode:
2625 700857 : classes[0] = X86_64_SSEDF_CLASS;
2626 700857 : classes[1] = X86_64_SSEDF_CLASS;
2627 700857 : return 2;
2628 548711 : case E_XCmode:
2629 548711 : classes[0] = X86_64_COMPLEX_X87_CLASS;
2630 548711 : return 1;
2631 : case E_TCmode:
2632 : /* This modes is larger than 16 bytes. */
2633 : return 0;
2634 25339404 : case E_V8SFmode:
2635 25339404 : case E_V8SImode:
2636 25339404 : case E_V32QImode:
2637 25339404 : case E_V16HFmode:
2638 25339404 : case E_V16BFmode:
2639 25339404 : case E_V16HImode:
2640 25339404 : case E_V4DFmode:
2641 25339404 : case E_V4DImode:
2642 25339404 : classes[0] = X86_64_SSE_CLASS;
2643 25339404 : classes[1] = X86_64_SSEUP_CLASS;
2644 25339404 : classes[2] = X86_64_SSEUP_CLASS;
2645 25339404 : classes[3] = X86_64_SSEUP_CLASS;
2646 25339404 : return 4;
2647 27478225 : case E_V8DFmode:
2648 27478225 : case E_V16SFmode:
2649 27478225 : case E_V32HFmode:
2650 27478225 : case E_V32BFmode:
2651 27478225 : case E_V8DImode:
2652 27478225 : case E_V16SImode:
2653 27478225 : case E_V32HImode:
2654 27478225 : case E_V64QImode:
2655 27478225 : classes[0] = X86_64_SSE_CLASS;
2656 27478225 : classes[1] = X86_64_SSEUP_CLASS;
2657 27478225 : classes[2] = X86_64_SSEUP_CLASS;
2658 27478225 : classes[3] = X86_64_SSEUP_CLASS;
2659 27478225 : classes[4] = X86_64_SSEUP_CLASS;
2660 27478225 : classes[5] = X86_64_SSEUP_CLASS;
2661 27478225 : classes[6] = X86_64_SSEUP_CLASS;
2662 27478225 : classes[7] = X86_64_SSEUP_CLASS;
2663 27478225 : return 8;
2664 37370843 : case E_V4SFmode:
2665 37370843 : case E_V4SImode:
2666 37370843 : case E_V16QImode:
2667 37370843 : case E_V8HImode:
2668 37370843 : case E_V8HFmode:
2669 37370843 : case E_V8BFmode:
2670 37370843 : case E_V2DFmode:
2671 37370843 : case E_V2DImode:
2672 37370843 : classes[0] = X86_64_SSE_CLASS;
2673 37370843 : classes[1] = X86_64_SSEUP_CLASS;
2674 37370843 : return 2;
2675 3266734 : case E_V1TImode:
2676 3266734 : case E_V1DImode:
2677 3266734 : case E_V2SFmode:
2678 3266734 : case E_V2SImode:
2679 3266734 : case E_V4HImode:
2680 3266734 : case E_V4HFmode:
2681 3266734 : case E_V4BFmode:
2682 3266734 : case E_V2HFmode:
2683 3266734 : case E_V2BFmode:
2684 3266734 : case E_V8QImode:
2685 3266734 : classes[0] = X86_64_SSE_CLASS;
2686 3266734 : return 1;
2687 : case E_BLKmode:
2688 : case E_VOIDmode:
2689 : return 0;
2690 45148 : default:
2691 45148 : gcc_assert (VECTOR_MODE_P (mode));
2692 :
2693 45148 : if (bytes > 16)
2694 : return 0;
2695 :
2696 60568 : gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2697 :
2698 60568 : if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2699 29850 : classes[0] = X86_64_INTEGERSI_CLASS;
2700 : else
2701 434 : classes[0] = X86_64_INTEGER_CLASS;
2702 30284 : classes[1] = X86_64_INTEGER_CLASS;
2703 30284 : return 1 + (bytes > 8);
2704 : }
2705 : }
2706 :
2707 : /* Wrapper around classify_argument with the extra zero_width_bitfields
2708 : argument, to diagnose GCC 12.1 ABI differences for C. */
2709 :
2710 : static int
2711 338891059 : classify_argument (machine_mode mode, const_tree type,
2712 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2713 : {
2714 338891059 : int zero_width_bitfields = 0;
2715 338891059 : static bool warned = false;
2716 338891059 : int n = classify_argument (mode, type, classes, bit_offset,
2717 : zero_width_bitfields);
2718 338891059 : if (!zero_width_bitfields || warned || !warn_psabi)
2719 : return n;
2720 534 : enum x86_64_reg_class alt_classes[MAX_CLASSES];
2721 534 : zero_width_bitfields = 2;
2722 534 : if (classify_argument (mode, type, alt_classes, bit_offset,
2723 : zero_width_bitfields) != n)
2724 0 : zero_width_bitfields = 3;
2725 : else
2726 1286 : for (int i = 0; i < n; i++)
2727 760 : if (classes[i] != alt_classes[i])
2728 : {
2729 8 : zero_width_bitfields = 3;
2730 8 : break;
2731 : }
2732 534 : if (zero_width_bitfields == 3)
2733 : {
2734 8 : warned = true;
2735 8 : const char *url
2736 : = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2737 :
2738 8 : inform (input_location,
2739 : "the ABI of passing C structures with zero-width bit-fields"
2740 : " has changed in GCC %{12.1%}", url);
2741 : }
2742 : return n;
2743 : }
2744 :
2745 : /* Examine the argument and return set number of register required in each
2746 : class. Return true iff parameter should be passed in memory. */
2747 :
2748 : static bool
2749 229306067 : examine_argument (machine_mode mode, const_tree type, bool in_return,
2750 : int *int_nregs, int *sse_nregs)
2751 : {
2752 229306067 : enum x86_64_reg_class regclass[MAX_CLASSES];
2753 229306067 : int n = classify_argument (mode, type, regclass, 0);
2754 :
2755 229306067 : *int_nregs = 0;
2756 229306067 : *sse_nregs = 0;
2757 :
2758 229306067 : if (!n)
2759 : return true;
2760 664510507 : for (n--; n >= 0; n--)
2761 440734285 : switch (regclass[n])
2762 : {
2763 152252834 : case X86_64_INTEGER_CLASS:
2764 152252834 : case X86_64_INTEGERSI_CLASS:
2765 152252834 : (*int_nregs)++;
2766 152252834 : break;
2767 74459857 : case X86_64_SSE_CLASS:
2768 74459857 : case X86_64_SSEHF_CLASS:
2769 74459857 : case X86_64_SSESF_CLASS:
2770 74459857 : case X86_64_SSEDF_CLASS:
2771 74459857 : (*sse_nregs)++;
2772 74459857 : break;
2773 : case X86_64_NO_CLASS:
2774 : case X86_64_SSEUP_CLASS:
2775 : break;
2776 9386961 : case X86_64_X87_CLASS:
2777 9386961 : case X86_64_X87UP_CLASS:
2778 9386961 : case X86_64_COMPLEX_X87_CLASS:
2779 9386961 : if (!in_return)
2780 : return true;
2781 : break;
2782 0 : case X86_64_MEMORY_CLASS:
2783 0 : gcc_unreachable ();
2784 : }
2785 :
2786 : return false;
2787 : }
2788 :
2789 : /* Construct container for the argument used by GCC interface. See
2790 : FUNCTION_ARG for the detailed description. */
2791 :
2792 : static rtx
2793 111198730 : construct_container (machine_mode mode, machine_mode orig_mode,
2794 : const_tree type, bool in_return, int nintregs,
2795 : int nsseregs, const int *intreg, int sse_regno)
2796 : {
2797 : /* The following variables hold the static issued_error state. */
2798 111198730 : static bool issued_sse_arg_error;
2799 111198730 : static bool issued_sse_ret_error;
2800 111198730 : static bool issued_x87_ret_error;
2801 :
2802 111198730 : machine_mode tmpmode;
2803 111198730 : int bytes
2804 221715444 : = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2805 111198730 : enum x86_64_reg_class regclass[MAX_CLASSES];
2806 111198730 : int n;
2807 111198730 : int i;
2808 111198730 : int nexps = 0;
2809 111198730 : int needed_sseregs, needed_intregs;
2810 111198730 : rtx exp[MAX_CLASSES];
2811 111198730 : rtx ret;
2812 :
2813 111198730 : if (examine_argument (mode, type, in_return, &needed_intregs,
2814 : &needed_sseregs))
2815 : return NULL;
2816 :
2817 110690236 : if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2818 : return NULL;
2819 :
2820 : /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2821 : some less clueful developer tries to use floating-point anyway. */
2822 109585063 : if (needed_sseregs
2823 36796220 : && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2824 : {
2825 : /* Return early if we shouldn't raise an error for invalid
2826 : calls. */
2827 71 : if (cfun != NULL && cfun->machine->silent_p)
2828 : return NULL;
2829 39 : if (in_return)
2830 : {
2831 34 : if (!issued_sse_ret_error)
2832 : {
2833 16 : if (VALID_SSE2_TYPE_MODE (mode))
2834 5 : error ("SSE register return with SSE2 disabled");
2835 : else
2836 11 : error ("SSE register return with SSE disabled");
2837 16 : issued_sse_ret_error = true;
2838 : }
2839 : }
2840 5 : else if (!issued_sse_arg_error)
2841 : {
2842 5 : if (VALID_SSE2_TYPE_MODE (mode))
2843 0 : error ("SSE register argument with SSE2 disabled");
2844 : else
2845 5 : error ("SSE register argument with SSE disabled");
2846 5 : issued_sse_arg_error = true;
2847 : }
2848 39 : return NULL;
2849 : }
2850 :
2851 109584992 : n = classify_argument (mode, type, regclass, 0);
2852 109584992 : gcc_assert (n);
2853 :
2854 : /* Likewise, error if the ABI requires us to return values in the
2855 : x87 registers and the user specified -mno-80387. */
2856 109584992 : if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2857 1424666 : for (i = 0; i < n; i++)
2858 751944 : if (regclass[i] == X86_64_X87_CLASS
2859 : || regclass[i] == X86_64_X87UP_CLASS
2860 751944 : || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2861 : {
2862 : /* Return early if we shouldn't raise an error for invalid
2863 : calls. */
2864 16 : if (cfun != NULL && cfun->machine->silent_p)
2865 : return NULL;
2866 13 : if (!issued_x87_ret_error)
2867 : {
2868 8 : error ("x87 register return with x87 disabled");
2869 8 : issued_x87_ret_error = true;
2870 : }
2871 13 : return NULL;
2872 : }
2873 :
2874 : /* First construct simple cases. Avoid SCmode, since we want to use
2875 : single register to pass this type. */
2876 109584976 : if (n == 1 && mode != SCmode && mode != HCmode)
2877 72684381 : switch (regclass[0])
2878 : {
2879 66631201 : case X86_64_INTEGER_CLASS:
2880 66631201 : case X86_64_INTEGERSI_CLASS:
2881 66631201 : return gen_rtx_REG (mode, intreg[0]);
2882 5853225 : case X86_64_SSE_CLASS:
2883 5853225 : case X86_64_SSEHF_CLASS:
2884 5853225 : case X86_64_SSESF_CLASS:
2885 5853225 : case X86_64_SSEDF_CLASS:
2886 5853225 : if (mode != BLKmode)
2887 11705642 : return gen_reg_or_parallel (mode, orig_mode,
2888 11705642 : GET_SSE_REGNO (sse_regno));
2889 : break;
2890 170953 : case X86_64_X87_CLASS:
2891 170953 : case X86_64_COMPLEX_X87_CLASS:
2892 170953 : return gen_rtx_REG (mode, FIRST_STACK_REG);
2893 : case X86_64_NO_CLASS:
2894 : /* Zero sized array, struct or class. */
2895 : return NULL;
2896 0 : default:
2897 0 : gcc_unreachable ();
2898 : }
2899 36900999 : if (n == 2
2900 19103430 : && regclass[0] == X86_64_SSE_CLASS
2901 12897286 : && regclass[1] == X86_64_SSEUP_CLASS
2902 12892131 : && mode != BLKmode)
2903 25784262 : return gen_reg_or_parallel (mode, orig_mode,
2904 25784262 : GET_SSE_REGNO (sse_regno));
2905 24008868 : if (n == 4
2906 8430456 : && regclass[0] == X86_64_SSE_CLASS
2907 8430456 : && regclass[1] == X86_64_SSEUP_CLASS
2908 8430456 : && regclass[2] == X86_64_SSEUP_CLASS
2909 8430456 : && regclass[3] == X86_64_SSEUP_CLASS
2910 8430456 : && mode != BLKmode)
2911 16857534 : return gen_reg_or_parallel (mode, orig_mode,
2912 16857534 : GET_SSE_REGNO (sse_regno));
2913 15580101 : if (n == 8
2914 9129315 : && regclass[0] == X86_64_SSE_CLASS
2915 9129315 : && regclass[1] == X86_64_SSEUP_CLASS
2916 9129315 : && regclass[2] == X86_64_SSEUP_CLASS
2917 9129315 : && regclass[3] == X86_64_SSEUP_CLASS
2918 9129315 : && regclass[4] == X86_64_SSEUP_CLASS
2919 9129315 : && regclass[5] == X86_64_SSEUP_CLASS
2920 9129315 : && regclass[6] == X86_64_SSEUP_CLASS
2921 9129315 : && regclass[7] == X86_64_SSEUP_CLASS
2922 9129315 : && mode != BLKmode)
2923 18254358 : return gen_reg_or_parallel (mode, orig_mode,
2924 18254358 : GET_SSE_REGNO (sse_regno));
2925 6452922 : if (n == 2
2926 6211299 : && regclass[0] == X86_64_X87_CLASS
2927 2242042 : && regclass[1] == X86_64_X87UP_CLASS)
2928 2242042 : return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2929 :
2930 4210880 : if (n == 2
2931 3969257 : && regclass[0] == X86_64_INTEGER_CLASS
2932 3547567 : && regclass[1] == X86_64_INTEGER_CLASS
2933 3539224 : && (mode == CDImode || mode == TImode || mode == BLKmode)
2934 3539224 : && intreg[0] + 1 == intreg[1])
2935 : {
2936 3220156 : if (mode == BLKmode)
2937 : {
2938 : /* Use TImode for BLKmode values in 2 integer registers. */
2939 515956 : exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2940 257978 : gen_rtx_REG (TImode, intreg[0]),
2941 : GEN_INT (0));
2942 257978 : ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2943 257978 : XVECEXP (ret, 0, 0) = exp[0];
2944 257978 : return ret;
2945 : }
2946 : else
2947 2962178 : return gen_rtx_REG (mode, intreg[0]);
2948 : }
2949 :
2950 : /* Otherwise figure out the entries of the PARALLEL. */
2951 2730549 : for (i = 0; i < n; i++)
2952 : {
2953 1739825 : int pos;
2954 :
2955 1739825 : switch (regclass[i])
2956 : {
2957 : case X86_64_NO_CLASS:
2958 : break;
2959 994383 : case X86_64_INTEGER_CLASS:
2960 994383 : case X86_64_INTEGERSI_CLASS:
2961 : /* Merge TImodes on aligned occasions here too. */
2962 994383 : if (i * 8 + 8 > bytes)
2963 : {
2964 3235 : unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2965 3235 : if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2966 : /* We've requested 24 bytes we
2967 : don't have mode for. Use DImode. */
2968 357 : tmpmode = DImode;
2969 : }
2970 991148 : else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2971 : tmpmode = SImode;
2972 : else
2973 818640 : tmpmode = DImode;
2974 1988766 : exp [nexps++]
2975 994383 : = gen_rtx_EXPR_LIST (VOIDmode,
2976 994383 : gen_rtx_REG (tmpmode, *intreg),
2977 994383 : GEN_INT (i*8));
2978 994383 : intreg++;
2979 994383 : break;
2980 592 : case X86_64_SSEHF_CLASS:
2981 592 : tmpmode = (mode == BFmode ? BFmode : HFmode);
2982 1184 : exp [nexps++]
2983 1184 : = gen_rtx_EXPR_LIST (VOIDmode,
2984 : gen_rtx_REG (tmpmode,
2985 592 : GET_SSE_REGNO (sse_regno)),
2986 592 : GEN_INT (i*8));
2987 592 : sse_regno++;
2988 592 : break;
2989 3052 : case X86_64_SSESF_CLASS:
2990 6104 : exp [nexps++]
2991 6104 : = gen_rtx_EXPR_LIST (VOIDmode,
2992 : gen_rtx_REG (SFmode,
2993 3052 : GET_SSE_REGNO (sse_regno)),
2994 3052 : GEN_INT (i*8));
2995 3052 : sse_regno++;
2996 3052 : break;
2997 482055 : case X86_64_SSEDF_CLASS:
2998 964110 : exp [nexps++]
2999 964110 : = gen_rtx_EXPR_LIST (VOIDmode,
3000 : gen_rtx_REG (DFmode,
3001 482055 : GET_SSE_REGNO (sse_regno)),
3002 482055 : GEN_INT (i*8));
3003 482055 : sse_regno++;
3004 482055 : break;
3005 251537 : case X86_64_SSE_CLASS:
3006 251537 : pos = i;
3007 251537 : switch (n)
3008 : {
3009 : case 1:
3010 : tmpmode = DImode;
3011 : break;
3012 10128 : case 2:
3013 10128 : if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
3014 : {
3015 0 : tmpmode = TImode;
3016 0 : i++;
3017 : }
3018 : else
3019 : tmpmode = DImode;
3020 : break;
3021 1689 : case 4:
3022 1689 : gcc_assert (i == 0
3023 : && regclass[1] == X86_64_SSEUP_CLASS
3024 : && regclass[2] == X86_64_SSEUP_CLASS
3025 : && regclass[3] == X86_64_SSEUP_CLASS);
3026 : tmpmode = OImode;
3027 : i += 3;
3028 : break;
3029 2136 : case 8:
3030 2136 : gcc_assert (i == 0
3031 : && regclass[1] == X86_64_SSEUP_CLASS
3032 : && regclass[2] == X86_64_SSEUP_CLASS
3033 : && regclass[3] == X86_64_SSEUP_CLASS
3034 : && regclass[4] == X86_64_SSEUP_CLASS
3035 : && regclass[5] == X86_64_SSEUP_CLASS
3036 : && regclass[6] == X86_64_SSEUP_CLASS
3037 : && regclass[7] == X86_64_SSEUP_CLASS);
3038 : tmpmode = XImode;
3039 : i += 7;
3040 : break;
3041 0 : default:
3042 0 : gcc_unreachable ();
3043 : }
3044 503074 : exp [nexps++]
3045 503074 : = gen_rtx_EXPR_LIST (VOIDmode,
3046 : gen_rtx_REG (tmpmode,
3047 251537 : GET_SSE_REGNO (sse_regno)),
3048 251537 : GEN_INT (pos*8));
3049 251537 : sse_regno++;
3050 251537 : break;
3051 0 : default:
3052 0 : gcc_unreachable ();
3053 : }
3054 : }
3055 :
3056 : /* Empty aligned struct, union or class. */
3057 990724 : if (nexps == 0)
3058 : return NULL;
3059 :
3060 990469 : ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3061 2722088 : for (i = 0; i < nexps; i++)
3062 1731619 : XVECEXP (ret, 0, i) = exp [i];
3063 : return ret;
3064 : }
3065 :
3066 : /* Update the data in CUM to advance over an argument of mode MODE
3067 : and data type TYPE. (TYPE is null for libcalls where that information
3068 : may not be available.)
3069 :
3070 : Return a number of integer registers advanced over. */
3071 :
3072 : static int
3073 2130430 : function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3074 : const_tree type, HOST_WIDE_INT bytes,
3075 : HOST_WIDE_INT words)
3076 : {
3077 2130430 : int res = 0;
3078 2130430 : bool error_p = false;
3079 :
3080 2130430 : if (TARGET_IAMCU)
3081 : {
3082 : /* Intel MCU psABI passes scalars and aggregates no larger than 8
3083 : bytes in registers. */
3084 0 : if (!VECTOR_MODE_P (mode) && bytes <= 8)
3085 0 : goto pass_in_reg;
3086 : return res;
3087 : }
3088 :
3089 2130430 : switch (mode)
3090 : {
3091 : default:
3092 : break;
3093 :
3094 93818 : case E_BLKmode:
3095 93818 : if (bytes < 0)
3096 : break;
3097 : /* FALLTHRU */
3098 :
3099 2093611 : case E_DImode:
3100 2093611 : case E_SImode:
3101 2093611 : case E_HImode:
3102 2093611 : case E_QImode:
3103 93818 : pass_in_reg:
3104 2093611 : cum->words += words;
3105 2093611 : cum->nregs -= words;
3106 2093611 : cum->regno += words;
3107 2093611 : if (cum->nregs >= 0)
3108 47414 : res = words;
3109 2093611 : if (cum->nregs <= 0)
3110 : {
3111 2059447 : cum->nregs = 0;
3112 2059447 : cfun->machine->arg_reg_available = false;
3113 2059447 : cum->regno = 0;
3114 : }
3115 : break;
3116 :
3117 0 : case E_OImode:
3118 : /* OImode shouldn't be used directly. */
3119 0 : gcc_unreachable ();
3120 :
3121 4743 : case E_DFmode:
3122 4743 : if (cum->float_in_sse == -1)
3123 0 : error_p = true;
3124 4743 : if (cum->float_in_sse < 2)
3125 : break;
3126 : /* FALLTHRU */
3127 1360 : case E_SFmode:
3128 1360 : if (cum->float_in_sse == -1)
3129 0 : error_p = true;
3130 1360 : if (cum->float_in_sse < 1)
3131 : break;
3132 : /* FALLTHRU */
3133 :
3134 52 : case E_V16HFmode:
3135 52 : case E_V16BFmode:
3136 52 : case E_V8SFmode:
3137 52 : case E_V8SImode:
3138 52 : case E_V64QImode:
3139 52 : case E_V32HImode:
3140 52 : case E_V16SImode:
3141 52 : case E_V8DImode:
3142 52 : case E_V32HFmode:
3143 52 : case E_V32BFmode:
3144 52 : case E_V16SFmode:
3145 52 : case E_V8DFmode:
3146 52 : case E_V32QImode:
3147 52 : case E_V16HImode:
3148 52 : case E_V4DFmode:
3149 52 : case E_V4DImode:
3150 52 : case E_TImode:
3151 52 : case E_V16QImode:
3152 52 : case E_V8HImode:
3153 52 : case E_V4SImode:
3154 52 : case E_V2DImode:
3155 52 : case E_V8HFmode:
3156 52 : case E_V8BFmode:
3157 52 : case E_V4SFmode:
3158 52 : case E_V2DFmode:
3159 52 : if (!type || !AGGREGATE_TYPE_P (type))
3160 : {
3161 52 : cum->sse_words += words;
3162 52 : cum->sse_nregs -= 1;
3163 52 : cum->sse_regno += 1;
3164 52 : if (cum->sse_nregs <= 0)
3165 : {
3166 4 : cum->sse_nregs = 0;
3167 4 : cum->sse_regno = 0;
3168 : }
3169 : }
3170 : break;
3171 :
3172 16 : case E_V8QImode:
3173 16 : case E_V4HImode:
3174 16 : case E_V4HFmode:
3175 16 : case E_V4BFmode:
3176 16 : case E_V2SImode:
3177 16 : case E_V2SFmode:
3178 16 : case E_V1TImode:
3179 16 : case E_V1DImode:
3180 16 : if (!type || !AGGREGATE_TYPE_P (type))
3181 : {
3182 16 : cum->mmx_words += words;
3183 16 : cum->mmx_nregs -= 1;
3184 16 : cum->mmx_regno += 1;
3185 16 : if (cum->mmx_nregs <= 0)
3186 : {
3187 0 : cum->mmx_nregs = 0;
3188 0 : cum->mmx_regno = 0;
3189 : }
3190 : }
3191 : break;
3192 : }
3193 2065602 : if (error_p)
3194 : {
3195 0 : cum->float_in_sse = 0;
3196 0 : error ("calling %qD with SSE calling convention without "
3197 : "SSE/SSE2 enabled", cum->decl);
3198 0 : sorry ("this is a GCC bug that can be worked around by adding "
3199 : "attribute used to function called");
3200 : }
3201 :
3202 : return res;
3203 : }
3204 :
3205 : static int
3206 18945027 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3207 : const_tree type, HOST_WIDE_INT words, bool named)
3208 : {
3209 18945027 : int int_nregs, sse_nregs;
3210 :
3211 : /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3212 18945027 : if (!named && (VALID_AVX512F_REG_MODE (mode)
3213 : || VALID_AVX256_REG_MODE (mode)))
3214 : return 0;
3215 :
3216 18944663 : if (!examine_argument (mode, type, false, &int_nregs, &sse_nregs)
3217 18944663 : && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3218 : {
3219 16666545 : cum->nregs -= int_nregs;
3220 16666545 : cum->sse_nregs -= sse_nregs;
3221 16666545 : cum->regno += int_nregs;
3222 16666545 : cum->sse_regno += sse_nregs;
3223 16666545 : return int_nregs;
3224 : }
3225 : else
3226 : {
3227 2278118 : int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3228 2278118 : cum->words = ROUND_UP (cum->words, align);
3229 2278118 : cum->words += words;
3230 2278118 : return 0;
3231 : }
3232 : }
3233 :
3234 : static int
3235 447161 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3236 : HOST_WIDE_INT words)
3237 : {
3238 : /* Otherwise, this should be passed indirect. */
3239 447161 : gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3240 :
3241 447161 : cum->words += words;
3242 447161 : if (cum->nregs > 0)
3243 : {
3244 289519 : cum->nregs -= 1;
3245 289519 : cum->regno += 1;
3246 289519 : return 1;
3247 : }
3248 : return 0;
3249 : }
3250 :
3251 : /* Update the data in CUM to advance over argument ARG. */
3252 :
3253 : static void
3254 21522985 : ix86_function_arg_advance (cumulative_args_t cum_v,
3255 : const function_arg_info &arg)
3256 : {
3257 21522985 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3258 21522985 : machine_mode mode = arg.mode;
3259 21522985 : HOST_WIDE_INT bytes, words;
3260 21522985 : int nregs;
3261 :
3262 : /* The argument of interrupt handler is a special case and is
3263 : handled in ix86_function_arg. */
3264 21522985 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3265 : return;
3266 :
3267 21522618 : bytes = arg.promoted_size_in_bytes ();
3268 21522618 : words = CEIL (bytes, UNITS_PER_WORD);
3269 :
3270 21522618 : if (arg.type)
3271 21208438 : mode = type_natural_mode (arg.type, NULL, false);
3272 :
3273 21522618 : if (TARGET_64BIT)
3274 : {
3275 19392188 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3276 :
3277 19392188 : if (call_abi == MS_ABI)
3278 447161 : nregs = function_arg_advance_ms_64 (cum, bytes, words);
3279 : else
3280 18945027 : nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3281 18945027 : arg.named);
3282 : }
3283 : else
3284 2130430 : nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3285 :
3286 21522618 : if (!nregs)
3287 : {
3288 : /* Track if there are outgoing arguments on stack. */
3289 5712704 : if (cum->caller)
3290 2720340 : cfun->machine->outgoing_args_on_stack = true;
3291 : }
3292 : }
3293 :
3294 : /* Define where to put the arguments to a function.
3295 : Value is zero to push the argument on the stack,
3296 : or a hard register in which to store the argument.
3297 :
3298 : MODE is the argument's machine mode.
3299 : TYPE is the data type of the argument (as a tree).
3300 : This is null for libcalls where that information may
3301 : not be available.
3302 : CUM is a variable of type CUMULATIVE_ARGS which gives info about
3303 : the preceding args and about the function being called.
3304 : NAMED is nonzero if this argument is a named parameter
3305 : (otherwise it is an extra parameter matching an ellipsis). */
3306 :
3307 : static rtx
3308 2557294 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3309 : machine_mode orig_mode, const_tree type,
3310 : HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3311 : {
3312 2557294 : bool error_p = false;
3313 :
3314 : /* Avoid the AL settings for the Unix64 ABI. */
3315 2557294 : if (mode == VOIDmode)
3316 742139 : return constm1_rtx;
3317 :
3318 1815155 : if (TARGET_IAMCU)
3319 : {
3320 : /* Intel MCU psABI passes scalars and aggregates no larger than 8
3321 : bytes in registers. */
3322 0 : if (!VECTOR_MODE_P (mode) && bytes <= 8)
3323 0 : goto pass_in_reg;
3324 : return NULL_RTX;
3325 : }
3326 :
3327 1815155 : switch (mode)
3328 : {
3329 : default:
3330 : break;
3331 :
3332 77786 : case E_BLKmode:
3333 77786 : if (bytes < 0)
3334 : break;
3335 : /* FALLTHRU */
3336 1781621 : case E_DImode:
3337 1781621 : case E_SImode:
3338 1781621 : case E_HImode:
3339 1781621 : case E_QImode:
3340 77786 : pass_in_reg:
3341 1781621 : if (words <= cum->nregs)
3342 : {
3343 45582 : int regno = cum->regno;
3344 :
3345 : /* Fastcall allocates the first two DWORD (SImode) or
3346 : smaller arguments to ECX and EDX if it isn't an
3347 : aggregate type . */
3348 45582 : if (cum->fastcall)
3349 : {
3350 6 : if (mode == BLKmode
3351 6 : || mode == DImode
3352 6 : || (type && AGGREGATE_TYPE_P (type)))
3353 : break;
3354 :
3355 : /* ECX not EAX is the first allocated register. */
3356 6 : if (regno == AX_REG)
3357 45582 : regno = CX_REG;
3358 : }
3359 45582 : return gen_rtx_REG (mode, regno);
3360 : }
3361 : break;
3362 :
3363 3353 : case E_DFmode:
3364 3353 : if (cum->float_in_sse == -1)
3365 0 : error_p = true;
3366 3353 : if (cum->float_in_sse < 2)
3367 : break;
3368 : /* FALLTHRU */
3369 960 : case E_SFmode:
3370 960 : if (cum->float_in_sse == -1)
3371 0 : error_p = true;
3372 960 : if (cum->float_in_sse < 1)
3373 : break;
3374 : /* FALLTHRU */
3375 12 : case E_TImode:
3376 : /* In 32bit, we pass TImode in xmm registers. */
3377 12 : case E_V16QImode:
3378 12 : case E_V8HImode:
3379 12 : case E_V4SImode:
3380 12 : case E_V2DImode:
3381 12 : case E_V8HFmode:
3382 12 : case E_V8BFmode:
3383 12 : case E_V4SFmode:
3384 12 : case E_V2DFmode:
3385 12 : if (!type || !AGGREGATE_TYPE_P (type))
3386 : {
3387 12 : if (cum->sse_nregs)
3388 12 : return gen_reg_or_parallel (mode, orig_mode,
3389 12 : cum->sse_regno + FIRST_SSE_REG);
3390 : }
3391 : break;
3392 :
3393 0 : case E_OImode:
3394 0 : case E_XImode:
3395 : /* OImode and XImode shouldn't be used directly. */
3396 0 : gcc_unreachable ();
3397 :
3398 9 : case E_V64QImode:
3399 9 : case E_V32HImode:
3400 9 : case E_V16SImode:
3401 9 : case E_V8DImode:
3402 9 : case E_V32HFmode:
3403 9 : case E_V32BFmode:
3404 9 : case E_V16SFmode:
3405 9 : case E_V8DFmode:
3406 9 : case E_V16HFmode:
3407 9 : case E_V16BFmode:
3408 9 : case E_V8SFmode:
3409 9 : case E_V8SImode:
3410 9 : case E_V32QImode:
3411 9 : case E_V16HImode:
3412 9 : case E_V4DFmode:
3413 9 : case E_V4DImode:
3414 9 : if (!type || !AGGREGATE_TYPE_P (type))
3415 : {
3416 9 : if (cum->sse_nregs)
3417 9 : return gen_reg_or_parallel (mode, orig_mode,
3418 9 : cum->sse_regno + FIRST_SSE_REG);
3419 : }
3420 : break;
3421 :
3422 8 : case E_V8QImode:
3423 8 : case E_V4HImode:
3424 8 : case E_V4HFmode:
3425 8 : case E_V4BFmode:
3426 8 : case E_V2SImode:
3427 8 : case E_V2SFmode:
3428 8 : case E_V1TImode:
3429 8 : case E_V1DImode:
3430 8 : if (!type || !AGGREGATE_TYPE_P (type))
3431 : {
3432 8 : if (cum->mmx_nregs)
3433 8 : return gen_reg_or_parallel (mode, orig_mode,
3434 8 : cum->mmx_regno + FIRST_MMX_REG);
3435 : }
3436 : break;
3437 : }
3438 4313 : if (error_p)
3439 : {
3440 0 : cum->float_in_sse = 0;
3441 0 : error ("calling %qD with SSE calling convention without "
3442 : "SSE/SSE2 enabled", cum->decl);
3443 0 : sorry ("this is a GCC bug that can be worked around by adding "
3444 : "attribute used to function called");
3445 : }
3446 :
3447 : return NULL_RTX;
3448 : }
3449 :
3450 : static rtx
3451 18655554 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3452 : machine_mode orig_mode, const_tree type, bool named)
3453 : {
3454 : /* Handle a hidden AL argument containing number of registers
3455 : for varargs x86-64 functions. */
3456 18655554 : if (mode == VOIDmode)
3457 5174802 : return GEN_INT (cum->maybe_vaarg
3458 : ? (cum->sse_nregs < 0
3459 : ? X86_64_SSE_REGPARM_MAX
3460 : : cum->sse_regno)
3461 : : -1);
3462 :
3463 13480752 : switch (mode)
3464 : {
3465 : default:
3466 : break;
3467 :
3468 90563 : case E_V16HFmode:
3469 90563 : case E_V16BFmode:
3470 90563 : case E_V8SFmode:
3471 90563 : case E_V8SImode:
3472 90563 : case E_V32QImode:
3473 90563 : case E_V16HImode:
3474 90563 : case E_V4DFmode:
3475 90563 : case E_V4DImode:
3476 90563 : case E_V32HFmode:
3477 90563 : case E_V32BFmode:
3478 90563 : case E_V16SFmode:
3479 90563 : case E_V16SImode:
3480 90563 : case E_V64QImode:
3481 90563 : case E_V32HImode:
3482 90563 : case E_V8DFmode:
3483 90563 : case E_V8DImode:
3484 : /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3485 90563 : if (!named)
3486 : return NULL;
3487 : break;
3488 : }
3489 :
3490 13480388 : const int *parm_regs;
3491 13480388 : if (cum->preserve_none_abi)
3492 : parm_regs = x86_64_preserve_none_int_parameter_registers;
3493 : else
3494 13480259 : parm_regs = x86_64_int_parameter_registers;
3495 :
3496 13480388 : return construct_container (mode, orig_mode, type, false,
3497 13480388 : cum->nregs, cum->sse_nregs,
3498 13480388 : &parm_regs[cum->regno],
3499 13480388 : cum->sse_regno);
3500 : }
3501 :
3502 : static rtx
3503 296428 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3504 : machine_mode orig_mode, bool named, const_tree type,
3505 : HOST_WIDE_INT bytes)
3506 : {
3507 296428 : unsigned int regno;
3508 :
3509 : /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3510 : We use value of -2 to specify that current function call is MSABI. */
3511 296428 : if (mode == VOIDmode)
3512 36295 : return GEN_INT (-2);
3513 :
3514 : /* If we've run out of registers, it goes on the stack. */
3515 260133 : if (cum->nregs == 0)
3516 : return NULL_RTX;
3517 :
3518 176374 : regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3519 :
3520 : /* Only floating point modes less than 64 bits are passed in anything but
3521 : integer regs. Larger floating point types are excluded as the Windows
3522 : ABI requires vreg args can be shadowed in GPRs (for red zone / varargs). */
3523 176374 : if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
3524 : {
3525 38260 : if (named)
3526 : {
3527 38260 : if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3528 37263 : regno = cum->regno + FIRST_SSE_REG;
3529 : }
3530 : else
3531 : {
3532 0 : rtx t1, t2;
3533 :
3534 : /* Unnamed floating parameters are passed in both the
3535 : SSE and integer registers. */
3536 0 : t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3537 0 : t2 = gen_rtx_REG (mode, regno);
3538 0 : t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3539 0 : t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3540 0 : return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3541 : }
3542 : }
3543 : /* Handle aggregated types passed in register. */
3544 176374 : if (orig_mode == BLKmode)
3545 : {
3546 0 : if (bytes > 0 && bytes <= 8)
3547 0 : mode = (bytes > 4 ? DImode : SImode);
3548 0 : if (mode == BLKmode)
3549 0 : mode = DImode;
3550 : }
3551 :
3552 176374 : return gen_reg_or_parallel (mode, orig_mode, regno);
3553 : }
3554 :
3555 : /* Return where to put the arguments to a function.
3556 : Return zero to push the argument on the stack, or a hard register in which to store the argument.
3557 :
3558 : ARG describes the argument while CUM gives information about the
3559 : preceding args and about the function being called. */
3560 :
3561 : static rtx
3562 21509463 : ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3563 : {
3564 21509463 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3565 21509463 : machine_mode mode = arg.mode;
3566 21509463 : HOST_WIDE_INT bytes, words;
3567 21509463 : rtx reg;
3568 :
3569 21509463 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3570 : {
3571 187 : gcc_assert (arg.type != NULL_TREE);
3572 187 : if (POINTER_TYPE_P (arg.type))
3573 : {
3574 : /* This is the pointer argument. */
3575 122 : gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3576 : /* It is at -WORD(AP) in the current frame in interrupt and
3577 : exception handlers. */
3578 122 : reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3579 : }
3580 : else
3581 : {
3582 65 : gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3583 : && TREE_CODE (arg.type) == INTEGER_TYPE
3584 : && TYPE_MODE (arg.type) == word_mode);
3585 : /* The error code is the word-mode integer argument at
3586 : -2 * WORD(AP) in the current frame of the exception
3587 : handler. */
3588 65 : reg = gen_rtx_MEM (word_mode,
3589 65 : plus_constant (Pmode,
3590 : arg_pointer_rtx,
3591 65 : -2 * UNITS_PER_WORD));
3592 : }
3593 187 : return reg;
3594 : }
3595 :
3596 21509276 : bytes = arg.promoted_size_in_bytes ();
3597 21509276 : words = CEIL (bytes, UNITS_PER_WORD);
3598 :
3599 : /* To simplify the code below, represent vector types with a vector mode
3600 : even if MMX/SSE are not active. */
3601 21509276 : if (arg.type && VECTOR_TYPE_P (arg.type))
3602 172194 : mode = type_natural_mode (arg.type, cum, false);
3603 :
3604 21509276 : if (TARGET_64BIT)
3605 : {
3606 18951982 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3607 :
3608 18951982 : if (call_abi == MS_ABI)
3609 296428 : reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3610 296428 : arg.type, bytes);
3611 : else
3612 18655554 : reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3613 : }
3614 : else
3615 2557294 : reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3616 :
3617 : /* Track if there are outgoing arguments on stack. */
3618 21509276 : if (reg == NULL_RTX && cum->caller)
3619 2201158 : cfun->machine->outgoing_args_on_stack = true;
3620 :
3621 : return reg;
3622 : }
3623 :
3624 : /* A C expression that indicates when an argument must be passed by
3625 : reference. If nonzero for an argument, a copy of that argument is
3626 : made in memory and a pointer to the argument is passed instead of
3627 : the argument itself. The pointer is passed in whatever way is
3628 : appropriate for passing a pointer to that type. */
3629 :
3630 : static bool
3631 21443588 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3632 : {
3633 21443588 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3634 :
3635 21443588 : if (TARGET_64BIT)
3636 : {
3637 19323398 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3638 :
3639 : /* See Windows x64 Software Convention. */
3640 19323398 : if (call_abi == MS_ABI)
3641 : {
3642 441562 : HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3643 :
3644 441562 : if (tree type = arg.type)
3645 : {
3646 : /* Arrays are passed by reference. */
3647 441562 : if (TREE_CODE (type) == ARRAY_TYPE)
3648 : return true;
3649 :
3650 441562 : if (RECORD_OR_UNION_TYPE_P (type))
3651 : {
3652 : /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3653 : are passed by reference. */
3654 15103 : msize = int_size_in_bytes (type);
3655 : }
3656 : }
3657 :
3658 : /* __m128 is passed by reference. */
3659 873131 : return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3660 : }
3661 18881836 : else if (arg.type && int_size_in_bytes (arg.type) == -1)
3662 : return true;
3663 : }
3664 :
3665 : return false;
3666 : }
3667 :
3668 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3669 : passing ABI. XXX: This function is obsolete and is only used for
3670 : checking psABI compatibility with previous versions of GCC. */
3671 :
3672 : static bool
3673 1975539 : ix86_compat_aligned_value_p (const_tree type)
3674 : {
3675 1975539 : machine_mode mode = TYPE_MODE (type);
3676 1975539 : if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3677 1975497 : || mode == TDmode
3678 1975497 : || mode == TFmode
3679 : || mode == TCmode)
3680 1975751 : && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3681 : return true;
3682 1975327 : if (TYPE_ALIGN (type) < 128)
3683 : return false;
3684 :
3685 0 : if (AGGREGATE_TYPE_P (type))
3686 : {
3687 : /* Walk the aggregates recursively. */
3688 0 : switch (TREE_CODE (type))
3689 : {
3690 0 : case RECORD_TYPE:
3691 0 : case UNION_TYPE:
3692 0 : case QUAL_UNION_TYPE:
3693 0 : {
3694 0 : tree field;
3695 :
3696 : /* Walk all the structure fields. */
3697 0 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3698 : {
3699 0 : if (TREE_CODE (field) == FIELD_DECL
3700 0 : && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3701 : return true;
3702 : }
3703 : break;
3704 : }
3705 :
3706 0 : case ARRAY_TYPE:
3707 : /* Just for use if some languages passes arrays by value. */
3708 0 : if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3709 : return true;
3710 : break;
3711 :
3712 : default:
3713 : gcc_unreachable ();
3714 : }
3715 : }
3716 : return false;
3717 : }
3718 :
3719 : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3720 : XXX: This function is obsolete and is only used for checking psABI
3721 : compatibility with previous versions of GCC. */
3722 :
3723 : static unsigned int
3724 5554748 : ix86_compat_function_arg_boundary (machine_mode mode,
3725 : const_tree type, unsigned int align)
3726 : {
3727 : /* In 32bit, only _Decimal128 and __float128 are aligned to their
3728 : natural boundaries. */
3729 5554748 : if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3730 : {
3731 : /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3732 : make an exception for SSE modes since these require 128bit
3733 : alignment.
3734 :
3735 : The handling here differs from field_alignment. ICC aligns MMX
3736 : arguments to 4 byte boundaries, while structure fields are aligned
3737 : to 8 byte boundaries. */
3738 1987447 : if (!type)
3739 : {
3740 11908 : if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3741 1987235 : align = PARM_BOUNDARY;
3742 : }
3743 : else
3744 : {
3745 1975539 : if (!ix86_compat_aligned_value_p (type))
3746 1987235 : align = PARM_BOUNDARY;
3747 : }
3748 : }
3749 10708329 : if (align > BIGGEST_ALIGNMENT)
3750 90 : align = BIGGEST_ALIGNMENT;
3751 5554748 : return align;
3752 : }
3753 :
3754 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3755 : passing ABI. */
3756 :
3757 : static bool
3758 1978222 : ix86_contains_aligned_value_p (const_tree type)
3759 : {
3760 1978222 : machine_mode mode = TYPE_MODE (type);
3761 :
3762 1978222 : if (mode == XFmode || mode == XCmode)
3763 : return false;
3764 :
3765 1976081 : if (TYPE_ALIGN (type) < 128)
3766 : return false;
3767 :
3768 2895 : if (AGGREGATE_TYPE_P (type))
3769 : {
3770 : /* Walk the aggregates recursively. */
3771 0 : switch (TREE_CODE (type))
3772 : {
3773 0 : case RECORD_TYPE:
3774 0 : case UNION_TYPE:
3775 0 : case QUAL_UNION_TYPE:
3776 0 : {
3777 0 : tree field;
3778 :
3779 : /* Walk all the structure fields. */
3780 0 : for (field = TYPE_FIELDS (type);
3781 0 : field;
3782 0 : field = DECL_CHAIN (field))
3783 : {
3784 0 : if (TREE_CODE (field) == FIELD_DECL
3785 0 : && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3786 : return true;
3787 : }
3788 : break;
3789 : }
3790 :
3791 0 : case ARRAY_TYPE:
3792 : /* Just for use if some languages passes arrays by value. */
3793 0 : if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3794 : return true;
3795 : break;
3796 :
3797 : default:
3798 : gcc_unreachable ();
3799 : }
3800 : }
3801 : else
3802 2895 : return TYPE_ALIGN (type) >= 128;
3803 :
3804 : return false;
3805 : }
3806 :
3807 : /* Gives the alignment boundary, in bits, of an argument with the
3808 : specified mode and type. */
3809 :
3810 : static unsigned int
3811 10962581 : ix86_function_arg_boundary (machine_mode mode, const_tree type)
3812 : {
3813 10962581 : unsigned int align;
3814 10962581 : if (type)
3815 : {
3816 : /* Since the main variant type is used for call, we convert it to
3817 : the main variant type. */
3818 10922796 : type = TYPE_MAIN_VARIANT (type);
3819 10922796 : align = TYPE_ALIGN (type);
3820 10922796 : if (TYPE_EMPTY_P (type))
3821 25004 : return PARM_BOUNDARY;
3822 : }
3823 : else
3824 39785 : align = GET_MODE_ALIGNMENT (mode);
3825 12962922 : if (align < PARM_BOUNDARY)
3826 4120158 : align = PARM_BOUNDARY;
3827 : else
3828 : {
3829 6817419 : static bool warned;
3830 6817419 : unsigned int saved_align = align;
3831 :
3832 6817419 : if (!TARGET_64BIT)
3833 : {
3834 : /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3835 2013984 : if (!type)
3836 : {
3837 35762 : if (mode == XFmode || mode == XCmode)
3838 : align = PARM_BOUNDARY;
3839 : }
3840 1978222 : else if (!ix86_contains_aligned_value_p (type))
3841 : align = PARM_BOUNDARY;
3842 :
3843 38657 : if (align < 128)
3844 1987235 : align = PARM_BOUNDARY;
3845 : }
3846 :
3847 6817419 : if (warn_psabi
3848 5559584 : && !warned
3849 12372167 : && align != ix86_compat_function_arg_boundary (mode, type,
3850 : saved_align))
3851 : {
3852 90 : warned = true;
3853 90 : inform (input_location,
3854 : "the ABI for passing parameters with %d-byte"
3855 : " alignment has changed in GCC 4.6",
3856 : align / BITS_PER_UNIT);
3857 : }
3858 : }
3859 :
3860 : return align;
3861 : }
3862 :
3863 : /* Return true if N is a possible register number of function value. */
3864 :
3865 : static bool
3866 4689721 : ix86_function_value_regno_p (const unsigned int regno)
3867 : {
3868 4689721 : switch (regno)
3869 : {
3870 : case AX_REG:
3871 : return true;
3872 102091 : case DX_REG:
3873 102091 : return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3874 :
3875 : /* Complex values are returned in %st(0)/%st(1) pair. */
3876 25299 : case ST0_REG:
3877 25299 : case ST1_REG:
3878 : /* TODO: The function should depend on current function ABI but
3879 : builtins.cc would need updating then. Therefore we use the
3880 : default ABI. */
3881 25299 : if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3882 : return false;
3883 25299 : return TARGET_FLOAT_RETURNS_IN_80387;
3884 :
3885 : /* Complex values are returned in %xmm0/%xmm1 pair. */
3886 1291414 : case XMM0_REG:
3887 1291414 : case XMM1_REG:
3888 1291414 : return TARGET_SSE;
3889 :
3890 10080 : case MM0_REG:
3891 10080 : if (TARGET_MACHO || TARGET_64BIT)
3892 : return false;
3893 2494 : return TARGET_MMX;
3894 : }
3895 :
3896 : return false;
3897 : }
3898 :
3899 : /* Check whether the register REGNO should be zeroed on X86.
3900 : When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3901 : together, no need to zero it again.
3902 : When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3903 :
3904 : static bool
3905 1377 : zero_call_used_regno_p (const unsigned int regno,
3906 : bool all_sse_zeroed,
3907 : bool need_zero_mmx)
3908 : {
3909 835 : return GENERAL_REGNO_P (regno)
3910 819 : || (!all_sse_zeroed && SSE_REGNO_P (regno))
3911 439 : || MASK_REGNO_P (regno)
3912 1800 : || (need_zero_mmx && MMX_REGNO_P (regno));
3913 : }
3914 :
3915 : /* Return the machine_mode that is used to zero register REGNO. */
3916 :
3917 : static machine_mode
3918 954 : zero_call_used_regno_mode (const unsigned int regno)
3919 : {
3920 : /* NB: We only need to zero the lower 32 bits for integer registers
3921 : and the lower 128 bits for vector registers since destination are
3922 : zero-extended to the full register width. */
3923 954 : if (GENERAL_REGNO_P (regno))
3924 : return SImode;
3925 : else if (SSE_REGNO_P (regno))
3926 380 : return V4SFmode;
3927 : else if (MASK_REGNO_P (regno))
3928 : return HImode;
3929 : else if (MMX_REGNO_P (regno))
3930 0 : return V2SImode;
3931 : else
3932 0 : gcc_unreachable ();
3933 : }
3934 :
3935 : /* Generate a rtx to zero all vector registers together if possible,
3936 : otherwise, return NULL. */
3937 :
3938 : static rtx
3939 131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3940 : {
3941 131 : if (!TARGET_AVX)
3942 : return NULL;
3943 :
3944 372 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3945 368 : if ((LEGACY_SSE_REGNO_P (regno)
3946 336 : || (TARGET_64BIT
3947 336 : && (REX_SSE_REGNO_P (regno)
3948 304 : || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3949 432 : && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3950 : return NULL;
3951 :
3952 4 : return gen_avx_vzeroall ();
3953 : }
3954 :
3955 : /* Generate insns to zero all st registers together.
3956 : Return true when zeroing instructions are generated.
3957 : Assume the number of st registers that are zeroed is num_of_st,
3958 : we will emit the following sequence to zero them together:
3959 : fldz; \
3960 : fldz; \
3961 : ...
3962 : fldz; \
3963 : fstp %%st(0); \
3964 : fstp %%st(0); \
3965 : ...
3966 : fstp %%st(0);
3967 : i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3968 : mark stack slots empty.
3969 :
3970 : How to compute the num_of_st:
3971 : There is no direct mapping from stack registers to hard register
3972 : numbers. If one stack register needs to be cleared, we don't know
3973 : where in the stack the value remains. So, if any stack register
3974 : needs to be cleared, the whole stack should be cleared. However,
3975 : x87 stack registers that hold the return value should be excluded.
3976 : x87 returns in the top (two for complex values) register, so
3977 : num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3978 : return the value of num_of_st. */
3979 :
3980 :
3981 : static int
3982 131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3983 : {
3984 :
3985 : /* If the FPU is disabled, no need to zero all st registers. */
3986 131 : if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3987 : return 0;
3988 :
3989 10329 : unsigned int num_of_st = 0;
3990 10329 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3991 10220 : if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3992 10220 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3993 : {
3994 : num_of_st++;
3995 : break;
3996 : }
3997 :
3998 130 : if (num_of_st == 0)
3999 : return 0;
4000 :
4001 21 : bool return_with_x87 = false;
4002 42 : return_with_x87 = (crtl->return_rtx
4003 21 : && (STACK_REG_P (crtl->return_rtx)));
4004 :
4005 21 : bool complex_return = false;
4006 42 : complex_return = (crtl->return_rtx
4007 21 : && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
4008 :
4009 21 : if (return_with_x87)
4010 2 : if (complex_return)
4011 : num_of_st = 6;
4012 : else
4013 1 : num_of_st = 7;
4014 : else
4015 : num_of_st = 8;
4016 :
4017 21 : rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
4018 186 : for (unsigned int i = 0; i < num_of_st; i++)
4019 165 : emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
4020 :
4021 186 : for (unsigned int i = 0; i < num_of_st; i++)
4022 : {
4023 165 : rtx insn;
4024 165 : insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
4025 165 : add_reg_note (insn, REG_DEAD, st_reg);
4026 : }
4027 21 : return num_of_st;
4028 : }
4029 :
4030 :
4031 : /* When the routine exit in MMX mode, if any ST register needs
4032 : to be zeroed, we should clear all MMX registers except the
4033 : RET_MMX_REGNO that holds the return value. */
4034 : static bool
4035 0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
4036 : unsigned int ret_mmx_regno)
4037 : {
4038 0 : bool need_zero_all_mm = false;
4039 0 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4040 0 : if (STACK_REGNO_P (regno)
4041 0 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4042 : {
4043 : need_zero_all_mm = true;
4044 : break;
4045 : }
4046 :
4047 0 : if (!need_zero_all_mm)
4048 : return false;
4049 :
4050 : machine_mode mode = V2SImode;
4051 0 : for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4052 0 : if (regno != ret_mmx_regno)
4053 : {
4054 0 : rtx reg = gen_rtx_REG (mode, regno);
4055 0 : emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
4056 : }
4057 : return true;
4058 : }
4059 :
4060 : /* TARGET_ZERO_CALL_USED_REGS. */
4061 : /* Generate a sequence of instructions that zero registers specified by
4062 : NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
4063 : zeroed. */
4064 : static HARD_REG_SET
4065 131 : ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
4066 : {
4067 131 : HARD_REG_SET zeroed_hardregs;
4068 131 : bool all_sse_zeroed = false;
4069 131 : int all_st_zeroed_num = 0;
4070 131 : bool all_mm_zeroed = false;
4071 :
4072 131 : CLEAR_HARD_REG_SET (zeroed_hardregs);
4073 :
4074 : /* first, let's see whether we can zero all vector registers together. */
4075 131 : rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
4076 131 : if (zero_all_vec_insn)
4077 : {
4078 4 : emit_insn (zero_all_vec_insn);
4079 4 : all_sse_zeroed = true;
4080 4 : if (TARGET_64BIT && TARGET_AVX512F)
4081 : {
4082 2 : rtx zero = CONST0_RTX (V4SFmode);
4083 34 : for (unsigned int regno = XMM16_REG;
4084 34 : regno <= XMM31_REG;
4085 : regno++)
4086 : {
4087 32 : rtx reg = gen_rtx_REG (V4SFmode, regno);
4088 32 : emit_move_insn (reg, zero);
4089 : }
4090 : }
4091 : }
4092 :
4093 : /* mm/st registers are shared registers set, we should follow the following
4094 : rules to clear them:
4095 : MMX exit mode x87 exit mode
4096 : -------------|----------------------|---------------
4097 : uses x87 reg | clear all MMX | clear all x87
4098 : uses MMX reg | clear individual MMX | clear all x87
4099 : x87 + MMX | clear all MMX | clear all x87
4100 :
4101 : first, we should decide which mode (MMX mode or x87 mode) the function
4102 : exit with. */
4103 :
4104 131 : bool exit_with_mmx_mode = (crtl->return_rtx
4105 131 : && (MMX_REG_P (crtl->return_rtx)));
4106 :
4107 131 : if (!exit_with_mmx_mode)
4108 : /* x87 exit mode, we should zero all st registers together. */
4109 : {
4110 131 : all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
4111 :
4112 131 : if (all_st_zeroed_num > 0)
4113 189 : for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
4114 : /* x87 stack registers that hold the return value should be excluded.
4115 : x87 returns in the top (two for complex values) register. */
4116 168 : if (all_st_zeroed_num == 8
4117 168 : || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
4118 : || (all_st_zeroed_num == 6
4119 7 : && (regno == (REGNO (crtl->return_rtx) + 1)))))
4120 165 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4121 : }
4122 : else
4123 : /* MMX exit mode, check whether we can zero all mm registers. */
4124 : {
4125 0 : unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
4126 0 : all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
4127 : exit_mmx_regno);
4128 0 : if (all_mm_zeroed)
4129 0 : for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4130 0 : if (regno != exit_mmx_regno)
4131 0 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4132 : }
4133 :
4134 : /* Now, generate instructions to zero all the other registers. */
4135 :
4136 12183 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4137 : {
4138 12052 : if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4139 10675 : continue;
4140 1800 : if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4141 1377 : exit_with_mmx_mode && !all_mm_zeroed))
4142 423 : continue;
4143 :
4144 954 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4145 :
4146 954 : machine_mode mode = zero_call_used_regno_mode (regno);
4147 :
4148 954 : rtx reg = gen_rtx_REG (mode, regno);
4149 954 : rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4150 :
4151 954 : switch (mode)
4152 : {
4153 558 : case E_SImode:
4154 558 : if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4155 : {
4156 558 : rtx clob = gen_rtx_CLOBBER (VOIDmode,
4157 : gen_rtx_REG (CCmode,
4158 : FLAGS_REG));
4159 558 : tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4160 : tmp,
4161 : clob));
4162 : }
4163 : /* FALLTHRU. */
4164 :
4165 954 : case E_V4SFmode:
4166 954 : case E_HImode:
4167 954 : case E_V2SImode:
4168 954 : emit_insn (tmp);
4169 954 : break;
4170 :
4171 0 : default:
4172 0 : gcc_unreachable ();
4173 : }
4174 : }
4175 131 : return zeroed_hardregs;
4176 : }
4177 :
4178 : /* Define how to find the value returned by a function.
4179 : VALTYPE is the data type of the value (as a tree).
4180 : If the precise function being called is known, FUNC is its FUNCTION_DECL;
4181 : otherwise, FUNC is 0. */
4182 :
4183 : static rtx
4184 3933821 : function_value_32 (machine_mode orig_mode, machine_mode mode,
4185 : const_tree fntype, const_tree fn)
4186 : {
4187 3933821 : unsigned int regno;
4188 :
4189 : /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4190 : we normally prevent this case when mmx is not available. However
4191 : some ABIs may require the result to be returned like DImode. */
4192 4201906 : if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4193 : regno = FIRST_MMX_REG;
4194 :
4195 : /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4196 : we prevent this case when sse is not available. However some ABIs
4197 : may require the result to be returned like integer TImode. */
4198 3924545 : else if (mode == TImode
4199 4183354 : || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4200 : regno = FIRST_SSE_REG;
4201 :
4202 : /* 32-byte vector modes in %ymm0. */
4203 3965456 : else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4204 : regno = FIRST_SSE_REG;
4205 :
4206 : /* 64-byte vector modes in %zmm0. */
4207 3821258 : else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4208 : regno = FIRST_SSE_REG;
4209 :
4210 : /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4211 3665736 : else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4212 : regno = FIRST_FLOAT_REG;
4213 : else
4214 : /* Most things go in %eax. */
4215 3601349 : regno = AX_REG;
4216 :
4217 : /* Return __bf16/ _Float16/_Complex _Foat16 by sse register. */
4218 3933821 : if (mode == HFmode || mode == BFmode)
4219 : {
4220 1907 : if (!TARGET_SSE2)
4221 : {
4222 0 : error ("SSE register return with SSE2 disabled");
4223 0 : regno = AX_REG;
4224 : }
4225 : else
4226 : regno = FIRST_SSE_REG;
4227 : }
4228 :
4229 3933821 : if (mode == HCmode)
4230 : {
4231 129 : if (!TARGET_SSE2)
4232 0 : error ("SSE register return with SSE2 disabled");
4233 :
4234 129 : rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
4235 258 : XVECEXP (ret, 0, 0)
4236 258 : = gen_rtx_EXPR_LIST (VOIDmode,
4237 : gen_rtx_REG (SImode,
4238 129 : TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4239 : GEN_INT (0));
4240 129 : return ret;
4241 : }
4242 :
4243 : /* Override FP return register with %xmm0 for local functions when
4244 : SSE math is enabled or for functions with sseregparm attribute. */
4245 3933692 : if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4246 : {
4247 50308 : int sse_level = ix86_function_sseregparm (fntype, fn, false);
4248 50308 : if (sse_level == -1)
4249 : {
4250 0 : error ("calling %qD with SSE calling convention without "
4251 : "SSE/SSE2 enabled", fn);
4252 0 : sorry ("this is a GCC bug that can be worked around by adding "
4253 : "attribute used to function called");
4254 : }
4255 50308 : else if ((sse_level >= 1 && mode == SFmode)
4256 50308 : || (sse_level == 2 && mode == DFmode))
4257 : regno = FIRST_SSE_REG;
4258 : }
4259 :
4260 : /* OImode shouldn't be used directly. */
4261 3933692 : gcc_assert (mode != OImode);
4262 :
4263 3933692 : return gen_rtx_REG (orig_mode, regno);
4264 : }
4265 :
4266 : static rtx
4267 97769118 : function_value_64 (machine_mode orig_mode, machine_mode mode,
4268 : const_tree valtype)
4269 : {
4270 97769118 : rtx ret;
4271 :
4272 : /* Handle libcalls, which don't provide a type node. */
4273 97769118 : if (valtype == NULL)
4274 : {
4275 102755 : unsigned int regno;
4276 :
4277 102755 : switch (mode)
4278 : {
4279 : case E_BFmode:
4280 : case E_HFmode:
4281 : case E_HCmode:
4282 : case E_SFmode:
4283 : case E_SCmode:
4284 : case E_DFmode:
4285 : case E_DCmode:
4286 : case E_TFmode:
4287 : case E_SDmode:
4288 : case E_DDmode:
4289 : case E_TDmode:
4290 : regno = FIRST_SSE_REG;
4291 : break;
4292 1040 : case E_XFmode:
4293 1040 : case E_XCmode:
4294 1040 : regno = FIRST_FLOAT_REG;
4295 1040 : break;
4296 : case E_TCmode:
4297 : return NULL;
4298 56288 : default:
4299 56288 : regno = AX_REG;
4300 : }
4301 :
4302 102755 : return gen_rtx_REG (mode, regno);
4303 : }
4304 97666363 : else if (POINTER_TYPE_P (valtype))
4305 : {
4306 : /* Pointers are always returned in word_mode. */
4307 16048163 : mode = word_mode;
4308 : }
4309 :
4310 97666363 : ret = construct_container (mode, orig_mode, valtype, true,
4311 : X86_64_MAX_RETURN_NREGS,
4312 : X86_64_MAX_SSE_RETURN_NREGS,
4313 : x86_64_int_return_registers, 0);
4314 :
4315 : /* For zero sized structures, construct_container returns NULL, but we
4316 : need to keep rest of compiler happy by returning meaningful value. */
4317 97666363 : if (!ret)
4318 200846 : ret = gen_rtx_REG (orig_mode, AX_REG);
4319 :
4320 : return ret;
4321 : }
4322 :
4323 : static rtx
4324 0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4325 : const_tree fntype, const_tree fn, const_tree valtype)
4326 : {
4327 0 : unsigned int regno;
4328 :
4329 : /* Floating point return values in %st(0)
4330 : (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4331 0 : if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4332 0 : && (GET_MODE_SIZE (mode) > 8
4333 0 : || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4334 : {
4335 0 : regno = FIRST_FLOAT_REG;
4336 0 : return gen_rtx_REG (orig_mode, regno);
4337 : }
4338 : else
4339 0 : return function_value_32(orig_mode, mode, fntype,fn);
4340 : }
4341 :
4342 : static rtx
4343 787708 : function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4344 : const_tree valtype)
4345 : {
4346 787708 : unsigned int regno = AX_REG;
4347 :
4348 787708 : if (TARGET_SSE)
4349 : {
4350 786981 : unsigned int mode_size = GET_MODE_SIZE (mode);
4351 :
4352 786981 : switch (mode_size)
4353 : {
4354 34397 : case 16:
4355 34397 : case 32:
4356 34397 : case 64:
4357 34397 : if (mode_size == 32 && !TARGET_AVX)
4358 : break;
4359 34397 : if (mode_size == 64 && !TARGET_AVX512F)
4360 : break;
4361 34397 : if (valtype != NULL_TREE
4362 34397 : && !VECTOR_INTEGER_TYPE_P (valtype)
4363 15828 : && !INTEGRAL_TYPE_P (valtype)
4364 50225 : && !VECTOR_FLOAT_TYPE_P (valtype))
4365 : break;
4366 34397 : if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4367 : && !COMPLEX_MODE_P (mode))
4368 218079 : regno = FIRST_SSE_REG;
4369 : break;
4370 741282 : case 8:
4371 741282 : case 4:
4372 741282 : case 2:
4373 741282 : if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4374 : break;
4375 723988 : if (mode == HFmode || mode == SFmode || mode == DFmode)
4376 218079 : regno = FIRST_SSE_REG;
4377 : break;
4378 : default:
4379 : break;
4380 : }
4381 : }
4382 787708 : return gen_rtx_REG (orig_mode, regno);
4383 : }
4384 :
4385 : static rtx
4386 102490647 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4387 : machine_mode orig_mode, machine_mode mode)
4388 : {
4389 102490647 : const_tree fn, fntype;
4390 :
4391 102490647 : fn = NULL_TREE;
4392 102490647 : if (fntype_or_decl && DECL_P (fntype_or_decl))
4393 3535719 : fn = fntype_or_decl;
4394 3535719 : fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4395 :
4396 102490647 : if (ix86_function_type_abi (fntype) == MS_ABI)
4397 : {
4398 787708 : if (TARGET_64BIT)
4399 787708 : return function_value_ms_64 (orig_mode, mode, valtype);
4400 : else
4401 0 : return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4402 : }
4403 101702939 : else if (TARGET_64BIT)
4404 97769118 : return function_value_64 (orig_mode, mode, valtype);
4405 : else
4406 3933821 : return function_value_32 (orig_mode, mode, fntype, fn);
4407 : }
4408 :
4409 : static rtx
4410 102384754 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4411 : {
4412 102384754 : machine_mode mode, orig_mode;
4413 :
4414 102384754 : orig_mode = TYPE_MODE (valtype);
4415 102384754 : mode = type_natural_mode (valtype, NULL, true);
4416 102384754 : return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4417 : }
4418 :
4419 : /* Pointer function arguments and return values are promoted to
4420 : word_mode for normal functions. */
4421 :
4422 : static machine_mode
4423 31959264 : ix86_promote_function_mode (const_tree type, machine_mode mode,
4424 : int *punsignedp, const_tree fntype,
4425 : int for_return)
4426 : {
4427 31959264 : if (cfun->machine->func_type == TYPE_NORMAL
4428 31958241 : && type != NULL_TREE
4429 31924110 : && POINTER_TYPE_P (type))
4430 : {
4431 15932190 : *punsignedp = POINTERS_EXTEND_UNSIGNED;
4432 15932190 : return word_mode;
4433 : }
4434 16027074 : return default_promote_function_mode (type, mode, punsignedp, fntype,
4435 16027074 : for_return);
4436 : }
4437 :
4438 : /* Return true if a structure, union or array with MODE containing FIELD
4439 : should be accessed using BLKmode. */
4440 :
4441 : static bool
4442 143127462 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4443 : {
4444 : /* Union with XFmode must be in BLKmode. */
4445 143127462 : return (mode == XFmode
4446 143264159 : && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4447 129665 : || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4448 : }
4449 :
4450 : rtx
4451 105893 : ix86_libcall_value (machine_mode mode)
4452 : {
4453 105893 : return ix86_function_value_1 (NULL, NULL, mode, mode);
4454 : }
4455 :
4456 : /* Return true iff type is returned in memory. */
4457 :
4458 : static bool
4459 104383167 : ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4460 : {
4461 104383167 : const machine_mode mode = type_natural_mode (type, NULL, true);
4462 104383167 : HOST_WIDE_INT size;
4463 :
4464 104383167 : if (TARGET_64BIT)
4465 : {
4466 99840893 : if (ix86_function_type_abi (fntype) == MS_ABI)
4467 : {
4468 707133 : size = int_size_in_bytes (type);
4469 :
4470 : /* __m128 is returned in xmm0. 256/512-bit vector values are
4471 : returned in ymm0/zmm0 when AVX/AVX512 is enabled. */
4472 707133 : if ((!type || VECTOR_INTEGER_TYPE_P (type)
4473 687562 : || INTEGRAL_TYPE_P (type)
4474 217159 : || VECTOR_FLOAT_TYPE_P (type))
4475 505802 : && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4476 : && !COMPLEX_MODE_P (mode)
4477 1212935 : && ((GET_MODE_SIZE (mode) == 16 || size == 16)
4478 519126 : || (TARGET_AVX && (GET_MODE_SIZE (mode) == 32 || size == 32))
4479 482061 : || (TARGET_AVX512F
4480 16590 : && (GET_MODE_SIZE (mode) == 64 || size == 64))))
4481 : return false;
4482 :
4483 : /* Otherwise, the size must be exactly in [1248]. */
4484 1329376 : return size != 1 && size != 2 && size != 4 && size != 8;
4485 : }
4486 : else
4487 : {
4488 99133760 : int needed_intregs, needed_sseregs;
4489 :
4490 99133760 : return examine_argument (mode, type, true,
4491 : &needed_intregs, &needed_sseregs);
4492 : }
4493 : }
4494 : else
4495 : {
4496 4542274 : size = int_size_in_bytes (type);
4497 :
4498 : /* Intel MCU psABI returns scalars and aggregates no larger than 8
4499 : bytes in registers. */
4500 4542274 : if (TARGET_IAMCU)
4501 0 : return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4502 :
4503 4542274 : if (mode == BLKmode)
4504 : return true;
4505 :
4506 4542274 : if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4507 : return false;
4508 :
4509 4542274 : if (VECTOR_MODE_P (mode) || mode == TImode)
4510 : {
4511 : /* User-created vectors small enough to fit in EAX. */
4512 268055 : if (size < 8)
4513 : return false;
4514 :
4515 : /* Unless ABI prescribes otherwise,
4516 : MMX/3dNow values are returned in MM0 if available. */
4517 :
4518 268055 : if (size == 8)
4519 9266 : return TARGET_VECT8_RETURNS || !TARGET_MMX;
4520 :
4521 : /* SSE values are returned in XMM0 if available. */
4522 258789 : if (size == 16)
4523 108939 : return !TARGET_SSE;
4524 :
4525 : /* AVX values are returned in YMM0 if available. */
4526 149850 : if (size == 32)
4527 72090 : return !TARGET_AVX;
4528 :
4529 : /* AVX512F values are returned in ZMM0 if available. */
4530 77760 : if (size == 64)
4531 77760 : return !TARGET_AVX512F;
4532 : }
4533 :
4534 4274219 : if (mode == XFmode)
4535 : return false;
4536 :
4537 4262499 : if (size > 12)
4538 : return true;
4539 :
4540 : /* OImode shouldn't be used directly. */
4541 3280693 : gcc_assert (mode != OImode);
4542 :
4543 : return false;
4544 : }
4545 : }
4546 :
4547 : /* Implement TARGET_PUSH_ARGUMENT. */
4548 :
4549 : static bool
4550 9323946 : ix86_push_argument (unsigned int npush)
4551 : {
4552 : /* If SSE2 is available, use vector move to put large argument onto
4553 : stack. NB: In 32-bit mode, use 8-byte vector move. */
4554 11750283 : return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4555 9058769 : && TARGET_PUSH_ARGS
4556 18382617 : && !ACCUMULATE_OUTGOING_ARGS);
4557 : }
4558 :
4559 :
4560 : /* Create the va_list data type. */
4561 :
4562 : static tree
4563 291323 : ix86_build_builtin_va_list_64 (void)
4564 : {
4565 291323 : tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4566 :
4567 291323 : record = lang_hooks.types.make_type (RECORD_TYPE);
4568 291323 : type_decl = build_decl (BUILTINS_LOCATION,
4569 : TYPE_DECL, get_identifier ("__va_list_tag"), record);
4570 :
4571 291323 : f_gpr = build_decl (BUILTINS_LOCATION,
4572 : FIELD_DECL, get_identifier ("gp_offset"),
4573 : unsigned_type_node);
4574 291323 : f_fpr = build_decl (BUILTINS_LOCATION,
4575 : FIELD_DECL, get_identifier ("fp_offset"),
4576 : unsigned_type_node);
4577 291323 : f_ovf = build_decl (BUILTINS_LOCATION,
4578 : FIELD_DECL, get_identifier ("overflow_arg_area"),
4579 : ptr_type_node);
4580 291323 : f_sav = build_decl (BUILTINS_LOCATION,
4581 : FIELD_DECL, get_identifier ("reg_save_area"),
4582 : ptr_type_node);
4583 :
4584 291323 : va_list_gpr_counter_field = f_gpr;
4585 291323 : va_list_fpr_counter_field = f_fpr;
4586 :
4587 291323 : DECL_FIELD_CONTEXT (f_gpr) = record;
4588 291323 : DECL_FIELD_CONTEXT (f_fpr) = record;
4589 291323 : DECL_FIELD_CONTEXT (f_ovf) = record;
4590 291323 : DECL_FIELD_CONTEXT (f_sav) = record;
4591 :
4592 291323 : TYPE_STUB_DECL (record) = type_decl;
4593 291323 : TYPE_NAME (record) = type_decl;
4594 291323 : TYPE_FIELDS (record) = f_gpr;
4595 291323 : DECL_CHAIN (f_gpr) = f_fpr;
4596 291323 : DECL_CHAIN (f_fpr) = f_ovf;
4597 291323 : DECL_CHAIN (f_ovf) = f_sav;
4598 291323 : TREE_PUBLIC (type_decl) = 1;
4599 :
4600 291323 : layout_type (record);
4601 :
4602 291323 : TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4603 291323 : NULL_TREE, TYPE_ATTRIBUTES (record));
4604 :
4605 : /* The correct type is an array type of one element. */
4606 291323 : return build_array_type (record, build_index_type (size_zero_node));
4607 : }
4608 :
4609 : /* Setup the builtin va_list data type and for 64-bit the additional
4610 : calling convention specific va_list data types. */
4611 :
4612 : static tree
4613 298479 : ix86_build_builtin_va_list (void)
4614 : {
4615 298479 : if (TARGET_64BIT)
4616 : {
4617 : /* Initialize ABI specific va_list builtin types.
4618 :
4619 : In lto1, we can encounter two va_list types:
4620 : - one as a result of the type-merge across TUs, and
4621 : - the one constructed here.
4622 : These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4623 : a type identity check in canonical_va_list_type based on
4624 : TYPE_MAIN_VARIANT (which we used to have) will not work.
4625 : Instead, we tag each va_list_type_node with its unique attribute, and
4626 : look for the attribute in the type identity check in
4627 : canonical_va_list_type.
4628 :
4629 : Tagging sysv_va_list_type_node directly with the attribute is
4630 : problematic since it's a array of one record, which will degrade into a
4631 : pointer to record when used as parameter (see build_va_arg comments for
4632 : an example), dropping the attribute in the process. So we tag the
4633 : record instead. */
4634 :
4635 : /* For SYSV_ABI we use an array of one record. */
4636 291323 : sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4637 :
4638 : /* For MS_ABI we use plain pointer to argument area. */
4639 291323 : tree char_ptr_type = build_pointer_type (char_type_node);
4640 291323 : tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4641 291323 : TYPE_ATTRIBUTES (char_ptr_type));
4642 291323 : ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4643 :
4644 291323 : return ((ix86_abi == MS_ABI)
4645 291323 : ? ms_va_list_type_node
4646 291323 : : sysv_va_list_type_node);
4647 : }
4648 : else
4649 : {
4650 : /* For i386 we use plain pointer to argument area. */
4651 7156 : return build_pointer_type (char_type_node);
4652 : }
4653 : }
4654 :
4655 : /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4656 :
4657 : static void
4658 15778 : setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4659 : {
4660 15778 : rtx save_area, mem;
4661 15778 : alias_set_type set;
4662 15778 : int i, max;
4663 :
4664 : /* GPR size of varargs save area. */
4665 15778 : if (cfun->va_list_gpr_size)
4666 15315 : ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4667 : else
4668 463 : ix86_varargs_gpr_size = 0;
4669 :
4670 : /* FPR size of varargs save area. We don't need it if we don't pass
4671 : anything in SSE registers. */
4672 15778 : if (TARGET_SSE && cfun->va_list_fpr_size)
4673 14718 : ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4674 : else
4675 1060 : ix86_varargs_fpr_size = 0;
4676 :
4677 15778 : if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4678 : return;
4679 :
4680 15484 : save_area = frame_pointer_rtx;
4681 15484 : set = get_varargs_alias_set ();
4682 :
4683 15484 : max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4684 15484 : if (max > X86_64_REGPARM_MAX)
4685 : max = X86_64_REGPARM_MAX;
4686 :
4687 15484 : const int *parm_regs;
4688 15484 : if (cum->preserve_none_abi)
4689 : parm_regs = x86_64_preserve_none_int_parameter_registers;
4690 : else
4691 15483 : parm_regs = x86_64_int_parameter_registers;
4692 :
4693 86019 : for (i = cum->regno; i < max; i++)
4694 : {
4695 70535 : mem = gen_rtx_MEM (word_mode,
4696 70535 : plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4697 70535 : MEM_NOTRAP_P (mem) = 1;
4698 70535 : set_mem_alias_set (mem, set);
4699 70535 : emit_move_insn (mem,
4700 70535 : gen_rtx_REG (word_mode, parm_regs[i]));
4701 : }
4702 :
4703 15484 : if (ix86_varargs_fpr_size)
4704 : {
4705 14718 : machine_mode smode;
4706 14718 : rtx_code_label *label;
4707 14718 : rtx test;
4708 :
4709 : /* Now emit code to save SSE registers. The AX parameter contains number
4710 : of SSE parameter registers used to call this function, though all we
4711 : actually check here is the zero/non-zero status. */
4712 :
4713 14718 : label = gen_label_rtx ();
4714 14718 : test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4715 14718 : emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4716 : label));
4717 :
4718 : /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4719 : we used movdqa (i.e. TImode) instead? Perhaps even better would
4720 : be if we could determine the real mode of the data, via a hook
4721 : into pass_stdarg. Ignore all that for now. */
4722 14718 : smode = V4SFmode;
4723 14718 : if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4724 4220 : crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4725 :
4726 14718 : max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4727 14718 : if (max > X86_64_SSE_REGPARM_MAX)
4728 : max = X86_64_SSE_REGPARM_MAX;
4729 :
4730 130838 : for (i = cum->sse_regno; i < max; ++i)
4731 : {
4732 116120 : mem = plus_constant (Pmode, save_area,
4733 116120 : i * 16 + ix86_varargs_gpr_size);
4734 116120 : mem = gen_rtx_MEM (smode, mem);
4735 116120 : MEM_NOTRAP_P (mem) = 1;
4736 116120 : set_mem_alias_set (mem, set);
4737 116120 : set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4738 :
4739 116120 : emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4740 : }
4741 :
4742 14718 : emit_label (label);
4743 : }
4744 : }
4745 :
4746 : static void
4747 5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4748 : {
4749 5652 : alias_set_type set = get_varargs_alias_set ();
4750 5652 : int i;
4751 :
4752 : /* Reset to zero, as there might be a sysv vaarg used
4753 : before. */
4754 5652 : ix86_varargs_gpr_size = 0;
4755 5652 : ix86_varargs_fpr_size = 0;
4756 :
4757 14154 : for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4758 : {
4759 8502 : rtx reg, mem;
4760 :
4761 8502 : mem = gen_rtx_MEM (Pmode,
4762 8502 : plus_constant (Pmode, virtual_incoming_args_rtx,
4763 8502 : i * UNITS_PER_WORD));
4764 8502 : MEM_NOTRAP_P (mem) = 1;
4765 8502 : set_mem_alias_set (mem, set);
4766 :
4767 8502 : reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4768 8502 : emit_move_insn (mem, reg);
4769 : }
4770 5652 : }
4771 :
4772 : static void
4773 21584 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4774 : const function_arg_info &arg,
4775 : int *, int no_rtl)
4776 : {
4777 21584 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4778 21584 : CUMULATIVE_ARGS next_cum;
4779 21584 : tree fntype;
4780 :
4781 : /* This argument doesn't appear to be used anymore. Which is good,
4782 : because the old code here didn't suppress rtl generation. */
4783 21584 : gcc_assert (!no_rtl);
4784 :
4785 21584 : if (!TARGET_64BIT)
4786 154 : return;
4787 :
4788 21430 : fntype = TREE_TYPE (current_function_decl);
4789 :
4790 : /* For varargs, we do not want to skip the dummy va_dcl argument.
4791 : For stdargs, we do want to skip the last named argument. */
4792 21430 : next_cum = *cum;
4793 21430 : if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4794 151 : || arg.type != NULL_TREE)
4795 21467 : && stdarg_p (fntype))
4796 21316 : ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4797 :
4798 21430 : if (cum->call_abi == MS_ABI)
4799 5652 : setup_incoming_varargs_ms_64 (&next_cum);
4800 : else
4801 15778 : setup_incoming_varargs_64 (&next_cum);
4802 : }
4803 :
4804 : /* Checks if TYPE is of kind va_list char *. */
4805 :
4806 : static bool
4807 73339 : is_va_list_char_pointer (tree type)
4808 : {
4809 73339 : tree canonic;
4810 :
4811 : /* For 32-bit it is always true. */
4812 73339 : if (!TARGET_64BIT)
4813 : return true;
4814 73177 : canonic = ix86_canonical_va_list_type (type);
4815 73177 : return (canonic == ms_va_list_type_node
4816 73177 : || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4817 : }
4818 :
4819 : /* Implement va_start. */
4820 :
4821 : static void
4822 21072 : ix86_va_start (tree valist, rtx nextarg)
4823 : {
4824 21072 : HOST_WIDE_INT words, n_gpr, n_fpr;
4825 21072 : tree f_gpr, f_fpr, f_ovf, f_sav;
4826 21072 : tree gpr, fpr, ovf, sav, t;
4827 21072 : tree type;
4828 21072 : rtx ovf_rtx;
4829 :
4830 21072 : if (flag_split_stack
4831 12 : && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4832 : {
4833 12 : unsigned int scratch_regno;
4834 :
4835 : /* When we are splitting the stack, we can't refer to the stack
4836 : arguments using internal_arg_pointer, because they may be on
4837 : the old stack. The split stack prologue will arrange to
4838 : leave a pointer to the old stack arguments in a scratch
4839 : register, which we here copy to a pseudo-register. The split
4840 : stack prologue can't set the pseudo-register directly because
4841 : it (the prologue) runs before any registers have been saved. */
4842 :
4843 12 : scratch_regno = split_stack_prologue_scratch_regno ();
4844 12 : if (scratch_regno != INVALID_REGNUM)
4845 : {
4846 12 : rtx reg;
4847 12 : rtx_insn *seq;
4848 :
4849 16 : reg = gen_reg_rtx (Pmode);
4850 12 : cfun->machine->split_stack_varargs_pointer = reg;
4851 :
4852 12 : start_sequence ();
4853 16 : emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4854 12 : seq = end_sequence ();
4855 :
4856 12 : push_topmost_sequence ();
4857 12 : emit_insn_after (seq, entry_of_function ());
4858 12 : pop_topmost_sequence ();
4859 : }
4860 : }
4861 :
4862 : /* Only 64bit target needs something special. */
4863 21072 : if (is_va_list_char_pointer (TREE_TYPE (valist)))
4864 : {
4865 5656 : if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4866 5652 : std_expand_builtin_va_start (valist, nextarg);
4867 : else
4868 : {
4869 4 : rtx va_r, next;
4870 :
4871 4 : va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4872 8 : next = expand_binop (ptr_mode, add_optab,
4873 4 : cfun->machine->split_stack_varargs_pointer,
4874 : crtl->args.arg_offset_rtx,
4875 : NULL_RTX, 0, OPTAB_LIB_WIDEN);
4876 4 : convert_move (va_r, next, 0);
4877 : }
4878 5656 : return;
4879 : }
4880 :
4881 15416 : f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4882 15416 : f_fpr = DECL_CHAIN (f_gpr);
4883 15416 : f_ovf = DECL_CHAIN (f_fpr);
4884 15416 : f_sav = DECL_CHAIN (f_ovf);
4885 :
4886 15416 : valist = build_simple_mem_ref (valist);
4887 15416 : TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4888 : /* The following should be folded into the MEM_REF offset. */
4889 15416 : gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4890 : f_gpr, NULL_TREE);
4891 15416 : fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4892 : f_fpr, NULL_TREE);
4893 15416 : ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4894 : f_ovf, NULL_TREE);
4895 15416 : sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4896 : f_sav, NULL_TREE);
4897 :
4898 : /* Count number of gp and fp argument registers used. */
4899 15416 : words = crtl->args.info.words;
4900 15416 : n_gpr = crtl->args.info.regno;
4901 15416 : n_fpr = crtl->args.info.sse_regno;
4902 :
4903 15416 : if (cfun->va_list_gpr_size)
4904 : {
4905 15169 : type = TREE_TYPE (gpr);
4906 15169 : t = build2 (MODIFY_EXPR, type,
4907 15169 : gpr, build_int_cst (type, n_gpr * 8));
4908 15169 : TREE_SIDE_EFFECTS (t) = 1;
4909 15169 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4910 : }
4911 :
4912 15416 : if (TARGET_SSE && cfun->va_list_fpr_size)
4913 : {
4914 14560 : type = TREE_TYPE (fpr);
4915 14560 : t = build2 (MODIFY_EXPR, type, fpr,
4916 14560 : build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4917 14560 : TREE_SIDE_EFFECTS (t) = 1;
4918 14560 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4919 : }
4920 :
4921 : /* Find the overflow area. */
4922 15416 : type = TREE_TYPE (ovf);
4923 15416 : if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4924 15408 : ovf_rtx = crtl->args.internal_arg_pointer;
4925 : else
4926 : ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4927 15416 : t = make_tree (type, ovf_rtx);
4928 15416 : if (words != 0)
4929 492 : t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4930 :
4931 15416 : t = build2 (MODIFY_EXPR, type, ovf, t);
4932 15416 : TREE_SIDE_EFFECTS (t) = 1;
4933 15416 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4934 :
4935 15416 : if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4936 : {
4937 : /* Find the register save area.
4938 : Prologue of the function save it right above stack frame. */
4939 15338 : type = TREE_TYPE (sav);
4940 15338 : t = make_tree (type, frame_pointer_rtx);
4941 15338 : if (!ix86_varargs_gpr_size)
4942 169 : t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4943 :
4944 15338 : t = build2 (MODIFY_EXPR, type, sav, t);
4945 15338 : TREE_SIDE_EFFECTS (t) = 1;
4946 15338 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4947 : }
4948 : }
4949 :
/* Implement va_arg.

   VALIST is the va_list expression and TYPE the type of the argument to
   fetch.  Statements needed to compute the argument's address are
   appended to PRE_P; POST_P is only forwarded to
   std_gimplify_va_arg_expr.  Returns an expression that dereferences the
   computed address.

   For a char-pointer va_list the generic std_gimplify_va_arg_expr is
   used.  Otherwise the emitted code first tries to take the argument
   from the register save area (when construct_container yields a
   container for it) and falls back to the overflow area.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  /* Register numbers handed to construct_container as the integer
     register table.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four fields of the SYSV va_list record, in declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* An argument passed by reference is fetched as a pointer, which is
     dereferenced once more at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  /* SIZE rounded up to whole words.  */
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
        {
          container = NULL;
          break;
        }
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, false, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg, 0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  /* A null CONTAINER skips the register path entirely; only the
     overflow-area code below is emitted.  */
  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* LAB_FALSE starts the overflow-area path; LAB_OVER is the join
         point after both paths.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, false,
                        &needed_intregs, &needed_sseregs);

      bool container_in_reg = false;
      if (REG_P (container))
        container_in_reg = true;
      else if (GET_CODE (container) == PARALLEL
               && GET_MODE (container) == BLKmode
               && XVECLEN (container, 0) == 1)
        {
          /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
             expression in a TImode register.  In this case, temp isn't
             needed.  Otherwise, the TImode variable will be put in the
             GPR save area which guarantees only 8-byte alignment.   */
          rtx x = XVECEXP (container, 0, 0);
          if (GET_CODE (x) == EXPR_LIST
              && REG_P (XEXP (x, 0))
              && XEXP (x, 1) == const0_rtx)
            container_in_reg = true;
        }

      /* A temporary is needed when the value is not in a single register
         and the type's alignment exceeds 64 bits while integer registers
         are used, or exceeds 128 bits in any case.  */
      need_temp = (!container_in_reg
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !container_in_reg)
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              /* Piece I must be SSE register I at byte offset I * 16.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              /* Piece I must be register number I at byte offset I * 8.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }
      /* Without a temporary the save-area address is used directly as
         ADDR; with one, separate source addresses are kept and ADDR will
         point at the temporary.  */
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          /* if (gpr >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8)
               goto lab_false;  */
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* The same check for fpr, whose values are offset by the
             X86_64_REGPARM_MAX * 8 bytes of the GPR part.  */
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          /* Assemble the value piece by piece in a temporary.  PREV_SIZE
             tracks the number of bytes of the temporary filled so far.  */
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");
          TREE_ADDRESSABLE (temp) = 1;

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              /* The last piece may extend past the end of the value:
                 shrink it, and use an integer mode of exactly that size
                 if one exists, else QImode (copied with memcpy below).  */
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  unsigned int nbits = cur_size * BITS_PER_UNIT;
                  if (!int_mode_for_size (nbits, 1).exists (&mode))
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              /* Locate the piece in the SSE or integer part of the
                 register save area.  */
              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              /* Copy with a plain assignment when the piece is exactly
                 one MODE-sized object, otherwise with memcpy.  */
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          /* gpr += needed_intregs * 8;  */
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
          /* The GPR save area guarantees only 8-byte alignment.  */
          if (!need_temp)
            type_align = MIN (type_align, 64);
        }

      if (needed_sseregs)
        {
          /* fpr += needed_sseregs * 16;  */
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (unshare_expr (fpr), t, pre_p);
        }

      /* The register path is complete: skip the overflow-area code.  */
      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* t = (ovf + align - 1) & -align, with ALIGN in bytes.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance the overflow pointer past the argument, in whole words.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  /* The access is made through a type carrying TYPE_ALIGN, which may
     have been lowered above for values read from the GPR save area.  */
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  /* For a by-reference argument ADDR points at the pointer; dereference
     it first to reach the object itself.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5274 :
5275 : /* Return true if OPNUM's MEM should be matched
5276 : in movabs* patterns. */
5277 :
5278 : bool
5279 480 : ix86_check_movabs (rtx insn, int opnum)
5280 : {
5281 480 : rtx set, mem;
5282 :
5283 480 : set = PATTERN (insn);
5284 480 : if (GET_CODE (set) == PARALLEL)
5285 0 : set = XVECEXP (set, 0, 0);
5286 480 : gcc_assert (GET_CODE (set) == SET);
5287 480 : mem = XEXP (set, opnum);
5288 480 : while (SUBREG_P (mem))
5289 0 : mem = SUBREG_REG (mem);
5290 480 : gcc_assert (MEM_P (mem));
5291 480 : return volatile_ok || !MEM_VOLATILE_P (mem);
5292 : }
5293 :
5294 : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */
5295 : bool
5296 218135 : ix86_check_movs (rtx insn, int idx)
5297 : {
5298 218135 : rtx pat = PATTERN (insn);
5299 218135 : gcc_assert (GET_CODE (pat) == PARALLEL);
5300 :
5301 218135 : rtx set = XVECEXP (pat, 0, idx);
5302 218135 : gcc_assert (GET_CODE (set) == SET);
5303 :
5304 218135 : rtx dst = SET_DEST (set);
5305 218135 : gcc_assert (MEM_P (dst));
5306 :
5307 218135 : rtx src = SET_SRC (set);
5308 218135 : gcc_assert (MEM_P (src));
5309 :
5310 218135 : return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
5311 436270 : && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
5312 0 : || Pmode == word_mode));
5313 : }
5314 :
5315 : /* Return false if INSN contains a MEM with a non-default address space. */
5316 : bool
5317 65436 : ix86_check_no_addr_space (rtx insn)
5318 : {
5319 65436 : subrtx_var_iterator::array_type array;
5320 1440044 : FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5321 : {
5322 1374608 : rtx x = *iter;
5323 1505480 : if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5324 0 : return false;
5325 : }
5326 65436 : return true;
5327 65436 : }
5328 :
5329 : /* Initialize the table of extra 80387 mathematical constants. */
5330 :
5331 : static void
5332 2353 : init_ext_80387_constants (void)
5333 : {
5334 2353 : static const char * cst[5] =
5335 : {
5336 : "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5337 : "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5338 : "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5339 : "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5340 : "3.1415926535897932385128089594061862044", /* 4: fldpi */
5341 : };
5342 2353 : int i;
5343 :
5344 14118 : for (i = 0; i < 5; i++)
5345 : {
5346 11765 : real_from_string (&ext_80387_constants_table[i], cst[i]);
5347 : /* Ensure each constant is rounded to XFmode precision. */
5348 11765 : real_convert (&ext_80387_constants_table[i],
5349 23530 : XFmode, &ext_80387_constants_table[i]);
5350 : }
5351 :
5352 2353 : ext_80387_constants_init = 1;
5353 2353 : }
5354 :
5355 : /* Return non-zero if the constant is something that
5356 : can be loaded with a special instruction. */
5357 :
5358 : int
5359 5043671 : standard_80387_constant_p (rtx x)
5360 : {
5361 5043671 : machine_mode mode = GET_MODE (x);
5362 :
5363 5043671 : const REAL_VALUE_TYPE *r;
5364 :
5365 5043671 : if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5366 : return -1;
5367 :
5368 4582530 : if (x == CONST0_RTX (mode))
5369 : return 1;
5370 2113838 : if (x == CONST1_RTX (mode))
5371 : return 2;
5372 :
5373 1230986 : r = CONST_DOUBLE_REAL_VALUE (x);
5374 :
5375 : /* For XFmode constants, try to find a special 80387 instruction when
5376 : optimizing for size or on those CPUs that benefit from them. */
5377 1230986 : if (mode == XFmode
5378 796383 : && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5379 2027369 : && !flag_rounding_math)
5380 : {
5381 788333 : int i;
5382 :
5383 788333 : if (! ext_80387_constants_init)
5384 2346 : init_ext_80387_constants ();
5385 :
5386 4719506 : for (i = 0; i < 5; i++)
5387 3940004 : if (real_identical (r, &ext_80387_constants_table[i]))
5388 8831 : return i + 3;
5389 : }
5390 :
5391 : /* Load of the constant -0.0 or -1.0 will be split as
5392 : fldz;fchs or fld1;fchs sequence. */
5393 1222155 : if (real_isnegzero (r))
5394 : return 8;
5395 1205665 : if (real_identical (r, &dconstm1))
5396 301849 : return 9;
5397 :
5398 : return 0;
5399 : }
5400 :
5401 : /* Return the opcode of the special instruction to be used to load
5402 : the constant X. */
5403 :
5404 : const char *
5405 54462 : standard_80387_constant_opcode (rtx x)
5406 : {
5407 54462 : switch (standard_80387_constant_p (x))
5408 : {
5409 : case 1:
5410 : return "fldz";
5411 33984 : case 2:
5412 33984 : return "fld1";
5413 1 : case 3:
5414 1 : return "fldlg2";
5415 10 : case 4:
5416 10 : return "fldln2";
5417 12 : case 5:
5418 12 : return "fldl2e";
5419 2 : case 6:
5420 2 : return "fldl2t";
5421 192 : case 7:
5422 192 : return "fldpi";
5423 0 : case 8:
5424 0 : case 9:
5425 0 : return "#";
5426 0 : default:
5427 0 : gcc_unreachable ();
5428 : }
5429 : }
5430 :
5431 : /* Return the CONST_DOUBLE representing the 80387 constant that is
5432 : loaded by the specified special instruction. The argument IDX
5433 : matches the return value from standard_80387_constant_p. */
5434 :
5435 : rtx
5436 24 : standard_80387_constant_rtx (int idx)
5437 : {
5438 24 : int i;
5439 :
5440 24 : if (! ext_80387_constants_init)
5441 7 : init_ext_80387_constants ();
5442 :
5443 24 : switch (idx)
5444 : {
5445 24 : case 3:
5446 24 : case 4:
5447 24 : case 5:
5448 24 : case 6:
5449 24 : case 7:
5450 24 : i = idx - 3;
5451 24 : break;
5452 :
5453 0 : default:
5454 0 : gcc_unreachable ();
5455 : }
5456 :
5457 24 : return const_double_from_real_value (ext_80387_constants_table[i],
5458 24 : XFmode);
5459 : }
5460 :
5461 : /* Return 1 if X is all bits 0, 2 if X is all bits 1
5462 : and 3 if X is all bits 1 with zero extend
5463 : in supported SSE/AVX vector mode. */
5464 :
5465 : int
5466 55141975 : standard_sse_constant_p (rtx x, machine_mode pred_mode)
5467 : {
5468 55141975 : machine_mode mode;
5469 :
5470 55141975 : if (!TARGET_SSE)
5471 : return 0;
5472 :
5473 54972944 : mode = GET_MODE (x);
5474 :
5475 54972944 : if (x == const0_rtx || const0_operand (x, mode))
5476 13117044 : return 1;
5477 :
5478 41855900 : if (x == constm1_rtx
5479 41717496 : || vector_all_ones_operand (x, mode)
5480 83003772 : || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5481 34434176 : || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5482 6714643 : && float_vector_all_ones_operand (x, mode)))
5483 : {
5484 : /* VOIDmode integer constant, get mode from the predicate. */
5485 710119 : if (mode == VOIDmode)
5486 138404 : mode = pred_mode;
5487 :
5488 1420238 : switch (GET_MODE_SIZE (mode))
5489 : {
5490 30660 : case 64:
5491 30660 : if (TARGET_AVX512F)
5492 : return 2;
5493 : break;
5494 39941 : case 32:
5495 39941 : if (TARGET_AVX2)
5496 : return 2;
5497 : break;
5498 627139 : case 16:
5499 627139 : if (TARGET_SSE2)
5500 : return 2;
5501 : break;
5502 0 : case 0:
5503 : /* VOIDmode */
5504 0 : gcc_unreachable ();
5505 : default:
5506 : break;
5507 : }
5508 : }
5509 :
5510 41159091 : if (vector_all_ones_zero_extend_half_operand (x, mode)
5511 41159091 : || vector_all_ones_zero_extend_quarter_operand (x, mode))
5512 706 : return 3;
5513 :
5514 : return 0;
5515 : }
5516 :
5517 : /* Return the opcode of the special instruction to be used to load
5518 : the constant operands[1] into operands[0]. */
5519 :
5520 : const char *
5521 464886 : standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5522 : {
5523 464886 : machine_mode mode;
5524 464886 : rtx x = operands[1];
5525 :
5526 464886 : gcc_assert (TARGET_SSE);
5527 :
5528 464886 : mode = GET_MODE (x);
5529 :
5530 464886 : if (x == const0_rtx || const0_operand (x, mode))
5531 : {
5532 453196 : switch (get_attr_mode (insn))
5533 : {
5534 435435 : case MODE_TI:
5535 435435 : if (!EXT_REX_SSE_REG_P (operands[0]))
5536 : return "%vpxor\t%0, %d0";
5537 : /* FALLTHRU */
5538 6184 : case MODE_XI:
5539 6184 : case MODE_OI:
5540 6184 : if (EXT_REX_SSE_REG_P (operands[0]))
5541 : {
5542 67 : if (TARGET_AVX512VL)
5543 : return "vpxord\t%x0, %x0, %x0";
5544 : else
5545 28 : return "vpxord\t%g0, %g0, %g0";
5546 : }
5547 : return "vpxor\t%x0, %x0, %x0";
5548 :
5549 2107 : case MODE_V2DF:
5550 2107 : if (!EXT_REX_SSE_REG_P (operands[0]))
5551 : return "%vxorpd\t%0, %d0";
5552 : /* FALLTHRU */
5553 847 : case MODE_V8DF:
5554 847 : case MODE_V4DF:
5555 847 : if (EXT_REX_SSE_REG_P (operands[0]))
5556 : {
5557 4 : if (TARGET_AVX512DQ)
5558 : {
5559 0 : if (TARGET_AVX512VL)
5560 : return "vxorpd\t%x0, %x0, %x0";
5561 : else
5562 0 : return "vxorpd\t%g0, %g0, %g0";
5563 : }
5564 : else
5565 : {
5566 4 : if (TARGET_AVX512VL)
5567 : return "vpxorq\t%x0, %x0, %x0";
5568 : else
5569 4 : return "vpxorq\t%g0, %g0, %g0";
5570 : }
5571 : }
5572 : return "vxorpd\t%x0, %x0, %x0";
5573 :
5574 6672 : case MODE_V4SF:
5575 6672 : if (!EXT_REX_SSE_REG_P (operands[0]))
5576 : return "%vxorps\t%0, %d0";
5577 : /* FALLTHRU */
5578 2015 : case MODE_V16SF:
5579 2015 : case MODE_V8SF:
5580 2015 : if (EXT_REX_SSE_REG_P (operands[0]))
5581 : {
5582 65 : if (TARGET_AVX512DQ)
5583 : {
5584 26 : if (TARGET_AVX512VL)
5585 : return "vxorps\t%x0, %x0, %x0";
5586 : else
5587 0 : return "vxorps\t%g0, %g0, %g0";
5588 : }
5589 : else
5590 : {
5591 39 : if (TARGET_AVX512VL)
5592 : return "vpxord\t%x0, %x0, %x0";
5593 : else
5594 37 : return "vpxord\t%g0, %g0, %g0";
5595 : }
5596 : }
5597 : return "vxorps\t%x0, %x0, %x0";
5598 :
5599 0 : default:
5600 0 : gcc_unreachable ();
5601 : }
5602 : }
5603 11690 : else if (x == constm1_rtx
5604 11679 : || vector_all_ones_operand (x, mode)
5605 11757 : || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5606 45 : && float_vector_all_ones_operand (x, mode)))
5607 : {
5608 11668 : enum attr_mode insn_mode = get_attr_mode (insn);
5609 :
5610 11668 : switch (insn_mode)
5611 : {
5612 3 : case MODE_XI:
5613 3 : case MODE_V8DF:
5614 3 : case MODE_V16SF:
5615 3 : gcc_assert (TARGET_AVX512F);
5616 : return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5617 :
5618 959 : case MODE_OI:
5619 959 : case MODE_V4DF:
5620 959 : case MODE_V8SF:
5621 959 : gcc_assert (TARGET_AVX2);
5622 : /* FALLTHRU */
5623 11665 : case MODE_TI:
5624 11665 : case MODE_V2DF:
5625 11665 : case MODE_V4SF:
5626 11665 : gcc_assert (TARGET_SSE2);
5627 11665 : if (EXT_REX_SSE_REG_P (operands[0]))
5628 : {
5629 2 : if (TARGET_AVX512VL)
5630 : return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5631 : else
5632 0 : return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5633 : }
5634 11663 : return (TARGET_AVX
5635 11663 : ? "vpcmpeqd\t%0, %0, %0"
5636 11663 : : "pcmpeqd\t%0, %0");
5637 :
5638 0 : default:
5639 0 : gcc_unreachable ();
5640 : }
5641 : }
5642 22 : else if (vector_all_ones_zero_extend_half_operand (x, mode))
5643 : {
5644 40 : if (GET_MODE_SIZE (mode) == 64)
5645 : {
5646 5 : gcc_assert (TARGET_AVX512F);
5647 : return "vpcmpeqd\t%t0, %t0, %t0";
5648 : }
5649 30 : else if (GET_MODE_SIZE (mode) == 32)
5650 : {
5651 15 : gcc_assert (TARGET_AVX);
5652 : return "vpcmpeqd\t%x0, %x0, %x0";
5653 : }
5654 0 : gcc_unreachable ();
5655 : }
5656 2 : else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5657 : {
5658 2 : gcc_assert (TARGET_AVX512F);
5659 : return "vpcmpeqd\t%x0, %x0, %x0";
5660 : }
5661 :
5662 0 : gcc_unreachable ();
5663 : }
5664 :
5665 : /* Returns true if INSN can be transformed from a memory load
5666 : to a supported FP constant load. */
5667 :
5668 : bool
5669 2143789 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5670 : {
5671 2143789 : rtx src = find_constant_src (insn);
5672 :
5673 2143789 : gcc_assert (REG_P (dst));
5674 :
5675 2143789 : if (src == NULL
5676 597391 : || (SSE_REGNO_P (REGNO (dst))
5677 465329 : && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5678 165988 : || (!TARGET_AVX512VL
5679 165927 : && EXT_REX_SSE_REGNO_P (REGNO (dst))
5680 0 : && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5681 2309777 : || (STACK_REGNO_P (REGNO (dst))
5682 132062 : && standard_80387_constant_p (src) < 1))
5683 2067258 : return false;
5684 :
5685 : return true;
5686 : }
5687 :
5688 : /* Predicate for pre-reload splitters with associated instructions,
5689 : which can match any time before the split1 pass (usually combine),
5690 : then are unconditionally split in that pass and should not be
5691 : matched again afterwards. */
5692 :
5693 : bool
5694 17717758 : ix86_pre_reload_split (void)
5695 : {
5696 17717758 : return (can_create_pseudo_p ()
5697 27033743 : && !(cfun->curr_properties & PROP_rtl_split_insns));
5698 : }
5699 :
5700 : /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5701 : or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5702 : TARGET_AVX512VL or it is a register to register move which can
5703 : be done with zmm register move. */
5704 :
5705 : static const char *
5706 4164947 : ix86_get_ssemov (rtx *operands, unsigned size,
5707 : enum attr_mode insn_mode, machine_mode mode)
5708 : {
5709 4164947 : char buf[128];
5710 4164947 : bool misaligned_p = (misaligned_operand (operands[0], mode)
5711 4164947 : || misaligned_operand (operands[1], mode));
5712 4164947 : bool evex_reg_p = (size == 64
5713 4078437 : || EXT_REX_SSE_REG_P (operands[0])
5714 8242643 : || EXT_REX_SSE_REG_P (operands[1]));
5715 :
5716 4164947 : bool egpr_p = (TARGET_APX_EGPR
5717 4164947 : && (x86_extended_rex2reg_mentioned_p (operands[0])
5718 182 : || x86_extended_rex2reg_mentioned_p (operands[1])));
5719 196 : bool egpr_vl = egpr_p && TARGET_AVX512VL;
5720 :
5721 4164947 : machine_mode scalar_mode;
5722 :
5723 4164947 : const char *opcode = NULL;
5724 4164947 : enum
5725 : {
5726 : opcode_int,
5727 : opcode_float,
5728 : opcode_double
5729 4164947 : } type = opcode_int;
5730 :
5731 4164947 : switch (insn_mode)
5732 : {
5733 : case MODE_V16SF:
5734 : case MODE_V8SF:
5735 : case MODE_V4SF:
5736 : scalar_mode = E_SFmode;
5737 : type = opcode_float;
5738 : break;
5739 209355 : case MODE_V8DF:
5740 209355 : case MODE_V4DF:
5741 209355 : case MODE_V2DF:
5742 209355 : scalar_mode = E_DFmode;
5743 209355 : type = opcode_double;
5744 209355 : break;
5745 1508589 : case MODE_XI:
5746 1508589 : case MODE_OI:
5747 1508589 : case MODE_TI:
5748 1508589 : scalar_mode = GET_MODE_INNER (mode);
5749 : break;
5750 0 : default:
5751 0 : gcc_unreachable ();
5752 : }
5753 :
5754 : /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5755 : we can only use zmm register move without memory operand. */
5756 4164947 : if (evex_reg_p
5757 88559 : && !TARGET_AVX512VL
5758 4215199 : && GET_MODE_SIZE (mode) < 64)
5759 : {
5760 : /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5761 : xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5762 : AVX512VL is disabled, LRA can still generate reg to
5763 : reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5764 : modes. */
5765 0 : if (memory_operand (operands[0], mode)
5766 0 : || memory_operand (operands[1], mode))
5767 0 : gcc_unreachable ();
5768 0 : size = 64;
5769 0 : switch (type)
5770 : {
5771 0 : case opcode_int:
5772 0 : if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5773 0 : opcode = (misaligned_p
5774 0 : ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5775 : : "vmovdqa64");
5776 : else
5777 0 : opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5778 : break;
5779 0 : case opcode_float:
5780 0 : opcode = misaligned_p ? "vmovups" : "vmovaps";
5781 : break;
5782 0 : case opcode_double:
5783 0 : opcode = misaligned_p ? "vmovupd" : "vmovapd";
5784 : break;
5785 : }
5786 : }
5787 4164947 : else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5788 : {
5789 2832649 : switch (scalar_mode)
5790 : {
5791 36799 : case E_HFmode:
5792 36799 : case E_BFmode:
5793 36799 : if (evex_reg_p || egpr_vl)
5794 11597 : opcode = (misaligned_p
5795 173 : ? (TARGET_AVX512BW
5796 : ? "vmovdqu16"
5797 : : "vmovdqu64")
5798 : : "vmovdqa64");
5799 25202 : else if (egpr_p)
5800 790287 : opcode = (misaligned_p
5801 0 : ? (TARGET_AVX512BW
5802 0 : ? "vmovdqu16"
5803 : : "%vmovups")
5804 : : "%vmovaps");
5805 : else
5806 412967 : opcode = (misaligned_p
5807 25202 : ? (TARGET_AVX512BW && evex_reg_p
5808 : ? "vmovdqu16"
5809 : : "%vmovdqu")
5810 : : "%vmovdqa");
5811 : break;
5812 2447003 : case E_SFmode:
5813 2447003 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5814 : break;
5815 209355 : case E_DFmode:
5816 209355 : opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5817 : break;
5818 139492 : case E_TFmode:
5819 139492 : if (evex_reg_p || egpr_vl)
5820 14 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5821 139478 : else if (egpr_p)
5822 0 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5823 : else
5824 139478 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5825 : break;
5826 0 : default:
5827 0 : gcc_unreachable ();
5828 : }
5829 : }
5830 1332298 : else if (SCALAR_INT_MODE_P (scalar_mode))
5831 : {
5832 1332298 : switch (scalar_mode)
5833 : {
5834 101329 : case E_QImode:
5835 101329 : if (evex_reg_p || egpr_vl)
5836 4175158 : opcode = (misaligned_p
5837 10211 : ? (TARGET_AVX512BW
5838 5071 : ? "vmovdqu8"
5839 : : "vmovdqu64")
5840 : : "vmovdqa64");
5841 91118 : else if (egpr_p)
5842 30 : opcode = (misaligned_p
5843 0 : ? (TARGET_AVX512BW
5844 : ? "vmovdqu8"
5845 : : "%vmovups")
5846 : : "%vmovaps");
5847 : else
5848 91088 : opcode = (misaligned_p
5849 : ? (TARGET_AVX512BW && evex_reg_p
5850 : ? "vmovdqu8"
5851 : : "%vmovdqu")
5852 : : "%vmovdqa");
5853 : break;
5854 42943 : case E_HImode:
5855 42943 : if (evex_reg_p || egpr_vl)
5856 3836 : opcode = (misaligned_p
5857 300 : ? (TARGET_AVX512BW
5858 : ? "vmovdqu16"
5859 : : "vmovdqu64")
5860 : : "vmovdqa64");
5861 39107 : else if (egpr_p)
5862 790287 : opcode = (misaligned_p
5863 27 : ? (TARGET_AVX512BW
5864 0 : ? "vmovdqu16"
5865 : : "%vmovups")
5866 : : "%vmovaps");
5867 : else
5868 387765 : opcode = (misaligned_p
5869 39080 : ? (TARGET_AVX512BW && evex_reg_p
5870 : ? "vmovdqu16"
5871 : : "%vmovdqu")
5872 : : "%vmovdqa");
5873 : break;
5874 182384 : case E_SImode:
5875 182384 : if (evex_reg_p || egpr_vl)
5876 8211 : opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5877 174173 : else if (egpr_p)
5878 14 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5879 : else
5880 174159 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5881 : break;
5882 993834 : case E_DImode:
5883 993834 : case E_TImode:
5884 993834 : case E_OImode:
5885 993834 : if (evex_reg_p || egpr_vl)
5886 18531 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5887 975303 : else if (egpr_p)
5888 26 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5889 : else
5890 975277 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5891 : break;
5892 11808 : case E_XImode:
5893 49646 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5894 : break;
5895 0 : default:
5896 0 : gcc_unreachable ();
5897 : }
5898 : }
5899 : else
5900 0 : gcc_unreachable ();
5901 :
5902 4164947 : switch (size)
5903 : {
5904 86510 : case 64:
5905 86510 : snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5906 : opcode);
5907 86510 : break;
5908 91984 : case 32:
5909 91984 : snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5910 : opcode);
5911 91984 : break;
5912 3986453 : case 16:
5913 3986453 : snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5914 : opcode);
5915 3986453 : break;
5916 0 : default:
5917 0 : gcc_unreachable ();
5918 : }
5919 4164947 : output_asm_insn (buf, operands);
5920 4164947 : return "";
5921 : }
5922 :
5923 : /* Return the template of the TYPE_SSEMOV instruction to move
5924 : operands[1] into operands[0]. */
5925 :
5926 : const char *
5927 6543368 : ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5928 : {
5929 6543368 : machine_mode mode = GET_MODE (operands[0]);
5930 6543368 : if (get_attr_type (insn) != TYPE_SSEMOV
5931 6543368 : || mode != GET_MODE (operands[1]))
5932 0 : gcc_unreachable ();
5933 :
5934 6543368 : enum attr_mode insn_mode = get_attr_mode (insn);
5935 :
5936 6543368 : switch (insn_mode)
5937 : {
5938 86510 : case MODE_XI:
5939 86510 : case MODE_V8DF:
5940 86510 : case MODE_V16SF:
5941 86510 : return ix86_get_ssemov (operands, 64, insn_mode, mode);
5942 :
5943 91984 : case MODE_OI:
5944 91984 : case MODE_V4DF:
5945 91984 : case MODE_V8SF:
5946 91984 : return ix86_get_ssemov (operands, 32, insn_mode, mode);
5947 :
5948 3986453 : case MODE_TI:
5949 3986453 : case MODE_V2DF:
5950 3986453 : case MODE_V4SF:
5951 3986453 : return ix86_get_ssemov (operands, 16, insn_mode, mode);
5952 :
5953 664193 : case MODE_DI:
5954 : /* Handle broken assemblers that require movd instead of movq. */
5955 664193 : if (GENERAL_REG_P (operands[0]))
5956 : {
5957 : if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5958 : return "%vmovq\t{%1, %q0|%q0, %1}";
5959 : else
5960 : return "%vmovd\t{%1, %q0|%q0, %1}";
5961 : }
5962 587569 : else if (GENERAL_REG_P (operands[1]))
5963 : {
5964 : if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5965 : return "%vmovq\t{%q1, %0|%0, %q1}";
5966 : else
5967 : return "%vmovd\t{%q1, %0|%0, %q1}";
5968 : }
5969 : else
5970 420552 : return "%vmovq\t{%1, %0|%0, %1}";
5971 :
5972 202628 : case MODE_SI:
5973 202628 : if (GENERAL_REG_P (operands[0]))
5974 : return "%vmovd\t{%1, %k0|%k0, %1}";
5975 146136 : else if (GENERAL_REG_P (operands[1]))
5976 : return "%vmovd\t{%k1, %0|%0, %k1}";
5977 : else
5978 60646 : return "%vmovd\t{%1, %0|%0, %1}";
5979 :
5980 54085 : case MODE_HI:
5981 54085 : if (GENERAL_REG_P (operands[0]))
5982 : return "vmovw\t{%1, %k0|%k0, %1}";
5983 53922 : else if (GENERAL_REG_P (operands[1]))
5984 : return "vmovw\t{%k1, %0|%0, %k1}";
5985 : else
5986 53688 : return "vmovw\t{%1, %0|%0, %1}";
5987 :
5988 780855 : case MODE_DF:
5989 780855 : if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5990 : return "vmovsd\t{%d1, %0|%0, %d1}";
5991 : else
5992 780013 : return "%vmovsd\t{%1, %0|%0, %1}";
5993 :
5994 672548 : case MODE_SF:
5995 672548 : if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5996 : return "vmovss\t{%d1, %0|%0, %d1}";
5997 : else
5998 672008 : return "%vmovss\t{%1, %0|%0, %1}";
5999 :
6000 96 : case MODE_HF:
6001 96 : case MODE_BF:
6002 96 : if (REG_P (operands[0]) && REG_P (operands[1]))
6003 : return "vmovsh\t{%d1, %0|%0, %d1}";
6004 : else
6005 0 : return "vmovsh\t{%1, %0|%0, %1}";
6006 :
6007 36 : case MODE_V1DF:
6008 36 : gcc_assert (!TARGET_AVX);
6009 : return "movlpd\t{%1, %0|%0, %1}";
6010 :
6011 3980 : case MODE_V2SF:
6012 3980 : if (TARGET_AVX && REG_P (operands[0]))
6013 : return "vmovlps\t{%1, %d0|%d0, %1}";
6014 : else
6015 3907 : return "%vmovlps\t{%1, %0|%0, %1}";
6016 :
6017 0 : default:
6018 0 : gcc_unreachable ();
6019 : }
6020 : }
6021 :
6022 : /* Returns true if OP contains a symbol reference */
6023 :
6024 : bool
6025 573061797 : symbolic_reference_mentioned_p (const_rtx op)
6026 : {
6027 573061797 : const char *fmt;
6028 573061797 : int i;
6029 :
6030 573061797 : if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
6031 : return true;
6032 :
6033 433219426 : fmt = GET_RTX_FORMAT (GET_CODE (op));
6034 735207357 : for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6035 : {
6036 586551683 : if (fmt[i] == 'E')
6037 : {
6038 2021445 : int j;
6039 :
6040 4044058 : for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6041 3329423 : if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6042 : return true;
6043 : }
6044 :
6045 584530238 : else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6046 : return true;
6047 : }
6048 :
6049 : return false;
6050 : }
6051 :
6052 : /* Return true if it is appropriate to emit `ret' instructions in the
6053 : body of a function. Do this only if the epilogue is simple, needing a
6054 : couple of insns. Prior to reloading, we can't tell how many registers
6055 : must be saved, so return false then. Return false if there is no frame
6056 : marker to de-allocate. */
6057 :
6058 : bool
6059 0 : ix86_can_use_return_insn_p (void)
6060 : {
6061 0 : if (ix86_function_ms_hook_prologue (current_function_decl))
6062 : return false;
6063 :
6064 0 : if (ix86_function_naked (current_function_decl))
6065 : return false;
6066 :
6067 : /* Don't use `ret' instruction in interrupt handler. */
6068 0 : if (! reload_completed
6069 0 : || frame_pointer_needed
6070 0 : || cfun->machine->func_type != TYPE_NORMAL)
6071 : return 0;
6072 :
6073 : /* Don't allow more than 32k pop, since that's all we can do
6074 : with one instruction. */
6075 0 : if (crtl->args.pops_args && crtl->args.size >= 32768)
6076 : return 0;
6077 :
6078 0 : struct ix86_frame &frame = cfun->machine->frame;
6079 0 : return (frame.stack_pointer_offset == UNITS_PER_WORD
6080 0 : && (frame.nregs + frame.nsseregs) == 0);
6081 : }
6082 :
6083 : /* Return stack frame size. get_frame_size () returns used stack slots
6084 : during compilation, which may be optimized out later. If stack frame
6085 : is needed, stack_frame_required should be true. */
6086 :
6087 : static HOST_WIDE_INT
6088 8275840 : ix86_get_frame_size (void)
6089 : {
6090 8275840 : if (cfun->machine->stack_frame_required)
6091 8206235 : return get_frame_size ();
6092 : else
6093 : return 0;
6094 : }
6095 :
6096 : /* Value should be nonzero if functions must have frame pointers.
6097 : Zero means the frame pointer need not be set up (and parms may
6098 : be accessed via the stack pointer) in functions that seem suitable. */
6099 :
6100 : static bool
6101 1222360 : ix86_frame_pointer_required (void)
6102 : {
6103 : /* If we accessed previous frames, then the generated code expects
6104 : to be able to access the saved ebp value in our frame. */
6105 1222360 : if (cfun->machine->accesses_prev_frame)
6106 : return true;
6107 :
6108 : /* Several x86 os'es need a frame pointer for other reasons,
6109 : usually pertaining to setjmp. */
6110 1222327 : if (SUBTARGET_FRAME_POINTER_REQUIRED)
6111 : return true;
6112 :
6113 : /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
6114 1222327 : if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
6115 : return true;
6116 :
6117 : /* Win64 SEH, very large frames need a frame-pointer as maximum stack
6118 : allocation is 4GB. */
6119 1222327 : if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
6120 : return true;
6121 :
6122 : /* SSE saves require frame-pointer when stack is misaligned. */
6123 1222327 : if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
6124 : return true;
6125 :
6126 : /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
6127 : turns off the frame pointer by default. Turn it back on now if
6128 : we've not got a leaf function. */
6129 1222326 : if (TARGET_OMIT_LEAF_FRAME_POINTER
6130 1222326 : && (!crtl->is_leaf
6131 0 : || ix86_current_function_calls_tls_descriptor))
6132 0 : return true;
6133 :
6134 : /* Several versions of mcount for the x86 assumes that there is a
6135 : frame, so we cannot allow profiling without a frame pointer. */
6136 1222326 : if (crtl->profile && !flag_fentry)
6137 : return true;
6138 :
6139 : return false;
6140 : }
6141 :
6142 : /* Record that the current function accesses previous call frames. */
6143 :
6144 : void
6145 966 : ix86_setup_frame_addresses (void)
6146 : {
6147 966 : cfun->machine->accesses_prev_frame = 1;
6148 966 : }
6149 :
6150 : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
6151 : # define USE_HIDDEN_LINKONCE 1
6152 : #else
6153 : # define USE_HIDDEN_LINKONCE 0
6154 : #endif
6155 :
6156 : /* Label count for call and return thunks. It is used to make unique
6157 : labels in call and return thunks. */
6158 : static int indirectlabelno;
6159 :
6160 : /* True if call thunk function is needed. */
6161 : static bool indirect_thunk_needed = false;
6162 :
6163 : /* Bit masks of integer registers, which contain branch target, used
6164 : by call thunk functions. */
6165 : static HARD_REG_SET indirect_thunks_used;
6166 :
6167 : /* True if return thunk function is needed. */
6168 : static bool indirect_return_needed = false;
6169 :
6170 : /* True if return thunk function via CX is needed. */
6171 : static bool indirect_return_via_cx;
6172 :
6173 : #ifndef INDIRECT_LABEL
6174 : # define INDIRECT_LABEL "LIND"
6175 : #endif
6176 :
6177 : /* Indicate what prefix is needed for an indirect branch. */
6178 : enum indirect_thunk_prefix
6179 : {
6180 : indirect_thunk_prefix_none,
6181 : indirect_thunk_prefix_nt
6182 : };
6183 :
6184 : /* Return the prefix needed for an indirect branch INSN. */
6185 :
6186 : enum indirect_thunk_prefix
6187 67 : indirect_thunk_need_prefix (rtx_insn *insn)
6188 : {
6189 67 : enum indirect_thunk_prefix need_prefix;
6190 67 : if ((cfun->machine->indirect_branch_type
6191 67 : == indirect_branch_thunk_extern)
6192 67 : && ix86_notrack_prefixed_insn_p (insn))
6193 : {
6194 : /* NOTRACK prefix is only used with external thunk so that it
6195 : can be properly updated to support CET at run-time. */
6196 : need_prefix = indirect_thunk_prefix_nt;
6197 : }
6198 : else
6199 : need_prefix = indirect_thunk_prefix_none;
6200 67 : return need_prefix;
6201 : }
6202 :
6203 : /* Fills in the label name that should be used for the indirect thunk. */
6204 :
6205 : static void
6206 73 : indirect_thunk_name (char name[32], unsigned int regno,
6207 : enum indirect_thunk_prefix need_prefix,
6208 : bool ret_p)
6209 : {
6210 73 : if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6211 0 : gcc_unreachable ();
6212 :
6213 73 : if (USE_HIDDEN_LINKONCE)
6214 : {
6215 73 : const char *prefix;
6216 :
6217 73 : if (need_prefix == indirect_thunk_prefix_nt
6218 73 : && regno != INVALID_REGNUM)
6219 : {
6220 : /* NOTRACK prefix is only used with external thunk via
6221 : register so that NOTRACK prefix can be added to indirect
6222 : branch via register to support CET at run-time. */
6223 : prefix = "_nt";
6224 : }
6225 : else
6226 71 : prefix = "";
6227 :
6228 73 : const char *ret = ret_p ? "return" : "indirect";
6229 :
6230 73 : if (regno != INVALID_REGNUM)
6231 : {
6232 55 : const char *reg_prefix;
6233 55 : if (LEGACY_INT_REGNO_P (regno))
6234 53 : reg_prefix = TARGET_64BIT ? "r" : "e";
6235 : else
6236 : reg_prefix = "";
6237 55 : sprintf (name, "__x86_%s_thunk%s_%s%s",
6238 : ret, prefix, reg_prefix, reg_names[regno]);
6239 : }
6240 : else
6241 18 : sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6242 : }
6243 : else
6244 : {
6245 : if (regno != INVALID_REGNUM)
6246 : ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6247 : else
6248 : {
6249 : if (ret_p)
6250 : ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6251 : else
6252 73 : ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6253 : }
6254 : }
6255 73 : }
6256 :
6257 : /* Output a call and return thunk for indirect branch. If REGNO != -1,
6258 : the function address is in REGNO and the call and return thunk looks like:
6259 :
6260 : call L2
6261 : L1:
6262 : pause
6263 : lfence
6264 : jmp L1
6265 : L2:
6266 : mov %REG, (%sp)
6267 : ret
6268 :
6269 : Otherwise, the function address is on the top of stack and the
6270 : call and return thunk looks like:
6271 :
6272 : call L2
6273 : L1:
6274 : pause
6275 : lfence
6276 : jmp L1
6277 : L2:
6278 : lea WORD_SIZE(%sp), %sp
6279 : ret
6280 : */
6281 :
6282 : static void
6283 38 : output_indirect_thunk (unsigned int regno)
6284 : {
6285 38 : char indirectlabel1[32];
6286 38 : char indirectlabel2[32];
6287 :
6288 38 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6289 : indirectlabelno++);
6290 38 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6291 : indirectlabelno++);
6292 :
6293 : /* Call */
6294 38 : fputs ("\tcall\t", asm_out_file);
6295 38 : assemble_name_raw (asm_out_file, indirectlabel2);
6296 38 : fputc ('\n', asm_out_file);
6297 :
6298 38 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6299 :
6300 : /* AMD and Intel CPUs prefer each a different instruction as loop filler.
6301 : Usage of both pause + lfence is compromise solution. */
6302 38 : fprintf (asm_out_file, "\tpause\n\tlfence\n");
6303 :
6304 : /* Jump. */
6305 38 : fputs ("\tjmp\t", asm_out_file);
6306 38 : assemble_name_raw (asm_out_file, indirectlabel1);
6307 38 : fputc ('\n', asm_out_file);
6308 :
6309 38 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6310 :
6311 : /* The above call insn pushed a word to stack. Adjust CFI info. */
6312 38 : if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6313 : {
6314 38 : if (! dwarf2out_do_cfi_asm ())
6315 : {
6316 0 : dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6317 0 : xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6318 0 : xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6319 0 : vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6320 : }
6321 38 : dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6322 38 : xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6323 38 : xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6324 38 : vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6325 38 : dwarf2out_emit_cfi (xcfi);
6326 : }
6327 :
6328 38 : if (regno != INVALID_REGNUM)
6329 : {
6330 : /* MOV. */
6331 27 : rtx xops[2];
6332 27 : xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6333 27 : xops[1] = gen_rtx_REG (word_mode, regno);
6334 27 : output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6335 : }
6336 : else
6337 : {
6338 : /* LEA. */
6339 11 : rtx xops[2];
6340 11 : xops[0] = stack_pointer_rtx;
6341 11 : xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6342 11 : output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6343 : }
6344 :
6345 38 : fputs ("\tret\n", asm_out_file);
6346 38 : if ((ix86_harden_sls & harden_sls_return))
6347 1 : fputs ("\tint3\n", asm_out_file);
6348 38 : }
6349 :
6350 : /* Output a function with a call and return thunk for indirect branch.
6351 : If REGNO != INVALID_REGNUM, the function address is in REGNO.
6352 : Otherwise, the function address is on the top of stack. Thunk is
6353 : used for function return if RET_P is true. */
6354 :
6355 : static void
6356 22 : output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6357 : unsigned int regno, bool ret_p)
6358 : {
6359 22 : char name[32];
6360 22 : tree decl;
6361 :
6362 : /* Create __x86_indirect_thunk. */
6363 22 : indirect_thunk_name (name, regno, need_prefix, ret_p);
6364 22 : decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6365 : get_identifier (name),
6366 : build_function_type_list (void_type_node, NULL_TREE));
6367 22 : DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6368 : NULL_TREE, void_type_node);
6369 22 : TREE_PUBLIC (decl) = 1;
6370 22 : TREE_STATIC (decl) = 1;
6371 22 : DECL_IGNORED_P (decl) = 1;
6372 :
6373 : #if TARGET_MACHO
6374 : if (TARGET_MACHO)
6375 : {
6376 : switch_to_section (darwin_sections[picbase_thunk_section]);
6377 : fputs ("\t.weak_definition\t", asm_out_file);
6378 : assemble_name (asm_out_file, name);
6379 : fputs ("\n\t.private_extern\t", asm_out_file);
6380 : assemble_name (asm_out_file, name);
6381 : putc ('\n', asm_out_file);
6382 : ASM_OUTPUT_LABEL (asm_out_file, name);
6383 : DECL_WEAK (decl) = 1;
6384 : }
6385 : else
6386 : #endif
6387 22 : if (USE_HIDDEN_LINKONCE)
6388 : {
6389 22 : cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6390 :
6391 22 : targetm.asm_out.unique_section (decl, 0);
6392 22 : switch_to_section (get_named_section (decl, NULL, 0));
6393 :
6394 22 : targetm.asm_out.globalize_label (asm_out_file, name);
6395 22 : fputs ("\t.hidden\t", asm_out_file);
6396 22 : assemble_name (asm_out_file, name);
6397 22 : putc ('\n', asm_out_file);
6398 22 : ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6399 : }
6400 : else
6401 : {
6402 : switch_to_section (text_section);
6403 22 : ASM_OUTPUT_LABEL (asm_out_file, name);
6404 : }
6405 :
6406 22 : DECL_INITIAL (decl) = make_node (BLOCK);
6407 22 : current_function_decl = decl;
6408 22 : allocate_struct_function (decl, false);
6409 22 : init_function_start (decl);
6410 : /* We're about to hide the function body from callees of final_* by
6411 : emitting it directly; tell them we're a thunk, if they care. */
6412 22 : cfun->is_thunk = true;
6413 22 : first_function_block_is_cold = false;
6414 : /* Make sure unwind info is emitted for the thunk if needed. */
6415 22 : final_start_function (emit_barrier (), asm_out_file, 1);
6416 :
6417 22 : output_indirect_thunk (regno);
6418 :
6419 22 : final_end_function ();
6420 22 : init_insn_lengths ();
6421 22 : free_after_compilation (cfun);
6422 22 : set_cfun (NULL);
6423 22 : current_function_decl = NULL;
6424 22 : }
6425 :
6426 : static int pic_labels_used;
6427 :
6428 : /* Fills in the label name that should be used for a pc thunk for
6429 : the given register. */
6430 :
6431 : static void
6432 37467 : get_pc_thunk_name (char name[32], unsigned int regno)
6433 : {
6434 37467 : gcc_assert (!TARGET_64BIT);
6435 :
6436 37467 : if (USE_HIDDEN_LINKONCE)
6437 37467 : sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6438 : else
6439 37467 : ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6440 37467 : }
6441 :
6442 :
6443 : /* This function generates code for -fpic that loads %ebx with
6444 : the return address of the caller and then returns. */
6445 :
6446 : static void
6447 238327 : ix86_code_end (void)
6448 : {
6449 238327 : rtx xops[2];
6450 238327 : unsigned int regno;
6451 :
6452 238327 : if (indirect_return_needed)
6453 6 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6454 : INVALID_REGNUM, true);
6455 238327 : if (indirect_return_via_cx)
6456 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6457 : CX_REG, true);
6458 238327 : if (indirect_thunk_needed)
6459 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6460 : INVALID_REGNUM, false);
6461 :
6462 2144943 : for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6463 : {
6464 1906616 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6465 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6466 : regno, false);
6467 : }
6468 :
6469 4051559 : for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6470 : {
6471 3813232 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6472 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6473 : regno, false);
6474 : }
6475 :
6476 2144943 : for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6477 : {
6478 1906616 : char name[32];
6479 1906616 : tree decl;
6480 :
6481 1906616 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6482 16 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6483 : regno, false);
6484 :
6485 1906616 : if (!(pic_labels_used & (1 << regno)))
6486 1903032 : continue;
6487 :
6488 3584 : get_pc_thunk_name (name, regno);
6489 :
6490 3584 : decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6491 : get_identifier (name),
6492 : build_function_type_list (void_type_node, NULL_TREE));
6493 3584 : DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6494 : NULL_TREE, void_type_node);
6495 3584 : TREE_PUBLIC (decl) = 1;
6496 3584 : TREE_STATIC (decl) = 1;
6497 3584 : DECL_IGNORED_P (decl) = 1;
6498 :
6499 : #if TARGET_MACHO
6500 : if (TARGET_MACHO)
6501 : {
6502 : switch_to_section (darwin_sections[picbase_thunk_section]);
6503 : fputs ("\t.weak_definition\t", asm_out_file);
6504 : assemble_name (asm_out_file, name);
6505 : fputs ("\n\t.private_extern\t", asm_out_file);
6506 : assemble_name (asm_out_file, name);
6507 : putc ('\n', asm_out_file);
6508 : ASM_OUTPUT_LABEL (asm_out_file, name);
6509 : DECL_WEAK (decl) = 1;
6510 : }
6511 : else
6512 : #endif
6513 3584 : if (USE_HIDDEN_LINKONCE)
6514 : {
6515 3584 : cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6516 :
6517 3584 : targetm.asm_out.unique_section (decl, 0);
6518 3584 : switch_to_section (get_named_section (decl, NULL, 0));
6519 :
6520 3584 : targetm.asm_out.globalize_label (asm_out_file, name);
6521 3584 : fputs ("\t.hidden\t", asm_out_file);
6522 3584 : assemble_name (asm_out_file, name);
6523 3584 : putc ('\n', asm_out_file);
6524 3584 : ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6525 : }
6526 : else
6527 : {
6528 : switch_to_section (text_section);
6529 3584 : ASM_OUTPUT_LABEL (asm_out_file, name);
6530 : }
6531 :
6532 3584 : DECL_INITIAL (decl) = make_node (BLOCK);
6533 3584 : current_function_decl = decl;
6534 3584 : allocate_struct_function (decl, false);
6535 3584 : init_function_start (decl);
6536 : /* We're about to hide the function body from callees of final_* by
6537 : emitting it directly; tell them we're a thunk, if they care. */
6538 3584 : cfun->is_thunk = true;
6539 3584 : first_function_block_is_cold = false;
6540 : /* Make sure unwind info is emitted for the thunk if needed. */
6541 3584 : final_start_function (emit_barrier (), asm_out_file, 1);
6542 :
6543 : /* Pad stack IP move with 4 instructions (two NOPs count
6544 : as one instruction). */
6545 3584 : if (TARGET_PAD_SHORT_FUNCTION)
6546 : {
6547 : int i = 8;
6548 :
6549 0 : while (i--)
6550 0 : fputs ("\tnop\n", asm_out_file);
6551 : }
6552 :
6553 7168 : xops[0] = gen_rtx_REG (Pmode, regno);
6554 7168 : xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6555 3584 : output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6556 3584 : fputs ("\tret\n", asm_out_file);
6557 3584 : final_end_function ();
6558 3584 : init_insn_lengths ();
6559 3584 : free_after_compilation (cfun);
6560 3584 : set_cfun (NULL);
6561 3584 : current_function_decl = NULL;
6562 : }
6563 :
6564 238327 : if (flag_split_stack)
6565 4710 : file_end_indicate_split_stack ();
6566 238327 : }
6567 :
6568 : /* Emit code for the SET_GOT patterns. */
6569 :
6570 : const char *
6571 33883 : output_set_got (rtx dest, rtx label)
6572 : {
6573 33883 : rtx xops[3];
6574 :
6575 33883 : xops[0] = dest;
6576 :
6577 33883 : if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
6578 : {
6579 : /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6580 : xops[2] = gen_rtx_MEM (Pmode,
6581 : gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6582 : output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6583 :
6584 : /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6585 : Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6586 : an unadorned address. */
6587 : xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6588 : SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6589 : output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6590 : return "";
6591 : }
6592 :
6593 67766 : xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6594 :
6595 33883 : if (flag_pic)
6596 : {
6597 33883 : char name[32];
6598 33883 : get_pc_thunk_name (name, REGNO (dest));
6599 33883 : pic_labels_used |= 1 << REGNO (dest);
6600 :
6601 67766 : xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6602 33883 : xops[2] = gen_rtx_MEM (QImode, xops[2]);
6603 33883 : output_asm_insn ("%!call\t%X2", xops);
6604 :
6605 : #if TARGET_MACHO
6606 : /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6607 : This is what will be referenced by the Mach-O PIC subsystem. */
6608 : if (machopic_should_output_picbase_label () || !label)
6609 : ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6610 :
6611 : /* When we are restoring the pic base at the site of a nonlocal label,
6612 : and we decided to emit the pic base above, we will still output a
6613 : local label used for calculating the correction offset (even though
6614 : the offset will be 0 in that case). */
6615 : if (label)
6616 : targetm.asm_out.internal_label (asm_out_file, "L",
6617 : CODE_LABEL_NUMBER (label));
6618 : #endif
6619 : }
6620 : else
6621 : {
6622 0 : if (TARGET_MACHO)
6623 : /* We don't need a pic base, we're not producing pic. */
6624 : gcc_unreachable ();
6625 :
6626 0 : xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6627 0 : output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6628 0 : targetm.asm_out.internal_label (asm_out_file, "L",
6629 0 : CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6630 : }
6631 :
6632 33883 : if (!TARGET_MACHO)
6633 33883 : output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6634 :
6635 33883 : return "";
6636 : }
6637 :
6638 : /* Generate an "push" pattern for input ARG. */
6639 :
6640 : rtx
6641 1891399 : gen_push (rtx arg, bool ppx_p)
6642 : {
6643 1891399 : struct machine_function *m = cfun->machine;
6644 :
6645 1891399 : if (m->fs.cfa_reg == stack_pointer_rtx)
6646 1613981 : m->fs.cfa_offset += UNITS_PER_WORD;
6647 1891399 : m->fs.sp_offset += UNITS_PER_WORD;
6648 :
6649 1891399 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6650 36 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6651 :
6652 1891399 : rtx stack = gen_rtx_MEM (word_mode,
6653 1891399 : gen_rtx_PRE_DEC (Pmode,
6654 : stack_pointer_rtx));
6655 3782710 : return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6656 : }
6657 :
6658 : rtx
6659 21 : gen_pushfl (void)
6660 : {
6661 21 : struct machine_function *m = cfun->machine;
6662 21 : rtx flags, mem;
6663 :
6664 21 : if (m->fs.cfa_reg == stack_pointer_rtx)
6665 0 : m->fs.cfa_offset += UNITS_PER_WORD;
6666 21 : m->fs.sp_offset += UNITS_PER_WORD;
6667 :
6668 21 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6669 :
6670 21 : mem = gen_rtx_MEM (word_mode,
6671 21 : gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6672 :
6673 21 : return gen_pushfl2 (word_mode, mem, flags);
6674 : }
6675 :
6676 : /* Generate an "pop" pattern for input ARG. */
6677 :
6678 : rtx
6679 1469017 : gen_pop (rtx arg, bool ppx_p)
6680 : {
6681 1469017 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6682 32 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6683 :
6684 1469017 : rtx stack = gen_rtx_MEM (word_mode,
6685 1469017 : gen_rtx_POST_INC (Pmode,
6686 : stack_pointer_rtx));
6687 :
6688 2937946 : return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6689 : }
6690 :
6691 : rtx
6692 21 : gen_popfl (void)
6693 : {
6694 21 : rtx flags, mem;
6695 :
6696 21 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6697 :
6698 21 : mem = gen_rtx_MEM (word_mode,
6699 21 : gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6700 :
6701 21 : return gen_popfl1 (word_mode, flags, mem);
6702 : }
6703 :
6704 : /* Generate a "push2" pattern for input ARG. */
6705 : rtx
6706 19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6707 : {
6708 19 : struct machine_function *m = cfun->machine;
6709 19 : const int offset = UNITS_PER_WORD * 2;
6710 :
6711 19 : if (m->fs.cfa_reg == stack_pointer_rtx)
6712 14 : m->fs.cfa_offset += offset;
6713 19 : m->fs.sp_offset += offset;
6714 :
6715 19 : if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6716 0 : reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6717 :
6718 19 : if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6719 0 : reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6720 :
6721 19 : return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6722 4 : : gen_push2_di (mem, reg1, reg2);
6723 : }
6724 :
6725 : /* Return >= 0 if there is an unused call-clobbered register available
6726 : for the entire function. */
6727 :
6728 : static unsigned int
6729 0 : ix86_select_alt_pic_regnum (void)
6730 : {
6731 0 : if (ix86_use_pseudo_pic_reg ())
6732 : return INVALID_REGNUM;
6733 :
6734 0 : if (crtl->is_leaf
6735 0 : && !crtl->profile
6736 0 : && !ix86_current_function_calls_tls_descriptor)
6737 : {
6738 0 : int i, drap;
6739 : /* Can't use the same register for both PIC and DRAP. */
6740 0 : if (crtl->drap_reg)
6741 0 : drap = REGNO (crtl->drap_reg);
6742 : else
6743 : drap = -1;
6744 0 : for (i = 2; i >= 0; --i)
6745 0 : if (i != drap && !df_regs_ever_live_p (i))
6746 : return i;
6747 : }
6748 :
6749 : return INVALID_REGNUM;
6750 : }
6751 :
6752 : /* Return true if REGNO is used by the epilogue. */
6753 :
6754 : bool
6755 1668288998 : ix86_epilogue_uses (int regno)
6756 : {
6757 : /* If there are no caller-saved registers, we preserve all registers,
6758 : except for MMX and x87 registers which aren't supported when saving
6759 : and restoring registers. Don't explicitly save SP register since
6760 : it is always preserved. */
6761 1668288998 : return (epilogue_completed
6762 263688130 : && (cfun->machine->call_saved_registers
6763 263688130 : == TYPE_NO_CALLER_SAVED_REGISTERS)
6764 27140 : && !fixed_regs[regno]
6765 4857 : && !STACK_REGNO_P (regno)
6766 1668293855 : && !MMX_REGNO_P (regno));
6767 : }
6768 :
6769 : /* Return nonzero if register REGNO can be used as a scratch register
6770 : in peephole2. */
6771 :
6772 : static bool
6773 1220092 : ix86_hard_regno_scratch_ok (unsigned int regno)
6774 : {
6775 : /* If there are no caller-saved registers, we can't use any register
6776 : as a scratch register after epilogue and use REGNO as scratch
6777 : register only if it has been used before to avoid saving and
6778 : restoring it. */
6779 1220092 : return ((cfun->machine->call_saved_registers
6780 1220092 : != TYPE_NO_CALLER_SAVED_REGISTERS)
6781 1220092 : || (!epilogue_completed
6782 0 : && df_regs_ever_live_p (regno)));
6783 : }
6784 :
6785 : /* Return TRUE if we need to save REGNO. */
6786 :
6787 : bool
6788 354412398 : ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6789 : {
6790 354412398 : rtx reg;
6791 :
6792 : /* Save and restore DRAP register between prologue and epilogue so
6793 : that stack pointer can be restored. */
6794 354412398 : if (crtl->drap_reg
6795 2285938 : && regno == REGNO (crtl->drap_reg)
6796 354468027 : && !cfun->machine->no_drap_save_restore)
6797 : return true;
6798 :
6799 354356769 : switch (cfun->machine->call_saved_registers)
6800 : {
6801 : case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6802 : break;
6803 :
6804 57152 : case TYPE_NO_CALLER_SAVED_REGISTERS:
6805 : /* If there are no caller-saved registers, we preserve all
6806 : registers, except for MMX and x87 registers which aren't
6807 : supported when saving and restoring registers. Don't
6808 : explicitly save SP register since it is always preserved.
6809 :
6810 : Don't preserve registers used for function return value. */
6811 57152 : reg = crtl->return_rtx;
6812 57152 : if (reg)
6813 : {
6814 768 : unsigned int i = REGNO (reg);
6815 768 : unsigned int nregs = REG_NREGS (reg);
6816 1522 : while (nregs-- > 0)
6817 768 : if ((i + nregs) == regno)
6818 : return false;
6819 : }
6820 :
6821 57138 : return (df_regs_ever_live_p (regno)
6822 6930 : && !fixed_regs[regno]
6823 5962 : && !STACK_REGNO_P (regno)
6824 5962 : && !MMX_REGNO_P (regno)
6825 63100 : && (regno != HARD_FRAME_POINTER_REGNUM
6826 249 : || !frame_pointer_needed));
6827 :
6828 18192 : case TYPE_NO_CALLEE_SAVED_REGISTERS:
6829 18192 : case TYPE_PRESERVE_NONE:
6830 18192 : if (regno != HARD_FRAME_POINTER_REGNUM)
6831 : return false;
6832 : break;
6833 : }
6834 :
6835 388250365 : if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6836 10780223 : && pic_offset_table_rtx)
6837 : {
6838 385014 : if (ix86_use_pseudo_pic_reg ())
6839 : {
6840 : /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6841 : _mcount in prologue. */
6842 385014 : if (!TARGET_64BIT && flag_pic && crtl->profile)
6843 : return true;
6844 : }
6845 0 : else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6846 0 : || crtl->profile
6847 0 : || crtl->calls_eh_return
6848 0 : || crtl->uses_const_pool
6849 0 : || cfun->has_nonlocal_label)
6850 0 : return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6851 : }
6852 :
6853 354281994 : if (crtl->calls_eh_return && maybe_eh_return)
6854 : {
6855 : unsigned i;
6856 13237 : for (i = 0; ; i++)
6857 : {
6858 20181 : unsigned test = EH_RETURN_DATA_REGNO (i);
6859 13671 : if (test == INVALID_REGNUM)
6860 : break;
6861 13671 : if (test == regno)
6862 : return true;
6863 13237 : }
6864 : }
6865 :
6866 354281560 : if (ignore_outlined && cfun->machine->call_ms2sysv)
6867 : {
6868 2641728 : unsigned count = cfun->machine->call_ms2sysv_extra_regs
6869 : + xlogue_layout::MIN_REGS;
6870 2641728 : if (xlogue_layout::is_stub_managed_reg (regno, count))
6871 : return false;
6872 : }
6873 :
6874 353781691 : return (df_regs_ever_live_p (regno)
6875 373148687 : && !call_used_or_fixed_reg_p (regno)
6876 372517980 : && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6877 : }
6878 :
6879 : /* Return number of saved general prupose registers. */
6880 :
6881 : static int
6882 8199850 : ix86_nsaved_regs (void)
6883 : {
6884 8199850 : int nregs = 0;
6885 8199850 : int regno;
6886 :
6887 762586050 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6888 754386200 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6889 8185098 : nregs ++;
6890 8199850 : return nregs;
6891 : }
6892 :
6893 : /* Return number of saved SSE registers. */
6894 :
6895 : static int
6896 8234936 : ix86_nsaved_sseregs (void)
6897 : {
6898 8234936 : int nregs = 0;
6899 8234936 : int regno;
6900 :
6901 7432672 : if (!TARGET_64BIT_MS_ABI
6902 8234936 : && (cfun->machine->call_saved_registers
6903 8009335 : != TYPE_NO_CALLER_SAVED_REGISTERS))
6904 : return 0;
6905 21049434 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6906 20823096 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6907 1897045 : nregs ++;
6908 : return nregs;
6909 : }
6910 :
6911 : /* Given FROM and TO register numbers, say whether this elimination is
6912 : allowed. If stack alignment is needed, we can only replace argument
6913 : pointer with hard frame pointer, or replace frame pointer with stack
6914 : pointer. Otherwise, frame pointer elimination is automatically
6915 : handled and all other eliminations are valid. */
6916 :
6917 : static bool
6918 48536735 : ix86_can_eliminate (const int from, const int to)
6919 : {
6920 48536735 : if (stack_realign_fp)
6921 1713236 : return ((from == ARG_POINTER_REGNUM
6922 1713236 : && to == HARD_FRAME_POINTER_REGNUM)
6923 1713236 : || (from == FRAME_POINTER_REGNUM
6924 1713236 : && to == STACK_POINTER_REGNUM));
6925 : else
6926 86984450 : return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6927 : }
6928 :
6929 : /* Return the offset between two registers, one to be eliminated, and the other
6930 : its replacement, at the start of a routine. */
6931 :
6932 : HOST_WIDE_INT
6933 141386275 : ix86_initial_elimination_offset (int from, int to)
6934 : {
6935 141386275 : struct ix86_frame &frame = cfun->machine->frame;
6936 :
6937 141386275 : if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6938 10450296 : return frame.hard_frame_pointer_offset;
6939 130935979 : else if (from == FRAME_POINTER_REGNUM
6940 130935979 : && to == HARD_FRAME_POINTER_REGNUM)
6941 8164438 : return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6942 : else
6943 : {
6944 122771541 : gcc_assert (to == STACK_POINTER_REGNUM);
6945 :
6946 122771541 : if (from == ARG_POINTER_REGNUM)
6947 114607103 : return frame.stack_pointer_offset;
6948 :
6949 8164438 : gcc_assert (from == FRAME_POINTER_REGNUM);
6950 8164438 : return frame.stack_pointer_offset - frame.frame_pointer_offset;
6951 : }
6952 : }
6953 :
6954 : /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6955 : void
6956 0 : warn_once_call_ms2sysv_xlogues (const char *feature)
6957 : {
6958 0 : static bool warned_once = false;
6959 0 : if (!warned_once)
6960 : {
6961 0 : warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6962 : feature);
6963 0 : warned_once = true;
6964 : }
6965 0 : }
6966 :
6967 : /* Return the probing interval for -fstack-clash-protection. */
6968 :
6969 : static HOST_WIDE_INT
6970 495 : get_probe_interval (void)
6971 : {
6972 341 : if (flag_stack_clash_protection)
6973 412 : return (HOST_WIDE_INT_1U
6974 412 : << param_stack_clash_protection_probe_interval);
6975 : else
6976 : return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6977 : }
6978 :
/* Amount of space, in bytes, used by -fsplit-stack: the allocation
   routines set a field in the TCB to the bottom of the stack plus
   this much space.  */

#define SPLIT_STACK_AVAILABLE 256
6984 :
6985 : /* Return true if push2/pop2 can be generated. */
6986 :
6987 : static bool
6988 8200506 : ix86_can_use_push2pop2 (void)
6989 : {
6990 : /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6991 8200506 : unsigned int incoming_stack_boundary
6992 8200506 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6993 8200506 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6994 8200506 : return incoming_stack_boundary % 128 == 0;
6995 : }
6996 :
6997 : /* Helper function to determine whether push2/pop2 can be used in prologue or
6998 : epilogue for register save/restore. */
6999 : static bool
7000 8199850 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
7001 : {
7002 8199850 : if (!ix86_can_use_push2pop2 ())
7003 : return false;
7004 8163939 : int aligned = cfun->machine->fs.sp_offset % 16 == 0;
7005 8163939 : return TARGET_APX_PUSH2POP2
7006 2852 : && !cfun->machine->frame.save_regs_using_mov
7007 2840 : && cfun->machine->func_type == TYPE_NORMAL
7008 8166771 : && (nregs + aligned) >= 3;
7009 : }
7010 :
7011 : /* Check if push/pop should be used to save/restore registers. */
7012 : static bool
7013 8921123 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
7014 : {
7015 3207936 : return ((!to_allocate && cfun->machine->frame.nregs <= 1)
7016 5958988 : || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7017 : /* If static stack checking is enabled and done with probes,
7018 : the registers need to be saved before allocating the frame. */
7019 5958327 : || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7020 : /* If stack clash probing needs a loop, then it needs a
7021 : scratch register. But the returned register is only guaranteed
7022 : to be safe to use after register saves are complete. So if
7023 : stack clash protections are enabled and the allocated frame is
7024 : larger than the probe interval, then use pushes to save
7025 : callee saved registers. */
7026 14879376 : || (flag_stack_clash_protection
7027 341 : && !ix86_target_stack_probe ()
7028 341 : && to_allocate > get_probe_interval ()));
7029 : }
7030 :
7031 : /* Fill structure ix86_frame about frame of currently computed function. */
7032 :
7033 : static void
7034 8199850 : ix86_compute_frame_layout (void)
7035 : {
7036 8199850 : struct ix86_frame *frame = &cfun->machine->frame;
7037 8199850 : struct machine_function *m = cfun->machine;
7038 8199850 : unsigned HOST_WIDE_INT stack_alignment_needed;
7039 8199850 : HOST_WIDE_INT offset;
7040 8199850 : unsigned HOST_WIDE_INT preferred_alignment;
7041 8199850 : HOST_WIDE_INT size = ix86_get_frame_size ();
7042 8199850 : HOST_WIDE_INT to_allocate;
7043 :
7044 : /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
7045 : * ms_abi functions that call a sysv function. We now need to prune away
7046 : * cases where it should be disabled. */
7047 8199850 : if (TARGET_64BIT && m->call_ms2sysv)
7048 : {
7049 35225 : gcc_assert (TARGET_64BIT_MS_ABI);
7050 35225 : gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
7051 35225 : gcc_assert (!TARGET_SEH);
7052 35225 : gcc_assert (TARGET_SSE);
7053 35225 : gcc_assert (!ix86_using_red_zone ());
7054 :
7055 35225 : if (crtl->calls_eh_return)
7056 : {
7057 0 : gcc_assert (!reload_completed);
7058 0 : m->call_ms2sysv = false;
7059 0 : warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
7060 : }
7061 :
7062 35225 : else if (ix86_static_chain_on_stack)
7063 : {
7064 0 : gcc_assert (!reload_completed);
7065 0 : m->call_ms2sysv = false;
7066 0 : warn_once_call_ms2sysv_xlogues ("static call chains");
7067 : }
7068 :
7069 : /* Finally, compute which registers the stub will manage. */
7070 : else
7071 : {
7072 35225 : unsigned count = xlogue_layout::count_stub_managed_regs ();
7073 35225 : m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
7074 35225 : m->call_ms2sysv_pad_in = 0;
7075 : }
7076 : }
7077 :
7078 8199850 : frame->nregs = ix86_nsaved_regs ();
7079 8199850 : frame->nsseregs = ix86_nsaved_sseregs ();
7080 :
7081 : /* 64-bit MS ABI seem to require stack alignment to be always 16,
7082 : except for function prologues, leaf functions and when the default
7083 : incoming stack boundary is overridden at command line or via
7084 : force_align_arg_pointer attribute.
7085 :
7086 : Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
7087 : at call sites, including profile function calls.
7088 :
7089 : For APX push2/pop2, the stack also requires 128b alignment. */
7090 8199850 : if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
7091 65 : && crtl->preferred_stack_boundary < 128)
7092 8199913 : || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
7093 225599 : && crtl->preferred_stack_boundary < 128)
7094 0 : && (!crtl->is_leaf || cfun->calls_alloca != 0
7095 0 : || ix86_current_function_calls_tls_descriptor
7096 0 : || (TARGET_MACHO && crtl->profile)
7097 0 : || ix86_incoming_stack_boundary < 128)))
7098 : {
7099 2 : crtl->preferred_stack_boundary = 128;
7100 2 : if (crtl->stack_alignment_needed < 128)
7101 1 : crtl->stack_alignment_needed = 128;
7102 : }
7103 :
7104 8199850 : stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7105 8199850 : preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7106 :
7107 8199850 : gcc_assert (!size || stack_alignment_needed);
7108 9002085 : gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7109 8199850 : gcc_assert (preferred_alignment <= stack_alignment_needed);
7110 :
7111 : /* The only ABI saving SSE regs should be 64-bit ms_abi or with
7112 : no_caller_saved_registers attribute. */
7113 8199850 : gcc_assert (TARGET_64BIT
7114 : || (cfun->machine->call_saved_registers
7115 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7116 : || !frame->nsseregs);
7117 8199850 : if (TARGET_64BIT && m->call_ms2sysv)
7118 : {
7119 35225 : gcc_assert (stack_alignment_needed >= 16);
7120 35225 : gcc_assert ((cfun->machine->call_saved_registers
7121 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7122 : || !frame->nsseregs);
7123 : }
7124 :
7125 : /* For SEH we have to limit the amount of code movement into the prologue.
7126 : At present we do this via a BLOCKAGE, at which point there's very little
7127 : scheduling that can be done, which means that there's very little point
7128 : in doing anything except PUSHs. */
7129 8199850 : if (TARGET_SEH)
7130 : m->use_fast_prologue_epilogue = false;
7131 8199850 : else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7132 : {
7133 7863617 : int count = frame->nregs;
7134 7863617 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
7135 :
7136 : /* The fast prologue uses move instead of push to save registers. This
7137 : is significantly longer, but also executes faster as modern hardware
7138 : can execute the moves in parallel, but can't do that for push/pop.
7139 :
7140 : Be careful about choosing what prologue to emit: When function takes
7141 : many instructions to execute we may use slow version as well as in
7142 : case function is known to be outside hot spot (this is known with
7143 : feedback only). Weight the size of function by number of registers
7144 : to save as it is cheap to use one or two push instructions but very
7145 : slow to use many of them.
7146 :
7147 : Calling this hook multiple times with the same frame requirements
7148 : must produce the same layout, since the RA might otherwise be
7149 : unable to reach a fixed point or might fail its final sanity checks.
7150 : This means that once we've assumed that a function does or doesn't
7151 : have a particular size, we have to stick to that assumption
7152 : regardless of how the function has changed since. */
7153 7863617 : if (count)
7154 2600622 : count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7155 7863617 : if (node->frequency < NODE_FREQUENCY_NORMAL
7156 7168057 : || (flag_branch_probabilities
7157 1051 : && node->frequency < NODE_FREQUENCY_HOT))
7158 695890 : m->use_fast_prologue_epilogue = false;
7159 : else
7160 : {
7161 7167727 : if (count != frame->expensive_count)
7162 : {
7163 284424 : frame->expensive_count = count;
7164 284424 : frame->expensive_p = expensive_function_p (count);
7165 : }
7166 7167727 : m->use_fast_prologue_epilogue = !frame->expensive_p;
7167 : }
7168 : }
7169 :
7170 8199850 : frame->save_regs_using_mov
7171 8199850 : = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7172 :
7173 : /* Skip return address and error code in exception handler. */
7174 8199850 : offset = INCOMING_FRAME_SP_OFFSET;
7175 :
7176 : /* Skip pushed static chain. */
7177 8199850 : if (ix86_static_chain_on_stack)
7178 0 : offset += UNITS_PER_WORD;
7179 :
7180 : /* Skip saved base pointer. */
7181 8199850 : if (frame_pointer_needed)
7182 2828267 : offset += UNITS_PER_WORD;
7183 8199850 : frame->hfp_save_offset = offset;
7184 :
7185 : /* The traditional frame pointer location is at the top of the frame. */
7186 8199850 : frame->hard_frame_pointer_offset = offset;
7187 :
7188 : /* Register save area */
7189 8199850 : offset += frame->nregs * UNITS_PER_WORD;
7190 8199850 : frame->reg_save_offset = offset;
7191 :
7192 : /* Calculate the size of the va-arg area (not including padding, if any). */
7193 8199850 : frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7194 :
7195 : /* Also adjust stack_realign_offset for the largest alignment of
7196 : stack slot actually used. */
7197 8199850 : if (stack_realign_fp
7198 7891611 : || (cfun->machine->max_used_stack_alignment != 0
7199 138 : && (offset % cfun->machine->max_used_stack_alignment) != 0))
7200 : {
7201 : /* We may need a 16-byte aligned stack for the remainder of the
7202 : register save area, but the stack frame for the local function
7203 : may require a greater alignment if using AVX/2/512. In order
7204 : to avoid wasting space, we first calculate the space needed for
7205 : the rest of the register saves, add that to the stack pointer,
7206 : and then realign the stack to the boundary of the start of the
7207 : frame for the local function. */
7208 308308 : HOST_WIDE_INT space_needed = 0;
7209 308308 : HOST_WIDE_INT sse_reg_space_needed = 0;
7210 :
7211 308308 : if (TARGET_64BIT)
7212 : {
7213 306523 : if (m->call_ms2sysv)
7214 : {
7215 6415 : m->call_ms2sysv_pad_in = 0;
7216 6415 : space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7217 : }
7218 :
7219 300108 : else if (frame->nsseregs)
7220 : /* The only ABI that has saved SSE registers (Win64) also has a
7221 : 16-byte aligned default stack. However, many programs violate
7222 : the ABI, and Wine64 forces stack realignment to compensate. */
7223 6447 : space_needed = frame->nsseregs * 16;
7224 :
7225 306523 : sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7226 :
7227 : /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7228 : rounding to be pedantic. */
7229 306523 : space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7230 : }
7231 : else
7232 1785 : space_needed = frame->va_arg_size;
7233 :
7234 : /* Record the allocation size required prior to the realignment AND. */
7235 308308 : frame->stack_realign_allocate = space_needed;
7236 :
7237 : /* The re-aligned stack starts at frame->stack_realign_offset. Values
7238 : before this point are not directly comparable with values below
7239 : this point. Use sp_valid_at to determine if the stack pointer is
7240 : valid for a given offset, fp_valid_at for the frame pointer, or
7241 : choose_baseaddr to have a base register chosen for you.
7242 :
7243 : Note that the result of (frame->stack_realign_offset
7244 : & (stack_alignment_needed - 1)) may not equal zero. */
7245 308308 : offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7246 308308 : frame->stack_realign_offset = offset - space_needed;
7247 308308 : frame->sse_reg_save_offset = frame->stack_realign_offset
7248 308308 : + sse_reg_space_needed;
7249 308308 : }
7250 : else
7251 : {
7252 7891542 : frame->stack_realign_offset = offset;
7253 :
7254 7891542 : if (TARGET_64BIT && m->call_ms2sysv)
7255 : {
7256 28810 : m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7257 28810 : offset += xlogue_layout::get_instance ().get_stack_space_used ();
7258 : }
7259 :
7260 : /* Align and set SSE register save area. */
7261 7862732 : else if (frame->nsseregs)
7262 : {
7263 : /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7264 : required and the DRAP re-alignment boundary is at least 16 bytes,
7265 : then we want the SSE register save area properly aligned. */
7266 183247 : if (ix86_incoming_stack_boundary >= 128
7267 6400 : || (stack_realign_drap && stack_alignment_needed >= 16))
7268 183247 : offset = ROUND_UP (offset, 16);
7269 183247 : offset += frame->nsseregs * 16;
7270 : }
7271 7891542 : frame->sse_reg_save_offset = offset;
7272 7891542 : offset += frame->va_arg_size;
7273 : }
7274 :
7275 : /* Align start of frame for local function. When a function call
7276 : is removed, it may become a leaf function. But if argument may
7277 : be passed on stack, we need to align the stack when there is no
7278 : tail call. */
7279 8199850 : if (m->call_ms2sysv
7280 8164625 : || frame->va_arg_size != 0
7281 8085185 : || size != 0
7282 4398766 : || !crtl->is_leaf
7283 2058478 : || (!crtl->tail_call_emit
7284 1745647 : && cfun->machine->outgoing_args_on_stack)
7285 2058428 : || cfun->calls_alloca
7286 10256569 : || ix86_current_function_calls_tls_descriptor)
7287 6143543 : offset = ROUND_UP (offset, stack_alignment_needed);
7288 :
7289 : /* Frame pointer points here. */
7290 8199850 : frame->frame_pointer_offset = offset;
7291 :
7292 8199850 : offset += size;
7293 :
7294 : /* Add outgoing arguments area. Can be skipped if we eliminated
7295 : all the function calls as dead code.
7296 : Skipping is however impossible when function calls alloca. Alloca
7297 : expander assumes that last crtl->outgoing_args_size
7298 : of stack frame are unused. */
7299 8199850 : if (ACCUMULATE_OUTGOING_ARGS
7300 8817966 : && (!crtl->is_leaf || cfun->calls_alloca
7301 391912 : || ix86_current_function_calls_tls_descriptor))
7302 : {
7303 226204 : offset += crtl->outgoing_args_size;
7304 226204 : frame->outgoing_arguments_size = crtl->outgoing_args_size;
7305 : }
7306 : else
7307 7973646 : frame->outgoing_arguments_size = 0;
7308 :
7309 : /* Align stack boundary. Only needed if we're calling another function
7310 : or using alloca. */
7311 2786886 : if (!crtl->is_leaf || cfun->calls_alloca
7312 10983328 : || ix86_current_function_calls_tls_descriptor)
7313 5418184 : offset = ROUND_UP (offset, preferred_alignment);
7314 :
7315 : /* We've reached end of stack frame. */
7316 8199850 : frame->stack_pointer_offset = offset;
7317 :
7318 : /* Size prologue needs to allocate. */
7319 8199850 : to_allocate = offset - frame->sse_reg_save_offset;
7320 :
7321 8199850 : if (save_regs_using_push_pop (to_allocate))
7322 2591043 : frame->save_regs_using_mov = false;
7323 :
7324 8199850 : if (ix86_using_red_zone ()
7325 7171825 : && crtl->sp_is_unchanging
7326 6527706 : && crtl->is_leaf
7327 2687437 : && !cfun->machine->asm_redzone_clobber_seen
7328 2687424 : && !ix86_pc_thunk_call_expanded
7329 10887274 : && !ix86_current_function_calls_tls_descriptor)
7330 : {
7331 2687409 : frame->red_zone_size = to_allocate;
7332 2687409 : if (frame->save_regs_using_mov)
7333 139945 : frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7334 2687409 : if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7335 102734 : frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7336 : }
7337 : else
7338 5512441 : frame->red_zone_size = 0;
7339 8199850 : frame->stack_pointer_offset -= frame->red_zone_size;
7340 :
7341 : /* The SEH frame pointer location is near the bottom of the frame.
7342 : This is enforced by the fact that the difference between the
7343 : stack pointer and the frame pointer is limited to 240 bytes in
7344 : the unwind data structure. */
7345 8199850 : if (TARGET_SEH)
7346 : {
7347 : /* Force the frame pointer to point at or below the lowest register save
7348 : area, see the SEH code in config/i386/winnt.cc for the rationale. */
7349 : frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7350 :
7351 : /* If we can leave the frame pointer where it is, do so; however return
7352 : the establisher frame for __builtin_frame_address (0) or else if the
7353 : frame overflows the SEH maximum frame size.
7354 :
7355 : Note that the value returned by __builtin_frame_address (0) is quite
7356 : constrained, because setjmp is piggybacked on the SEH machinery with
7357 : recent versions of MinGW:
7358 :
7359 : # elif defined(__SEH__)
7360 : # if defined(__aarch64__) || defined(_ARM64_)
7361 : # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7362 : # elif (__MINGW_GCC_VERSION < 40702)
7363 : # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7364 : # else
7365 : # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7366 : # endif
7367 :
7368 : and the second argument passed to _setjmp, if not null, is forwarded
7369 : to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7370 : built an ExceptionRecord on the fly describing the setjmp buffer). */
7371 : const HOST_WIDE_INT diff
7372 : = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7373 : if (diff <= 255 && !crtl->accesses_prior_frames)
7374 : {
7375 : /* The resulting diff will be a multiple of 16 lower than 255,
7376 : i.e. at most 240 as required by the unwind data structure. */
7377 : frame->hard_frame_pointer_offset += (diff & 15);
7378 : }
7379 : else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7380 : {
7381 : /* Ideally we'd determine what portion of the local stack frame
7382 : (within the constraint of the lowest 240) is most heavily used.
7383 : But without that complication, simply bias the frame pointer
7384 : by 128 bytes so as to maximize the amount of the local stack
7385 : frame that is addressable with 8-bit offsets. */
7386 : frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7387 : }
7388 : else
7389 : frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7390 : }
7391 8199850 : }
7392 :
7393 : /* This is semi-inlined memory_address_length, but simplified
7394 : since we know that we're always dealing with reg+offset, and
7395 : to avoid having to create and discard all that rtl. */
7396 :
7397 : static inline int
7398 1003599 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7399 : {
7400 1003599 : int len = 4;
7401 :
7402 0 : if (offset == 0)
7403 : {
7404 : /* EBP and R13 cannot be encoded without an offset. */
7405 0 : len = (regno == BP_REG || regno == R13_REG);
7406 : }
7407 995531 : else if (IN_RANGE (offset, -128, 127))
7408 630674 : len = 1;
7409 :
7410 : /* ESP and R12 must be encoded with a SIB byte. */
7411 0 : if (regno == SP_REG || regno == R12_REG)
7412 0 : len++;
7413 :
7414 1003599 : return len;
7415 : }
7416 :
7417 : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7418 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7419 :
7420 : static bool
7421 3501894 : sp_valid_at (HOST_WIDE_INT cfa_offset)
7422 : {
7423 3501894 : const struct machine_frame_state &fs = cfun->machine->fs;
7424 3501894 : if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7425 : {
7426 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7427 46600 : gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7428 : return false;
7429 : }
7430 3455294 : return fs.sp_valid;
7431 : }
7432 :
7433 : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7434 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7435 :
7436 : static inline bool
7437 1355438 : fp_valid_at (HOST_WIDE_INT cfa_offset)
7438 : {
7439 1355438 : const struct machine_frame_state &fs = cfun->machine->fs;
7440 1355438 : if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7441 : {
7442 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7443 28328 : gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7444 : return false;
7445 : }
7446 1327110 : return fs.fp_valid;
7447 : }
7448 :
7449 : /* Choose a base register based upon alignment requested, speed and/or
7450 : size. */
7451 :
7452 : static void
7453 1355438 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7454 : HOST_WIDE_INT &base_offset,
7455 : unsigned int align_reqested, unsigned int *align)
7456 : {
7457 1355438 : const struct machine_function *m = cfun->machine;
7458 1355438 : unsigned int hfp_align;
7459 1355438 : unsigned int drap_align;
7460 1355438 : unsigned int sp_align;
7461 1355438 : bool hfp_ok = fp_valid_at (cfa_offset);
7462 1355438 : bool drap_ok = m->fs.drap_valid;
7463 1355438 : bool sp_ok = sp_valid_at (cfa_offset);
7464 :
7465 1355438 : hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7466 :
7467 : /* Filter out any registers that don't meet the requested alignment
7468 : criteria. */
7469 1355438 : if (align_reqested)
7470 : {
7471 961194 : if (m->fs.realigned)
7472 28160 : hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7473 : /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
7474 : notes (which we would need to use a realigned stack pointer),
7475 : so disable on SEH targets. */
7476 933034 : else if (m->fs.sp_realigned)
7477 28328 : sp_align = crtl->stack_alignment_needed;
7478 :
7479 961194 : hfp_ok = hfp_ok && hfp_align >= align_reqested;
7480 961194 : drap_ok = drap_ok && drap_align >= align_reqested;
7481 961194 : sp_ok = sp_ok && sp_align >= align_reqested;
7482 : }
7483 :
7484 1355438 : if (m->use_fast_prologue_epilogue)
7485 : {
7486 : /* Choose the base register most likely to allow the most scheduling
7487 : opportunities. Generally FP is valid throughout the function,
7488 : while DRAP must be reloaded within the epilogue. But choose either
7489 : over the SP due to increased encoding size. */
7490 :
7491 647174 : if (hfp_ok)
7492 : {
7493 118719 : base_reg = hard_frame_pointer_rtx;
7494 118719 : base_offset = m->fs.fp_offset - cfa_offset;
7495 : }
7496 528455 : else if (drap_ok)
7497 : {
7498 0 : base_reg = crtl->drap_reg;
7499 0 : base_offset = 0 - cfa_offset;
7500 : }
7501 528455 : else if (sp_ok)
7502 : {
7503 528455 : base_reg = stack_pointer_rtx;
7504 528455 : base_offset = m->fs.sp_offset - cfa_offset;
7505 : }
7506 : }
7507 : else
7508 : {
7509 708264 : HOST_WIDE_INT toffset;
7510 708264 : int len = 16, tlen;
7511 :
7512 : /* Choose the base register with the smallest address encoding.
7513 : With a tie, choose FP > DRAP > SP. */
7514 708264 : if (sp_ok)
7515 : {
7516 690948 : base_reg = stack_pointer_rtx;
7517 690948 : base_offset = m->fs.sp_offset - cfa_offset;
7518 1373828 : len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7519 : }
7520 708264 : if (drap_ok)
7521 : {
7522 0 : toffset = 0 - cfa_offset;
7523 0 : tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7524 0 : if (tlen <= len)
7525 : {
7526 0 : base_reg = crtl->drap_reg;
7527 0 : base_offset = toffset;
7528 0 : len = tlen;
7529 : }
7530 : }
7531 708264 : if (hfp_ok)
7532 : {
7533 312651 : toffset = m->fs.fp_offset - cfa_offset;
7534 312651 : tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7535 312651 : if (tlen <= len)
7536 : {
7537 221981 : base_reg = hard_frame_pointer_rtx;
7538 221981 : base_offset = toffset;
7539 : }
7540 : }
7541 : }
7542 :
7543 : /* Set the align return value. */
7544 1355438 : if (align)
7545 : {
7546 961194 : if (base_reg == stack_pointer_rtx)
7547 679497 : *align = sp_align;
7548 281697 : else if (base_reg == crtl->drap_reg)
7549 0 : *align = drap_align;
7550 281697 : else if (base_reg == hard_frame_pointer_rtx)
7551 281697 : *align = hfp_align;
7552 : }
7553 1355438 : }
7554 :
7555 : /* Return an RTX that points to CFA_OFFSET within the stack frame and
7556 : the alignment of address. If ALIGN is non-null, it should point to
7557 : an alignment value (in bits) that is preferred or zero and will
7558 : receive the alignment of the base register that was selected,
7559 : irrespective of rather or not CFA_OFFSET is a multiple of that
7560 : alignment value. If it is possible for the base register offset to be
7561 : non-immediate then SCRATCH_REGNO should specify a scratch register to
7562 : use.
7563 :
7564 : The valid base registers are taken from CFUN->MACHINE->FS. */
7565 :
7566 : static rtx
7567 1355438 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7568 : unsigned int scratch_regno = INVALID_REGNUM)
7569 : {
7570 1355438 : rtx base_reg = NULL;
7571 1355438 : HOST_WIDE_INT base_offset = 0;
7572 :
7573 : /* If a specific alignment is requested, try to get a base register
7574 : with that alignment first. */
7575 1355438 : if (align && *align)
7576 961194 : choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7577 :
7578 1355438 : if (!base_reg)
7579 394244 : choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7580 :
7581 1355438 : gcc_assert (base_reg != NULL);
7582 :
7583 1355438 : rtx base_offset_rtx = GEN_INT (base_offset);
7584 :
7585 1405991 : if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7586 : {
7587 1 : gcc_assert (scratch_regno != INVALID_REGNUM);
7588 :
7589 1 : rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7590 1 : emit_move_insn (scratch_reg, base_offset_rtx);
7591 :
7592 1 : return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7593 : }
7594 :
7595 1405990 : return plus_constant (Pmode, base_reg, base_offset);
7596 : }
7597 :
7598 : /* Emit code to save registers in the prologue. */
7599 :
7600 : static void
7601 427777 : ix86_emit_save_regs (void)
7602 : {
7603 427777 : int regno;
7604 427777 : rtx_insn *insn;
7605 427777 : bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
7606 :
7607 427777 : if (!TARGET_APX_PUSH2POP2
7608 90 : || !ix86_can_use_push2pop2 ()
7609 427865 : || cfun->machine->func_type != TYPE_NORMAL)
7610 : {
7611 39775170 : for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7612 39347480 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7613 : {
7614 1196180 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7615 : use_ppx));
7616 1196180 : RTX_FRAME_RELATED_P (insn) = 1;
7617 : }
7618 : }
7619 : else
7620 : {
7621 87 : int regno_list[2];
7622 87 : regno_list[0] = regno_list[1] = -1;
7623 87 : int loaded_regnum = 0;
7624 87 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7625 :
7626 8091 : for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7627 8004 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7628 : {
7629 127 : if (aligned)
7630 : {
7631 45 : regno_list[loaded_regnum++] = regno;
7632 45 : if (loaded_regnum == 2)
7633 : {
7634 19 : gcc_assert (regno_list[0] != -1
7635 : && regno_list[1] != -1
7636 : && regno_list[0] != regno_list[1]);
7637 19 : const int offset = UNITS_PER_WORD * 2;
7638 19 : rtx mem = gen_rtx_MEM (TImode,
7639 19 : gen_rtx_PRE_DEC (Pmode,
7640 : stack_pointer_rtx));
7641 19 : insn = emit_insn (gen_push2 (mem,
7642 : gen_rtx_REG (word_mode,
7643 : regno_list[0]),
7644 : gen_rtx_REG (word_mode,
7645 : regno_list[1]),
7646 : use_ppx));
7647 19 : RTX_FRAME_RELATED_P (insn) = 1;
7648 19 : rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7649 :
7650 57 : for (int i = 0; i < 2; i++)
7651 : {
7652 76 : rtx dwarf_reg = gen_rtx_REG (word_mode,
7653 38 : regno_list[i]);
7654 38 : rtx sp_offset = plus_constant (Pmode,
7655 : stack_pointer_rtx,
7656 38 : + UNITS_PER_WORD
7657 38 : * (1 - i));
7658 38 : rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7659 : sp_offset),
7660 : dwarf_reg);
7661 38 : RTX_FRAME_RELATED_P (tmp) = 1;
7662 38 : XVECEXP (dwarf, 0, i + 1) = tmp;
7663 : }
7664 19 : rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7665 : plus_constant (Pmode,
7666 : stack_pointer_rtx,
7667 : -offset));
7668 19 : RTX_FRAME_RELATED_P (sp_tmp) = 1;
7669 19 : XVECEXP (dwarf, 0, 0) = sp_tmp;
7670 19 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7671 :
7672 19 : loaded_regnum = 0;
7673 19 : regno_list[0] = regno_list[1] = -1;
7674 : }
7675 : }
7676 : else
7677 : {
7678 82 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7679 : use_ppx));
7680 82 : RTX_FRAME_RELATED_P (insn) = 1;
7681 82 : aligned = true;
7682 : }
7683 : }
7684 87 : if (loaded_regnum == 1)
7685 : {
7686 7 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
7687 7 : regno_list[0]),
7688 : use_ppx));
7689 7 : RTX_FRAME_RELATED_P (insn) = 1;
7690 : }
7691 : }
7692 427777 : }
7693 :
7694 : /* Emit a single register save at CFA - CFA_OFFSET. */
7695 :
7696 : static void
7697 607111 : ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7698 : HOST_WIDE_INT cfa_offset)
7699 : {
7700 607111 : struct machine_function *m = cfun->machine;
7701 607111 : rtx reg = gen_rtx_REG (mode, regno);
7702 607111 : rtx mem, addr, base, insn;
7703 607111 : unsigned int align = GET_MODE_ALIGNMENT (mode);
7704 :
7705 607111 : addr = choose_baseaddr (cfa_offset, &align);
7706 607111 : mem = gen_frame_mem (mode, addr);
7707 :
7708 : /* The location alignment depends upon the base register. */
7709 607111 : align = MIN (GET_MODE_ALIGNMENT (mode), align);
7710 607111 : gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7711 607111 : set_mem_align (mem, align);
7712 :
7713 607111 : insn = emit_insn (gen_rtx_SET (mem, reg));
7714 607111 : RTX_FRAME_RELATED_P (insn) = 1;
7715 :
7716 607111 : base = addr;
7717 607111 : if (GET_CODE (base) == PLUS)
7718 595363 : base = XEXP (base, 0);
7719 607111 : gcc_checking_assert (REG_P (base));
7720 :
7721 : /* When saving registers into a re-aligned local stack frame, avoid
7722 : any tricky guessing by dwarf2out. */
7723 607111 : if (m->fs.realigned)
7724 : {
7725 12800 : gcc_checking_assert (stack_realign_drap);
7726 :
7727 12800 : if (regno == REGNO (crtl->drap_reg))
7728 : {
7729 : /* A bit of a hack. We force the DRAP register to be saved in
7730 : the re-aligned stack frame, which provides us with a copy
7731 : of the CFA that will last past the prologue. Install it. */
7732 0 : gcc_checking_assert (cfun->machine->fs.fp_valid);
7733 0 : addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7734 0 : cfun->machine->fs.fp_offset - cfa_offset);
7735 0 : mem = gen_rtx_MEM (mode, addr);
7736 0 : add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7737 : }
7738 : else
7739 : {
7740 : /* The frame pointer is a stable reference within the
7741 : aligned frame. Use it. */
7742 12800 : gcc_checking_assert (cfun->machine->fs.fp_valid);
7743 12800 : addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7744 12800 : cfun->machine->fs.fp_offset - cfa_offset);
7745 12800 : mem = gen_rtx_MEM (mode, addr);
7746 12800 : add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7747 : }
7748 : }
7749 :
7750 594311 : else if (base == stack_pointer_rtx && m->fs.sp_realigned
7751 12881 : && cfa_offset >= m->fs.sp_realigned_offset)
7752 : {
7753 12881 : gcc_checking_assert (stack_realign_fp);
7754 12881 : add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7755 : }
7756 :
7757 : /* The memory may not be relative to the current CFA register,
7758 : which means that we may need to generate a new pattern for
7759 : use by the unwind info. */
7760 581430 : else if (base != m->fs.cfa_reg)
7761 : {
7762 45097 : addr = plus_constant (Pmode, m->fs.cfa_reg,
7763 45097 : m->fs.cfa_offset - cfa_offset);
7764 45097 : mem = gen_rtx_MEM (mode, addr);
7765 45097 : add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7766 : }
7767 607111 : }
7768 :
7769 : /* Emit code to save registers using MOV insns.
7770 : First register is stored at CFA - CFA_OFFSET. */
7771 : static void
7772 44146 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7773 : {
7774 44146 : unsigned int regno;
7775 :
7776 4105578 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7777 4061432 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7778 : {
7779 : /* Skip registers, already processed by shrink wrap separate. */
7780 188586 : if (!cfun->machine->reg_is_wrapped_separately[regno])
7781 84047 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7782 203288 : cfa_offset -= UNITS_PER_WORD;
7783 : }
7784 44146 : }
7785 :
7786 : /* Emit code to save SSE registers using MOV insns.
7787 : First register is stored at CFA - CFA_OFFSET. */
7788 : static void
7789 33363 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7790 : {
7791 33363 : unsigned int regno;
7792 :
7793 3102759 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7794 3069396 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7795 : {
7796 333657 : ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7797 333657 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
7798 : }
7799 33363 : }
7800 :
/* Chain of REG_CFA_RESTORE notes that ix86_add_cfa_restore_note built
   while it had no insn to attach them to.
   ix86_add_queued_cfa_restore_notes splices the whole chain onto an
   insn and resets this to NULL_RTX.  */
7801 : static GTY(()) rtx queued_cfa_restores;
7802 :
7803 : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7804 : manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7805 : Don't add the note if the previously saved value will be left untouched
7806 : within stack red-zone till return, as unwinders can find the same value
7807 : in the register and on the stack. */
7808 :
7809 : static void
7810 2292920 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7811 : {
7812 2292920 : if (!crtl->shrink_wrapped
7813 2274323 : && cfa_offset <= cfun->machine->fs.red_zone_offset)
7814 : return;
7815 :
7816 771362 : if (insn)
7817 : {
7818 360575 : add_reg_note (insn, REG_CFA_RESTORE, reg);
7819 360575 : RTX_FRAME_RELATED_P (insn) = 1;
7820 : }
7821 : else
7822 410787 : queued_cfa_restores
7823 410787 : = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7824 : }
7825 :
7826 : /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7827 :
7828 : static void
7829 2555044 : ix86_add_queued_cfa_restore_notes (rtx insn)
7830 : {
7831 2555044 : rtx last;
7832 2555044 : if (!queued_cfa_restores)
7833 : return;
7834 410787 : for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7835 : ;
7836 52954 : XEXP (last, 1) = REG_NOTES (insn);
7837 52954 : REG_NOTES (insn) = queued_cfa_restores;
7838 52954 : queued_cfa_restores = NULL_RTX;
7839 52954 : RTX_FRAME_RELATED_P (insn) = 1;
7840 : }
7841 :
7842 : /* Expand prologue or epilogue stack adjustment.
7843 : The pattern exist to put a dependency on all ebp-based memory accesses.
7844 : STYLE should be negative if instructions should be marked as frame related,
7845 : zero if %r11 register is live and cannot be freely used and positive
7846 : otherwise. */
7847 :
7848 : static rtx
7849 1580186 : pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7850 : int style, bool set_cfa)
7851 : {
7852 1580186 : struct machine_function *m = cfun->machine;
7853 1580186 : rtx addend = offset;
7854 1580186 : rtx insn;
7855 1580186 : bool add_frame_related_expr = false;
7856 :
7857 1798710 : if (!x86_64_immediate_operand (offset, Pmode))
7858 : {
7859 : /* r11 is used by indirect sibcall return as well, set before the
7860 : epilogue and used after the epilogue. */
7861 199 : if (style)
7862 174 : addend = gen_rtx_REG (Pmode, R11_REG);
7863 : else
7864 : {
7865 25 : gcc_assert (src != hard_frame_pointer_rtx
7866 : && dest != hard_frame_pointer_rtx);
7867 : addend = hard_frame_pointer_rtx;
7868 : }
7869 199 : emit_insn (gen_rtx_SET (addend, offset));
7870 199 : if (style < 0)
7871 88 : add_frame_related_expr = true;
7872 : }
7873 :
7874 : /* Shrink wrap separate may insert prologue between TEST and JMP. In order
7875 : not to affect EFlags, emit add without reg clobbering. */
7876 1580186 : if (crtl->shrink_wrapped_separate)
7877 93230 : insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
7878 93230 : (Pmode, dest, src, addend));
7879 : else
7880 1486956 : insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7881 1486956 : (Pmode, dest, src, addend));
7882 :
7883 1580186 : if (style >= 0)
7884 694955 : ix86_add_queued_cfa_restore_notes (insn);
7885 :
7886 1580186 : if (set_cfa)
7887 : {
7888 1215141 : rtx r;
7889 :
7890 1215141 : gcc_assert (m->fs.cfa_reg == src);
7891 1215141 : m->fs.cfa_offset += INTVAL (offset);
7892 1215141 : m->fs.cfa_reg = dest;
7893 :
7894 1411288 : r = gen_rtx_PLUS (Pmode, src, offset);
7895 1215141 : r = gen_rtx_SET (dest, r);
7896 1215141 : add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7897 1215141 : RTX_FRAME_RELATED_P (insn) = 1;
7898 : }
7899 365045 : else if (style < 0)
7900 : {
7901 298163 : RTX_FRAME_RELATED_P (insn) = 1;
7902 298163 : if (add_frame_related_expr)
7903 : {
7904 20 : rtx r = gen_rtx_PLUS (Pmode, src, offset);
7905 20 : r = gen_rtx_SET (dest, r);
7906 20 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7907 : }
7908 : }
7909 :
7910 1580186 : if (dest == stack_pointer_rtx)
7911 : {
7912 1580186 : HOST_WIDE_INT ooffset = m->fs.sp_offset;
7913 1580186 : bool valid = m->fs.sp_valid;
7914 1580186 : bool realigned = m->fs.sp_realigned;
7915 :
7916 1580186 : if (src == hard_frame_pointer_rtx)
7917 : {
7918 29817 : valid = m->fs.fp_valid;
7919 29817 : realigned = false;
7920 29817 : ooffset = m->fs.fp_offset;
7921 : }
7922 1550369 : else if (src == crtl->drap_reg)
7923 : {
7924 0 : valid = m->fs.drap_valid;
7925 0 : realigned = false;
7926 0 : ooffset = 0;
7927 : }
7928 : else
7929 : {
7930 : /* Else there are two possibilities: SP itself, which we set
7931 : up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7932 : taken care of this by hand along the eh_return path. */
7933 1550369 : gcc_checking_assert (src == stack_pointer_rtx
7934 : || offset == const0_rtx);
7935 : }
7936 :
7937 1580186 : m->fs.sp_offset = ooffset - INTVAL (offset);
7938 1580186 : m->fs.sp_valid = valid;
7939 1580186 : m->fs.sp_realigned = realigned;
7940 : }
7941 1580186 : return insn;
7942 : }
7943 :
7944 : /* Find an available register to be used as dynamic realign argument
7945 : pointer register. Such a register will be written in prologue and
7946 : used in begin of body, so it must not be
7947 : 1. parameter passing register.
7948 : 2. GOT pointer.
7949 : We reuse static-chain register if it is available. Otherwise, we
7950 : use DI for i386 and R13 for x86-64. We chose R13 since it has
7951 : shorter encoding.
7952 :
7953 : Return: the regno of chosen register. */
7954 :
7955 : static unsigned int
7956 7295 : find_drap_reg (void)
7957 : {
7958 7295 : tree decl = cfun->decl;
7959 :
7960 : /* Always use callee-saved register if there are no caller-saved
7961 : registers. */
7962 7295 : if (TARGET_64BIT)
7963 : {
7964 : /* In preserve_none functions, any register can be used for DRAP,
7965 : except AX, R12–R15, DI, SI (argument registers), SP, and BP. */
7966 7010 : if (cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
7967 : return R11_REG;
7968 :
7969 : /* Use R13 for nested function or function need static chain.
7970 : Since function with tail call may use any caller-saved
7971 : registers in epilogue, DRAP must not use caller-saved
7972 : register in such case. */
7973 7009 : if (DECL_STATIC_CHAIN (decl)
7974 6967 : || (cfun->machine->call_saved_registers
7975 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7976 13976 : || crtl->tail_call_emit)
7977 191 : return R13_REG;
7978 :
7979 : return R10_REG;
7980 : }
7981 : else
7982 : {
7983 : /* Use DI for nested function or function need static chain.
7984 : Since function with tail call may use any caller-saved
7985 : registers in epilogue, DRAP must not use caller-saved
7986 : register in such case. */
7987 285 : if (DECL_STATIC_CHAIN (decl)
7988 285 : || (cfun->machine->call_saved_registers
7989 285 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7990 285 : || crtl->tail_call_emit
7991 550 : || crtl->calls_eh_return)
7992 : return DI_REG;
7993 :
7994 : /* Reuse static chain register if it isn't used for parameter
7995 : passing. */
7996 265 : if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7997 : {
7998 265 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7999 265 : if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
8000 : return CX_REG;
8001 : }
8002 0 : return DI_REG;
8003 : }
8004 : }
8005 :
8006 : /* Return minimum incoming stack alignment. */
8007 :
8008 : static unsigned int
8009 1619987 : ix86_minimum_incoming_stack_boundary (bool sibcall)
8010 : {
8011 1619987 : unsigned int incoming_stack_boundary;
8012 :
8013 : /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
8014 1619987 : if (cfun->machine->func_type != TYPE_NORMAL)
8015 120 : incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
8016 : /* Prefer the one specified at command line. */
8017 1619867 : else if (ix86_user_incoming_stack_boundary)
8018 : incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8019 : /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
8020 : if -mstackrealign is used, it isn't used for sibcall check and
8021 : estimated stack alignment is 128bit. */
8022 1619845 : else if (!sibcall
8023 1489011 : && ix86_force_align_arg_pointer
8024 4572 : && crtl->stack_alignment_estimated == 128)
8025 596 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8026 : else
8027 1619249 : incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8028 :
8029 : /* Incoming stack alignment can be changed on individual functions
8030 : via force_align_arg_pointer attribute. We use the smallest
8031 : incoming stack boundary. */
8032 1619987 : if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8033 3239368 : && lookup_attribute ("force_align_arg_pointer",
8034 1619381 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8035 5708 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8036 :
8037 : /* The incoming stack frame has to be aligned at least at
8038 : parm_stack_boundary. */
8039 1619987 : if (incoming_stack_boundary < crtl->parm_stack_boundary)
8040 : incoming_stack_boundary = crtl->parm_stack_boundary;
8041 :
8042 : /* Stack at entrance of main is aligned by runtime. We use the
8043 : smallest incoming stack boundary. */
8044 1619987 : if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8045 140808 : && DECL_NAME (current_function_decl)
8046 140808 : && MAIN_NAME_P (DECL_NAME (current_function_decl))
8047 1622461 : && DECL_FILE_SCOPE_P (current_function_decl))
8048 2474 : incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8049 :
8050 1619987 : return incoming_stack_boundary;
8051 : }
8052 :
8053 : /* Update incoming stack boundary and estimated stack alignment. */
8054 :
8055 : static void
8056 1489148 : ix86_update_stack_boundary (void)
8057 : {
8058 1489148 : ix86_incoming_stack_boundary
8059 1489148 : = ix86_minimum_incoming_stack_boundary (false);
8060 :
8061 : /* x86_64 vararg needs 16byte stack alignment for register save area. */
8062 1489148 : if (TARGET_64BIT
8063 1362581 : && cfun->stdarg
8064 21431 : && crtl->stack_alignment_estimated < 128)
8065 10187 : crtl->stack_alignment_estimated = 128;
8066 :
8067 : /* __tls_get_addr needs to be called with 16-byte aligned stack. */
8068 1489148 : if (ix86_tls_descriptor_calls_expanded_in_cfun
8069 1078 : && crtl->preferred_stack_boundary < 128)
8070 750 : crtl->preferred_stack_boundary = 128;
8071 :
8072 : /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
8073 : are 32 bits, but if force_align_arg_pointer is specified, it should
8074 : prefer 128 bits for a backward-compatibility reason, which is also
8075 : what the doc suggests. */
8076 1489148 : if (lookup_attribute ("force_align_arg_pointer",
8077 1489148 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
8078 1489148 : && crtl->preferred_stack_boundary < 128)
8079 4 : crtl->preferred_stack_boundary = 128;
8080 1489148 : }
8081 :
8082 : /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8083 : needed or an rtx for DRAP otherwise. */
8084 :
8085 : static rtx
8086 1593012 : ix86_get_drap_rtx (void)
8087 : {
8088 : /* We must use DRAP if there are outgoing arguments on stack or
8089 : the stack pointer register is clobbered by asm statement and
8090 : ACCUMULATE_OUTGOING_ARGS is false. */
8091 1593012 : if (ix86_force_drap
8092 1593012 : || ((cfun->machine->outgoing_args_on_stack
8093 1260546 : || crtl->sp_is_clobbered_by_asm)
8094 330521 : && !ACCUMULATE_OUTGOING_ARGS))
8095 310326 : crtl->need_drap = true;
8096 :
8097 1593012 : if (stack_realign_drap)
8098 : {
8099 : /* Assign DRAP to vDRAP and returns vDRAP */
8100 7295 : unsigned int regno = find_drap_reg ();
8101 7295 : rtx drap_vreg;
8102 7295 : rtx arg_ptr;
8103 7295 : rtx_insn *seq, *insn;
8104 :
8105 7580 : arg_ptr = gen_rtx_REG (Pmode, regno);
8106 7295 : crtl->drap_reg = arg_ptr;
8107 :
8108 7295 : start_sequence ();
8109 7295 : drap_vreg = copy_to_reg (arg_ptr);
8110 7295 : seq = end_sequence ();
8111 :
8112 7295 : insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8113 7295 : if (!optimize)
8114 : {
8115 1896 : add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8116 1896 : RTX_FRAME_RELATED_P (insn) = 1;
8117 : }
8118 7295 : return drap_vreg;
8119 : }
8120 : else
8121 : return NULL;
8122 : }
8123 :
8124 : /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8125 :
8126 : static rtx
8127 1489150 : ix86_internal_arg_pointer (void)
8128 : {
8129 1489150 : return virtual_incoming_args_rtx;
8130 : }
8131 :
/* A scratch register as filled in by get_scratch_register_on_entry.  */
8132 : struct scratch_reg {
/* The scratch register itself, created there as a Pmode REG.  */
8133 : rtx reg;
/* True if get_scratch_register_on_entry had to push the register to
   make it usable; false otherwise.  */
8134 : bool saved;
8135 : };
8136 :
8137 : /* Return a short-lived scratch register for use on function entry.
8138 : In 32-bit mode, it is valid only after the registers are saved
8139 : in the prologue. This register must be released by means of
8140 : release_scratch_register_on_entry once it is dead. */
8141 :
8142 : static void
8143 25 : get_scratch_register_on_entry (struct scratch_reg *sr)
8144 : {
8145 25 : int regno;
8146 :
8147 25 : sr->saved = false;
8148 :
8149 25 : if (TARGET_64BIT)
8150 : {
8151 : /* We always use R11 in 64-bit mode. */
8152 : regno = R11_REG;
8153 : }
8154 : else
8155 : {
8156 0 : tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8157 0 : bool fastcall_p
8158 0 : = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8159 0 : bool thiscall_p
8160 0 : = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8161 0 : bool static_chain_p = DECL_STATIC_CHAIN (decl);
8162 0 : int regparm = ix86_function_regparm (fntype, decl);
8163 0 : int drap_regno
8164 0 : = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8165 :
8166 : /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8167 : for the static chain register. */
8168 0 : if ((regparm < 1 || (fastcall_p && !static_chain_p))
8169 0 : && drap_regno != AX_REG)
8170 : regno = AX_REG;
8171 : /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
8172 : for the static chain register. */
8173 0 : else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
8174 : regno = AX_REG;
8175 0 : else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
8176 : regno = DX_REG;
8177 : /* ecx is the static chain register. */
8178 0 : else if (regparm < 3 && !fastcall_p && !thiscall_p
8179 0 : && !static_chain_p
8180 0 : && drap_regno != CX_REG)
8181 : regno = CX_REG;
8182 0 : else if (ix86_save_reg (BX_REG, true, false))
8183 : regno = BX_REG;
8184 : /* esi is the static chain register. */
8185 0 : else if (!(regparm == 3 && static_chain_p)
8186 0 : && ix86_save_reg (SI_REG, true, false))
8187 : regno = SI_REG;
8188 0 : else if (ix86_save_reg (DI_REG, true, false))
8189 : regno = DI_REG;
8190 : else
8191 : {
8192 0 : regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8193 0 : sr->saved = true;
8194 : }
8195 : }
8196 :
8197 25 : sr->reg = gen_rtx_REG (Pmode, regno);
8198 25 : if (sr->saved)
8199 : {
8200 0 : rtx_insn *insn = emit_insn (gen_push (sr->reg));
8201 0 : RTX_FRAME_RELATED_P (insn) = 1;
8202 : }
8203 25 : }
8204 :
8205 : /* Release a scratch register obtained from the preceding function.
8206 :
8207 : If RELEASE_VIA_POP is true, we just pop the register off the stack
8208 : to release it. This is what non-Linux systems use with -fstack-check.
8209 :
8210 : Otherwise we use OFFSET to locate the saved register and the
8211 : allocated stack space becomes part of the local frame and is
8212 : deallocated by the epilogue. */
8213 :
8214 : static void
8215 25 : release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8216 : bool release_via_pop)
8217 : {
8218 25 : if (sr->saved)
8219 : {
8220 0 : if (release_via_pop)
8221 : {
8222 0 : struct machine_function *m = cfun->machine;
8223 0 : rtx x, insn = emit_insn (gen_pop (sr->reg));
8224 :
8225 : /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
8226 0 : RTX_FRAME_RELATED_P (insn) = 1;
8227 0 : x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8228 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
8229 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8230 0 : m->fs.sp_offset -= UNITS_PER_WORD;
8231 : }
8232 : else
8233 : {
8234 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8235 0 : x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8236 0 : emit_insn (x);
8237 : }
8238 : }
8239 25 : }
8240 :
8241 : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8242 :
8243 : If INT_REGISTERS_SAVED is true, then integer registers have already been
8244 : pushed on the stack.
8245 :
8246 : If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
8247 : beyond SIZE bytes.
8248 :
8249 : This assumes no knowledge of the current probing state, i.e. it is never
8250 : allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8251 : a suitable probe. */
8252 :
8253 : static void
8254 126 : ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8255 : const bool int_registers_saved,
8256 : const bool protection_area)
8257 : {
8258 126 : struct machine_function *m = cfun->machine;
8259 :
8260 : /* If this function does not statically allocate stack space, then
8261 : no probes are needed. */
8262 126 : if (!size)
8263 : {
8264 : /* However, the allocation of space via pushes for register
8265 : saves could be viewed as allocating space, but without the
8266 : need to probe. */
8267 43 : if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8268 23 : dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8269 : else
8270 20 : dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8271 43 : return;
8272 : }
8273 :
8274 : /* If we are a noreturn function, then we have to consider the
8275 : possibility that we're called via a jump rather than a call.
8276 :
8277 : Thus we don't have the implicit probe generated by saving the
8278 : return address into the stack at the call. Thus, the stack
8279 : pointer could be anywhere in the guard page. The safe thing
8280 : to do is emit a probe now.
8281 :
8282 : The probe can be avoided if we have already emitted any callee
8283 : register saves into the stack or have a frame pointer (which will
8284 : have been saved as well). Those saves will function as implicit
8285 : probes.
8286 :
8287 : ?!? This should be revamped to work like aarch64 and s390 where
8288 : we track the offset from the most recent probe. Normally that
8289 : offset would be zero. For a noreturn function we would reset
8290 : it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8291 : we just probe when we cross PROBE_INTERVAL. */
8292 83 : if (TREE_THIS_VOLATILE (cfun->decl)
8293 15 : && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8294 : {
8295 : /* We can safely use any register here since we're just going to push
8296 : its value and immediately pop it back. But we do try and avoid
8297 : argument passing registers so as not to introduce dependencies in
8298 : the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8299 15 : rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8300 15 : rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
8301 15 : rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
8302 15 : m->fs.sp_offset -= UNITS_PER_WORD;
8303 15 : if (m->fs.cfa_reg == stack_pointer_rtx)
8304 : {
8305 15 : m->fs.cfa_offset -= UNITS_PER_WORD;
8306 15 : rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8307 15 : x = gen_rtx_SET (stack_pointer_rtx, x);
8308 15 : add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8309 15 : RTX_FRAME_RELATED_P (insn_push) = 1;
8310 15 : x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8311 15 : x = gen_rtx_SET (stack_pointer_rtx, x);
8312 15 : add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8313 15 : RTX_FRAME_RELATED_P (insn_pop) = 1;
8314 : }
8315 15 : emit_insn (gen_blockage ());
8316 : }
8317 :
8318 83 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
8319 83 : const int dope = 4 * UNITS_PER_WORD;
8320 :
8321 : /* If there is protection area, take it into account in the size. */
8322 83 : if (protection_area)
8323 24 : size += probe_interval + dope;
8324 :
8325 : /* If we allocate less than the size of the guard statically,
8326 : then no probing is necessary, but we do need to allocate
8327 : the stack. */
8328 59 : else if (size < (1 << param_stack_clash_protection_guard_size))
8329 : {
8330 38 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8331 : GEN_INT (-size), -1,
8332 38 : m->fs.cfa_reg == stack_pointer_rtx);
8333 38 : dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8334 38 : return;
8335 : }
8336 :
8337 : /* We're allocating a large enough stack frame that we need to
8338 : emit probes. Either emit them inline or in a loop depending
8339 : on the size. */
8340 45 : if (size <= 4 * probe_interval)
8341 : {
8342 : HOST_WIDE_INT i;
8343 47 : for (i = probe_interval; i <= size; i += probe_interval)
8344 : {
8345 : /* Allocate PROBE_INTERVAL bytes. */
8346 27 : rtx insn
8347 27 : = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8348 : GEN_INT (-probe_interval), -1,
8349 27 : m->fs.cfa_reg == stack_pointer_rtx);
8350 27 : add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8351 :
8352 : /* And probe at *sp. */
8353 27 : emit_stack_probe (stack_pointer_rtx);
8354 27 : emit_insn (gen_blockage ());
8355 : }
8356 :
8357 : /* We need to allocate space for the residual, but we do not need
8358 : to probe the residual... */
8359 20 : HOST_WIDE_INT residual = (i - probe_interval - size);
8360 20 : if (residual)
8361 : {
8362 20 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8363 : GEN_INT (residual), -1,
8364 20 : m->fs.cfa_reg == stack_pointer_rtx);
8365 :
8366 : /* ...except if there is a protection area to maintain. */
8367 20 : if (protection_area)
8368 11 : emit_stack_probe (stack_pointer_rtx);
8369 : }
8370 :
8371 20 : dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8372 : }
8373 : else
8374 : {
8375 : /* We expect the GP registers to be saved when probes are used
8376 : as the probing sequences might need a scratch register and
8377 : the routine to allocate one assumes the integer registers
8378 : have already been saved. */
8379 25 : gcc_assert (int_registers_saved);
8380 :
8381 25 : struct scratch_reg sr;
8382 25 : get_scratch_register_on_entry (&sr);
8383 :
8384 : /* If we needed to save a register, then account for any space
8385 : that was pushed (we are not going to pop the register when
8386 : we do the restore). */
8387 25 : if (sr.saved)
8388 0 : size -= UNITS_PER_WORD;
8389 :
8390 : /* Step 1: round SIZE down to a multiple of the interval. */
8391 25 : HOST_WIDE_INT rounded_size = size & -probe_interval;
8392 :
8393 : /* Step 2: compute final value of the loop counter. Use lea if
8394 : possible. */
8395 25 : rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8396 25 : rtx insn;
8397 25 : if (address_no_seg_operand (addr, Pmode))
8398 13 : insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8399 : else
8400 : {
8401 12 : emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8402 12 : insn = emit_insn (gen_rtx_SET (sr.reg,
8403 : gen_rtx_PLUS (Pmode, sr.reg,
8404 : stack_pointer_rtx)));
8405 : }
8406 25 : if (m->fs.cfa_reg == stack_pointer_rtx)
8407 : {
8408 22 : add_reg_note (insn, REG_CFA_DEF_CFA,
8409 22 : plus_constant (Pmode, sr.reg,
8410 22 : m->fs.cfa_offset + rounded_size));
8411 22 : RTX_FRAME_RELATED_P (insn) = 1;
8412 : }
8413 :
8414 : /* Step 3: the loop. */
8415 25 : rtx size_rtx = GEN_INT (rounded_size);
8416 25 : insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
8417 : size_rtx));
8418 25 : if (m->fs.cfa_reg == stack_pointer_rtx)
8419 : {
8420 22 : m->fs.cfa_offset += rounded_size;
8421 22 : add_reg_note (insn, REG_CFA_DEF_CFA,
8422 22 : plus_constant (Pmode, stack_pointer_rtx,
8423 22 : m->fs.cfa_offset));
8424 22 : RTX_FRAME_RELATED_P (insn) = 1;
8425 : }
8426 25 : m->fs.sp_offset += rounded_size;
8427 25 : emit_insn (gen_blockage ());
8428 :
8429 : /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8430 : is equal to ROUNDED_SIZE. */
8431 :
8432 25 : if (size != rounded_size)
8433 : {
8434 25 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8435 : GEN_INT (rounded_size - size), -1,
8436 25 : m->fs.cfa_reg == stack_pointer_rtx);
8437 :
8438 25 : if (protection_area)
8439 13 : emit_stack_probe (stack_pointer_rtx);
8440 : }
8441 :
8442 25 : dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8443 :
8444 : /* This does not deallocate the space reserved for the scratch
8445 : register. That will be deallocated in the epilogue. */
8446 25 : release_scratch_register_on_entry (&sr, size, false);
8447 : }
8448 :
8449 : /* Adjust back to account for the protection area. */
8450 45 : if (protection_area)
8451 24 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8452 24 : GEN_INT (probe_interval + dope), -1,
8453 24 : m->fs.cfa_reg == stack_pointer_rtx);
8454 :
8455 : /* Make sure nothing is scheduled before we are done. */
8456 45 : emit_insn (gen_blockage ());
8457 : }
8458 :
8459 : /* Adjust the stack pointer up to REG while probing it. */
8460 :
8461 : const char *
8462 25 : output_adjust_stack_and_probe (rtx reg)
8463 : {
8464 25 : static int labelno = 0;
8465 25 : char loop_lab[32];
8466 25 : rtx xops[2];
8467 :
8468 25 : ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8469 :
8470 : /* Loop. */
8471 25 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8472 :
8473 : /* SP = SP + PROBE_INTERVAL. */
8474 25 : xops[0] = stack_pointer_rtx;
8475 37 : xops[1] = GEN_INT (get_probe_interval ());
8476 25 : output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8477 :
8478 : /* Probe at SP. */
8479 25 : xops[1] = const0_rtx;
8480 25 : output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);
8481 :
8482 : /* Test if SP == LAST_ADDR. */
8483 25 : xops[0] = stack_pointer_rtx;
8484 25 : xops[1] = reg;
8485 25 : output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8486 :
8487 : /* Branch. */
8488 25 : fputs ("\tjne\t", asm_out_file);
8489 25 : assemble_name_raw (asm_out_file, loop_lab);
8490 25 : fputc ('\n', asm_out_file);
8491 :
8492 25 : return "";
8493 : }
8494 :
8495 : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8496 : inclusive. These are offsets from the current stack pointer.
8497 :
8498 : INT_REGISTERS_SAVED is true if integer registers have already been
8499 : pushed on the stack. */
8500 :
8501 : static void
8502 0 : ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8503 : const bool int_registers_saved)
8504 : {
8505 0 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
8506 :
8507 : /* See if we have a constant small number of probes to generate. If so,
8508 : that's the easy case. The run-time loop is made up of 6 insns in the
8509 : generic case while the compile-time loop is made up of n insns for n #
8510 : of intervals. */
8511 0 : if (size <= 6 * probe_interval)
8512 : {
8513 : HOST_WIDE_INT i;
8514 :
8515 : /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8516 : it exceeds SIZE. If only one probe is needed, this will not
8517 : generate any code. Then probe at FIRST + SIZE. */
8518 0 : for (i = probe_interval; i < size; i += probe_interval)
8519 0 : emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8520 0 : -(first + i)));
8521 :
8522 0 : emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8523 0 : -(first + size)));
8524 : }
8525 :
8526 : /* Otherwise, do the same as above, but in a loop. Note that we must be
8527 : extra careful with variables wrapping around because we might be at
8528 : the very top (or the very bottom) of the address space and we have
8529 : to be able to handle this case properly; in particular, we use an
8530 : equality test for the loop condition. */
8531 : else
8532 : {
8533 : /* We expect the GP registers to be saved when probes are used
8534 : as the probing sequences might need a scratch register and
8535 : the routine to allocate one assumes the integer registers
8536 : have already been saved. */
8537 0 : gcc_assert (int_registers_saved);
8538 :
8539 0 : HOST_WIDE_INT rounded_size, last;
8540 0 : struct scratch_reg sr;
8541 :
8542 0 : get_scratch_register_on_entry (&sr);
8543 :
8544 :
8545 : /* Step 1: round SIZE to the previous multiple of the interval. */
8546 :
8547 0 : rounded_size = ROUND_DOWN (size, probe_interval);
8548 :
8549 :
8550 : /* Step 2: compute initial and final value of the loop counter. */
8551 :
8552 : /* TEST_OFFSET = FIRST. */
8553 0 : emit_move_insn (sr.reg, GEN_INT (-first));
8554 :
8555 : /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8556 0 : last = first + rounded_size;
8557 :
8558 :
8559 : /* Step 3: the loop
8560 :
8561 : do
8562 : {
8563 : TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8564 : probe at TEST_ADDR
8565 : }
8566 : while (TEST_ADDR != LAST_ADDR)
8567 :
8568 : probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8569 : until it is equal to ROUNDED_SIZE. */
8570 :
8571 0 : emit_insn
8572 0 : (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8573 :
8574 :
8575 : /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8576 : that SIZE is equal to ROUNDED_SIZE. */
8577 :
8578 0 : if (size != rounded_size)
8579 0 : emit_stack_probe (plus_constant (Pmode,
8580 0 : gen_rtx_PLUS (Pmode,
8581 : stack_pointer_rtx,
8582 : sr.reg),
8583 0 : rounded_size - size));
8584 :
8585 0 : release_scratch_register_on_entry (&sr, size, true);
8586 : }
8587 :
8588 : /* Make sure nothing is scheduled before we are done. */
8589 0 : emit_insn (gen_blockage ());
8590 0 : }
8591 :
8592 : /* Probe a range of stack addresses from REG to END, inclusive. These are
8593 : offsets from the current stack pointer. */
8594 :
8595 : const char *
8596 0 : output_probe_stack_range (rtx reg, rtx end)
8597 : {
8598 0 : static int labelno = 0;
8599 0 : char loop_lab[32];
8600 0 : rtx xops[3];
8601 :
8602 0 : ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8603 :
8604 : /* Loop. */
8605 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8606 :
8607 : /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8608 0 : xops[0] = reg;
8609 0 : xops[1] = GEN_INT (get_probe_interval ());
8610 0 : output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8611 :
8612 : /* Probe at TEST_ADDR. */
8613 0 : xops[0] = stack_pointer_rtx;
8614 0 : xops[1] = reg;
8615 0 : xops[2] = const0_rtx;
8616 0 : output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);
8617 :
8618 : /* Test if TEST_ADDR == LAST_ADDR. */
8619 0 : xops[0] = reg;
8620 0 : xops[1] = end;
8621 0 : output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8622 :
8623 : /* Branch. */
8624 0 : fputs ("\tjne\t", asm_out_file);
8625 0 : assemble_name_raw (asm_out_file, loop_lab);
8626 0 : fputc ('\n', asm_out_file);
8627 :
8628 0 : return "";
8629 : }
8630 :
8631 : /* Data passed to ix86_update_stack_alignment. */
8632 : struct stack_access_data
8633 : {
8634 : /* The stack access register. */
8635 : const_rtx reg;
8636 : /* Pointer to stack alignment. */
8637 : unsigned int *stack_alignment;
8638 : };
8639 :
8640 : /* Return true if OP references an argument passed on stack. */
8641 :
8642 : static bool
8643 135885 : ix86_argument_passed_on_stack_p (const_rtx op)
8644 : {
8645 135885 : tree mem_expr = MEM_EXPR (op);
8646 135885 : if (mem_expr)
8647 : {
8648 133991 : tree var = get_base_address (mem_expr);
8649 133991 : return TREE_CODE (var) == PARM_DECL;
8650 : }
8651 : return false;
8652 : }
8653 :
8654 : /* Update the maximum stack slot alignment from memory alignment in PAT. */
8655 :
8656 : static void
8657 168965 : ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
8658 : {
8659 : /* This insn may reference stack slot. Update the maximum stack slot
8660 : alignment if the memory is referenced by the stack access register. */
8661 168965 : stack_access_data *p = (stack_access_data *) data;
8662 :
8663 168965 : subrtx_iterator::array_type array;
8664 707147 : FOR_EACH_SUBRTX (iter, array, pat, ALL)
8665 : {
8666 566933 : auto op = *iter;
8667 566933 : if (MEM_P (op))
8668 : {
8669 : /* NB: Ignore arguments passed on stack since caller is
8670 : responsible to align the outgoing stack for arguments
8671 : passed on stack. */
8672 165550 : if (reg_mentioned_p (p->reg, XEXP (op, 0))
8673 165550 : && !ix86_argument_passed_on_stack_p (op))
8674 : {
8675 28751 : unsigned int alignment = MEM_ALIGN (op);
8676 :
8677 28751 : if (alignment > *p->stack_alignment)
8678 28672 : *p->stack_alignment = alignment;
8679 : break;
8680 : }
8681 : else
8682 136799 : iter.skip_subrtxes ();
8683 : }
8684 : }
8685 168965 : }
8686 :
8687 : /* Helper function for ix86_find_all_reg_uses. */
8688 :
8689 : static void
8690 45204803 : ix86_find_all_reg_uses_1 (HARD_REG_SET ®set,
8691 : rtx set, unsigned int regno,
8692 : auto_bitmap &worklist)
8693 : {
8694 45204803 : rtx dest = SET_DEST (set);
8695 :
8696 45204803 : if (!REG_P (dest))
8697 40941213 : return;
8698 :
8699 : /* Reject non-Pmode modes. */
8700 34234168 : if (GET_MODE (dest) != Pmode)
8701 : return;
8702 :
8703 18111700 : unsigned int dst_regno = REGNO (dest);
8704 :
8705 18111700 : if (TEST_HARD_REG_BIT (regset, dst_regno))
8706 : return;
8707 :
8708 4263590 : const_rtx src = SET_SRC (set);
8709 :
8710 4263590 : subrtx_iterator::array_type array;
8711 8475926 : FOR_EACH_SUBRTX (iter, array, src, ALL)
8712 : {
8713 5482905 : auto op = *iter;
8714 :
8715 5482905 : if (MEM_P (op))
8716 2971500 : iter.skip_subrtxes ();
8717 :
8718 5482905 : if (REG_P (op) && REGNO (op) == regno)
8719 : {
8720 : /* Add this register to register set. */
8721 1439023 : add_to_hard_reg_set (®set, Pmode, dst_regno);
8722 1270569 : bitmap_set_bit (worklist, dst_regno);
8723 1270569 : break;
8724 : }
8725 : }
8726 4263590 : }
8727 :
8728 : /* Find all registers defined with register REGNO. */
8729 :
8730 : static void
8731 2294420 : ix86_find_all_reg_uses (HARD_REG_SET ®set,
8732 : unsigned int regno, auto_bitmap &worklist)
8733 : {
8734 2294420 : for (df_ref ref = DF_REG_USE_CHAIN (regno);
8735 81031909 : ref != NULL;
8736 78737489 : ref = DF_REF_NEXT_REG (ref))
8737 : {
8738 78737489 : if (DF_REF_IS_ARTIFICIAL (ref))
8739 16506296 : continue;
8740 :
8741 62231193 : rtx_insn *insn = DF_REF_INSN (ref);
8742 :
8743 62231193 : if (!NONJUMP_INSN_P (insn))
8744 17681695 : continue;
8745 :
8746 44549498 : unsigned int ref_regno = DF_REF_REGNO (ref);
8747 :
8748 44549498 : rtx set = single_set (insn);
8749 44549498 : if (set)
8750 : {
8751 43775569 : ix86_find_all_reg_uses_1 (regset, set,
8752 : ref_regno, worklist);
8753 43775569 : continue;
8754 : }
8755 :
8756 773929 : rtx pat = PATTERN (insn);
8757 773929 : if (GET_CODE (pat) != PARALLEL)
8758 124633 : continue;
8759 :
8760 2502858 : for (int i = 0; i < XVECLEN (pat, 0); i++)
8761 : {
8762 1853562 : rtx exp = XVECEXP (pat, 0, i);
8763 :
8764 1853562 : if (GET_CODE (exp) == SET)
8765 1429234 : ix86_find_all_reg_uses_1 (regset, exp,
8766 : ref_regno, worklist);
8767 : }
8768 : }
8769 2294420 : }
8770 :
8771 : /* Return true if the hard register REGNO used for a stack access is
8772 : defined in a basic block that dominates the block where it is used. */
8773 :
8774 : static bool
8775 39115 : ix86_access_stack_p (unsigned int regno, basic_block bb,
8776 : HARD_REG_SET &set_up_by_prologue,
8777 : HARD_REG_SET &prologue_used,
8778 : auto_bitmap reg_dominate_bbs_known[],
8779 : auto_bitmap reg_dominate_bbs[])
8780 : {
8781 39115 : if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
8782 10566 : return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);
8783 :
8784 28549 : bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);
8785 :
8786 : /* Get all BBs which set REGNO and dominate the current BB from all
8787 : DEFs of REGNO. */
8788 28549 : for (df_ref def = DF_REG_DEF_CHAIN (regno);
8789 1470550 : def;
8790 1442001 : def = DF_REF_NEXT_REG (def))
8791 1469139 : if (!DF_REF_IS_ARTIFICIAL (def)
8792 1467412 : && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
8793 1443513 : && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
8794 : {
8795 1441738 : basic_block set_bb = DF_REF_BB (def);
8796 1441738 : if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
8797 : {
8798 87477 : rtx_insn *insn = DF_REF_INSN (def);
8799 : /* Return true if INSN requires stack. */
8800 87477 : if (requires_stack_frame_p (insn, prologue_used,
8801 : set_up_by_prologue))
8802 : {
8803 27138 : bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
8804 27138 : return true;
8805 : }
8806 : }
8807 : }
8808 :
8809 : /* When we get here, REGNO used in the current BB doesn't access
8810 : stack. */
8811 : return false;
8812 : }
8813 :
8814 : /* Return true if OP isn't a memory operand with SYMBOLIC_CONST and
8815 : needs alignment > ALIGNMENT. */
8816 :
8817 : static bool
8818 27614536 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
8819 : {
8820 27614536 : bool need_alignment = MEM_ALIGN (op) > alignment;
8821 27614536 : tree mem_expr = MEM_EXPR (op);
8822 27614536 : if (!mem_expr)
8823 : return need_alignment;
8824 :
8825 22640371 : tree var = get_base_address (mem_expr);
8826 22640371 : if (!VAR_P (var) || !DECL_RTL_SET_P (var))
8827 : return need_alignment;
8828 :
8829 14378292 : rtx x = DECL_RTL (var);
8830 14378292 : if (!MEM_P (x))
8831 : return need_alignment;
8832 :
8833 14378289 : x = XEXP (x, 0);
8834 14378289 : return !SYMBOLIC_CONST (x) && need_alignment;
8835 : }
8836 :
8837 : /* Return true if SET needs alignment > ALIGNMENT. */
8838 :
8839 : static bool
8840 45204093 : ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
8841 : {
8842 45204093 : rtx dest = SET_DEST (set);
8843 :
8844 45204093 : if (MEM_P (dest))
8845 17118665 : return ix86_need_alignment_p_2 (dest, alignment);
8846 :
8847 28085428 : const_rtx src = SET_SRC (set);
8848 :
8849 28085428 : subrtx_iterator::array_type array;
8850 81341742 : FOR_EACH_SUBRTX (iter, array, src, ALL)
8851 : {
8852 63752185 : auto op = *iter;
8853 :
8854 63752185 : if (MEM_P (op))
8855 10495871 : return ix86_need_alignment_p_2 (op, alignment);
8856 : }
8857 :
8858 17589557 : return false;
8859 28085428 : }
8860 :
8861 : /* Return true if INSN needs alignment > ALIGNMENT. */
8862 :
8863 : static bool
8864 44549498 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
8865 : {
8866 44549498 : rtx set = single_set (insn);
8867 44549498 : if (set)
8868 43775569 : return ix86_need_alignment_p_1 (set, alignment);
8869 :
8870 773929 : rtx pat = PATTERN (insn);
8871 773929 : if (GET_CODE (pat) != PARALLEL)
8872 : return false;
8873 :
8874 2501169 : for (int i = 0; i < XVECLEN (pat, 0); i++)
8875 : {
8876 1852661 : rtx exp = XVECEXP (pat, 0, i);
8877 :
8878 1852661 : if (GET_CODE (exp) == SET
8879 1852661 : && ix86_need_alignment_p_1 (exp, alignment))
8880 : return true;
8881 : }
8882 :
8883 : return false;
8884 : }
8885 :
8886 : /* Set stack_frame_required to false if stack frame isn't required.
8887 : Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8888 : slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8889 :
8890 : static void
8891 1488297 : ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8892 : bool check_stack_slot)
8893 : {
8894 1488297 : HARD_REG_SET set_up_by_prologue, prologue_used;
8895 1488297 : basic_block bb;
8896 :
8897 5953188 : CLEAR_HARD_REG_SET (prologue_used);
8898 1488297 : CLEAR_HARD_REG_SET (set_up_by_prologue);
8899 1614979 : add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8900 1488297 : add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8901 1488297 : add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8902 : HARD_FRAME_POINTER_REGNUM);
8903 :
8904 1488297 : bool require_stack_frame = false;
8905 :
8906 15758966 : FOR_EACH_BB_FN (bb, cfun)
8907 : {
8908 14270669 : rtx_insn *insn;
8909 88752251 : FOR_BB_INSNS (bb, insn)
8910 82274467 : if (NONDEBUG_INSN_P (insn)
8911 82274467 : && requires_stack_frame_p (insn, prologue_used,
8912 : set_up_by_prologue))
8913 : {
8914 : require_stack_frame = true;
8915 : break;
8916 : }
8917 : }
8918 :
8919 1488297 : cfun->machine->stack_frame_required = require_stack_frame;
8920 :
8921 : /* Stop if we don't need to check stack slot. */
8922 1488297 : if (!check_stack_slot)
8923 793224 : return;
8924 :
8925 : /* The preferred stack alignment is the minimum stack alignment. */
8926 695073 : if (stack_alignment > crtl->preferred_stack_boundary)
8927 143793 : stack_alignment = crtl->preferred_stack_boundary;
8928 :
8929 : HARD_REG_SET stack_slot_access;
8930 695073 : CLEAR_HARD_REG_SET (stack_slot_access);
8931 :
8932 : /* Stack slot can be accessed by stack pointer, frame pointer or
8933 : registers defined by stack pointer or frame pointer. */
8934 695073 : auto_bitmap worklist;
8935 :
8936 754639 : add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
8937 695073 : bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
8938 :
8939 695073 : if (frame_pointer_needed)
8940 : {
8941 337808 : add_to_hard_reg_set (&stack_slot_access, Pmode,
8942 : HARD_FRAME_POINTER_REGNUM);
8943 328778 : bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
8944 : }
8945 :
8946 : /* Registers on HARD_STACK_SLOT_ACCESS always access stack. */
8947 695073 : HARD_REG_SET hard_stack_slot_access = stack_slot_access;
8948 :
8949 695073 : calculate_dominance_info (CDI_DOMINATORS);
8950 :
8951 2294420 : unsigned int regno;
8952 :
8953 2294420 : do
8954 : {
8955 2294420 : regno = bitmap_clear_first_set_bit (worklist);
8956 2294420 : ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
8957 : }
8958 2294420 : while (!bitmap_empty_p (worklist));
8959 :
8960 : hard_reg_set_iterator hrsi;
8961 : stack_access_data data;
8962 :
8963 128588505 : auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
8964 128588505 : auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];
8965 :
8966 695073 : data.stack_alignment = &stack_alignment;
8967 :
8968 2989493 : EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
8969 : {
8970 2294420 : for (df_ref ref = DF_REG_USE_CHAIN (regno);
8971 81031909 : ref != NULL;
8972 78737489 : ref = DF_REF_NEXT_REG (ref))
8973 : {
8974 78737489 : if (DF_REF_IS_ARTIFICIAL (ref))
8975 16506296 : continue;
8976 :
8977 62231193 : rtx_insn *insn = DF_REF_INSN (ref);
8978 :
8979 62231193 : if (!NONJUMP_INSN_P (insn))
8980 17681695 : continue;
8981 :
8982 : /* Call ix86_access_stack_p only if INSN needs alignment >
8983 : STACK_ALIGNMENT. */
8984 44549498 : if (ix86_need_alignment_p (insn, stack_alignment)
8985 44549498 : && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
8986 39115 : || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
8987 : set_up_by_prologue,
8988 : prologue_used,
8989 : reg_dominate_bbs_known,
8990 : reg_dominate_bbs)))
8991 : {
8992 : /* Update stack alignment if REGNO is used for stack
8993 : access. */
8994 162393 : data.reg = DF_REF_REG (ref);
8995 162393 : note_stores (insn, ix86_update_stack_alignment, &data);
8996 : }
8997 : }
8998 : }
8999 :
9000 695073 : free_dominance_info (CDI_DOMINATORS);
9001 129978651 : }
9002 :
9003 : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
9004 : will guide prologue/epilogue to be generated in correct form. */
9005 :
9006 : static void
9007 3446793 : ix86_finalize_stack_frame_flags (void)
9008 : {
9009 : /* Check if stack realign is really needed after reload, and
9010 : store the result in cfun. */
9011 3446793 : unsigned int incoming_stack_boundary
9012 3446793 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9013 3446793 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9014 3446793 : unsigned int stack_alignment
9015 1188659 : = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
9016 4635452 : ? crtl->max_used_stack_slot_alignment
9017 3446793 : : crtl->stack_alignment_needed);
9018 3446793 : unsigned int stack_realign
9019 3446793 : = (incoming_stack_boundary < stack_alignment);
9020 3446793 : bool recompute_frame_layout_p = false;
9021 :
9022 3446793 : if (crtl->stack_realign_finalized)
9023 : {
9024 : /* After stack_realign_needed is finalized, we can no longer
9025 : change it. */
9026 1958496 : gcc_assert (crtl->stack_realign_needed == stack_realign);
9027 1958496 : return;
9028 : }
9029 :
9030 : /* It is always safe to compute max_used_stack_alignment. We
9031 : compute it only if 128-bit aligned load/store may be generated
9032 : on misaligned stack slot which will lead to segfault. */
9033 2976594 : bool check_stack_slot
9034 1488297 : = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
9035 1488297 : ix86_find_max_used_stack_alignment (stack_alignment,
9036 : check_stack_slot);
9037 :
9038 : /* If the only reason for frame_pointer_needed is that we conservatively
9039 : assumed stack realignment might be needed or -fno-omit-frame-pointer
9040 : is used, but in the end nothing that needed the stack alignment had
9041 : been spilled nor stack access, clear frame_pointer_needed and say we
9042 : don't need stack realignment.
9043 :
9044 : When vector register is used for piecewise move and store, we don't
9045 : increase stack_alignment_needed as there is no register spill for
9046 : piecewise move and store. Since stack_realign_needed is set to true
9047 : by checking stack_alignment_estimated which is updated by pseudo
9048 : vector register usage, we also need to check stack_realign_needed to
9049 : eliminate frame pointer. */
9050 1488297 : if ((stack_realign
9051 1421808 : || (!flag_omit_frame_pointer && optimize)
9052 1411558 : || crtl->stack_realign_needed)
9053 77398 : && frame_pointer_needed
9054 77398 : && crtl->is_leaf
9055 52926 : && crtl->sp_is_unchanging
9056 52874 : && !ix86_current_function_calls_tls_descriptor
9057 52874 : && !crtl->accesses_prior_frames
9058 52874 : && !cfun->calls_alloca
9059 52874 : && !crtl->calls_eh_return
9060 : /* See ira_setup_eliminable_regset for the rationale. */
9061 52874 : && !(STACK_CHECK_MOVING_SP
9062 52874 : && flag_stack_check
9063 0 : && flag_exceptions
9064 0 : && cfun->can_throw_non_call_exceptions)
9065 52874 : && !ix86_frame_pointer_required ()
9066 52873 : && ix86_get_frame_size () == 0
9067 35086 : && ix86_nsaved_sseregs () == 0
9068 1523383 : && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9069 : {
9070 35086 : if (cfun->machine->stack_frame_required)
9071 : {
9072 : /* Stack frame is required. If stack alignment needed is less
9073 : than incoming stack boundary, don't realign stack. */
9074 285 : stack_realign = incoming_stack_boundary < stack_alignment;
9075 285 : if (!stack_realign)
9076 : {
9077 285 : crtl->max_used_stack_slot_alignment
9078 285 : = incoming_stack_boundary;
9079 285 : crtl->stack_alignment_needed
9080 285 : = incoming_stack_boundary;
9081 : /* Also update preferred_stack_boundary for leaf
9082 : functions. */
9083 285 : crtl->preferred_stack_boundary
9084 285 : = incoming_stack_boundary;
9085 : }
9086 : }
9087 : else
9088 : {
9089 : /* If drap has been set, but it actually isn't live at the
9090 : start of the function, there is no reason to set it up. */
9091 34801 : if (crtl->drap_reg)
9092 : {
9093 35 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9094 70 : if (! REGNO_REG_SET_P (DF_LR_IN (bb),
9095 : REGNO (crtl->drap_reg)))
9096 : {
9097 35 : crtl->drap_reg = NULL_RTX;
9098 35 : crtl->need_drap = false;
9099 : }
9100 : }
9101 : else
9102 34766 : cfun->machine->no_drap_save_restore = true;
9103 :
9104 34801 : frame_pointer_needed = false;
9105 34801 : stack_realign = false;
9106 34801 : crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9107 34801 : crtl->stack_alignment_needed = incoming_stack_boundary;
9108 34801 : crtl->stack_alignment_estimated = incoming_stack_boundary;
9109 34801 : if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9110 1 : crtl->preferred_stack_boundary = incoming_stack_boundary;
9111 34801 : df_finish_pass (true);
9112 34801 : df_scan_alloc (NULL);
9113 34801 : df_scan_blocks ();
9114 34801 : df_compute_regs_ever_live (true);
9115 34801 : df_analyze ();
9116 :
9117 34801 : if (flag_var_tracking)
9118 : {
9119 : /* Since frame pointer is no longer available, replace it with
9120 : stack pointer - UNITS_PER_WORD in debug insns. */
9121 136 : df_ref ref, next;
9122 136 : for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
9123 136 : ref; ref = next)
9124 : {
9125 0 : next = DF_REF_NEXT_REG (ref);
9126 0 : if (!DF_REF_INSN_INFO (ref))
9127 0 : continue;
9128 :
9129 : /* Make sure the next ref is for a different instruction,
9130 : so that we're not affected by the rescan. */
9131 0 : rtx_insn *insn = DF_REF_INSN (ref);
9132 0 : while (next && DF_REF_INSN (next) == insn)
9133 0 : next = DF_REF_NEXT_REG (next);
9134 :
9135 0 : if (DEBUG_INSN_P (insn))
9136 : {
9137 : bool changed = false;
9138 0 : for (; ref != next; ref = DF_REF_NEXT_REG (ref))
9139 : {
9140 0 : rtx *loc = DF_REF_LOC (ref);
9141 0 : if (*loc == hard_frame_pointer_rtx)
9142 : {
9143 0 : *loc = plus_constant (Pmode,
9144 : stack_pointer_rtx,
9145 0 : -UNITS_PER_WORD);
9146 0 : changed = true;
9147 : }
9148 : }
9149 0 : if (changed)
9150 0 : df_insn_rescan (insn);
9151 : }
9152 : }
9153 : }
9154 :
9155 : recompute_frame_layout_p = true;
9156 : }
9157 : }
9158 1453211 : else if (crtl->max_used_stack_slot_alignment >= 128
9159 658963 : && cfun->machine->stack_frame_required)
9160 : {
9161 : /* We don't need to realign stack. max_used_stack_alignment is
9162 : used to decide how stack frame should be aligned. This is
9163 : independent of any psABIs nor 32-bit vs 64-bit. */
9164 613364 : cfun->machine->max_used_stack_alignment
9165 613364 : = stack_alignment / BITS_PER_UNIT;
9166 : }
9167 :
9168 1488297 : if (crtl->stack_realign_needed != stack_realign)
9169 35319 : recompute_frame_layout_p = true;
9170 1488297 : crtl->stack_realign_needed = stack_realign;
9171 1488297 : crtl->stack_realign_finalized = true;
9172 1488297 : if (recompute_frame_layout_p)
9173 35412 : ix86_compute_frame_layout ();
9174 : }
9175 :
9176 : /* Delete SET_GOT right after entry block if it is allocated to reg. */
9177 :
9178 : static void
9179 0 : ix86_elim_entry_set_got (rtx reg)
9180 : {
9181 0 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9182 0 : rtx_insn *c_insn = BB_HEAD (bb);
9183 0 : if (!NONDEBUG_INSN_P (c_insn))
9184 0 : c_insn = next_nonnote_nondebug_insn (c_insn);
9185 0 : if (c_insn && NONJUMP_INSN_P (c_insn))
9186 : {
9187 0 : rtx pat = PATTERN (c_insn);
9188 0 : if (GET_CODE (pat) == PARALLEL)
9189 : {
9190 0 : rtx set = XVECEXP (pat, 0, 0);
9191 0 : if (GET_CODE (set) == SET
9192 0 : && GET_CODE (SET_SRC (set)) == UNSPEC
9193 0 : && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
9194 0 : && REGNO (SET_DEST (set)) == REGNO (reg))
9195 0 : delete_insn (c_insn);
9196 : }
9197 : }
9198 0 : }
9199 :
9200 : static rtx
9201 193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
9202 : {
9203 193166 : rtx addr, mem;
9204 :
9205 193166 : if (offset)
9206 184480 : addr = plus_constant (Pmode, frame_reg, offset);
9207 193166 : mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
9208 193166 : return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
9209 : }
9210 :
9211 : static inline rtx
9212 100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
9213 : {
9214 100333 : return gen_frame_set (reg, frame_reg, offset, false);
9215 : }
9216 :
9217 : static inline rtx
9218 92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
9219 : {
9220 92833 : return gen_frame_set (reg, frame_reg, offset, true);
9221 : }
9222 :
9223 : static void
9224 7045 : ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
9225 : {
9226 7045 : struct machine_function *m = cfun->machine;
9227 7045 : const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9228 7045 : + m->call_ms2sysv_extra_regs;
9229 7045 : rtvec v = rtvec_alloc (ncregs + 1);
9230 7045 : unsigned int align, i, vi = 0;
9231 7045 : rtx_insn *insn;
9232 7045 : rtx sym, addr;
9233 7045 : rtx rax = gen_rtx_REG (word_mode, AX_REG);
9234 7045 : const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9235 :
9236 : /* AL should only be live with sysv_abi. */
9237 7045 : gcc_assert (!ix86_eax_live_at_start_p ());
9238 7045 : gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
9239 :
9240 : /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
9241 : we've actually realigned the stack or not. */
9242 7045 : align = GET_MODE_ALIGNMENT (V4SFmode);
9243 7045 : addr = choose_baseaddr (frame.stack_realign_offset
9244 7045 : + xlogue.get_stub_ptr_offset (), &align, AX_REG);
9245 7045 : gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9246 :
9247 7045 : emit_insn (gen_rtx_SET (rax, addr));
9248 :
9249 : /* Get the stub symbol. */
9250 8327 : sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
9251 : : XLOGUE_STUB_SAVE);
9252 7045 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9253 :
9254 99878 : for (i = 0; i < ncregs; ++i)
9255 : {
9256 92833 : const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9257 92833 : rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
9258 92833 : r.regno);
9259 92833 : RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
9260 : }
9261 :
9262 7045 : gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
9263 :
9264 7045 : insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
9265 7045 : RTX_FRAME_RELATED_P (insn) = true;
9266 7045 : }
9267 :
9268 : /* Generate and return an insn body to AND X with Y. */
9269 :
9270 : static rtx_insn *
9271 31829 : gen_and2_insn (rtx x, rtx y)
9272 : {
9273 31829 : enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
9274 :
9275 31829 : gcc_assert (insn_operand_matches (icode, 0, x));
9276 31829 : gcc_assert (insn_operand_matches (icode, 1, x));
9277 31829 : gcc_assert (insn_operand_matches (icode, 2, y));
9278 :
9279 31829 : return GEN_FCN (icode) (x, x, y);
9280 : }
9281 :
9282 : /* Expand the prologue into a bunch of separate insns.

   Nothing is emitted for naked functions.  Otherwise the steps below
   run in this order: finalize the stack-frame flags, reset the frame
   state kept in cfun->machine->fs, handle the 32-bit ms_hook_prologue
   case or the on-stack static chain, realign the stack through DRAP if
   required, push and set up the frame pointer, save integer registers
   (by push, or by mov when the red zone can be used), realign the
   stack through the frame pointer if required, allocate the rest of
   the frame (probing the stack when requested), and finally emit any
   outstanding register saves, the profiling SET_GOT, the DRAP setup
   for the no-realign case and the scheduling barriers.  */
9283 :
9284 : void
9285 1532456 : ix86_expand_prologue (void)
9286 : {
9287 1532456 : struct machine_function *m = cfun->machine;
9288 1532456 : rtx insn, t;
9289 1532456 : HOST_WIDE_INT allocate;
9290 1532456 : bool int_registers_saved;
9291 1532456 : bool sse_registers_saved;
9292 1532456 : bool save_stub_call_needed;
9293 1532456 : rtx static_chain = NULL_RTX;
9294 :
9295 1532456 : ix86_last_zero_store_uid = 0;
  /* Naked functions get no prologue at all; only report a zero static
     stack size when stack usage info was requested.  */
9296 1532456 : if (ix86_function_naked (current_function_decl))
9297 : {
9298 74 : if (flag_stack_usage_info)
9299 0 : current_function_static_stack_size = 0;
9300 74 : return;
9301 : }
9302 :
9303 1532382 : ix86_finalize_stack_frame_flags ();
9304 :
9305 : /* DRAP should not coexist with stack_realign_fp */
9306 1532382 : gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9307 :
9308 1532382 : memset (&m->fs, 0, sizeof (m->fs));
9309 :
9310 : /* Initialize CFA state for before the prologue. */
9311 1532382 : m->fs.cfa_reg = stack_pointer_rtx;
9312 1532382 : m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9313 :
9314 : /* Track SP offset to the CFA. We continue tracking this after we've
9315 : swapped the CFA register away from SP. In the case of re-alignment
9316 : this is fudged; we're interested in offsets within the local frame. */
9317 1532382 : m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9318 1532382 : m->fs.sp_valid = true;
9319 1532382 : m->fs.sp_realigned = false;
9320 :
9321 1532382 : const struct ix86_frame &frame = cfun->machine->frame;
9322 :
9323 1532382 : if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9324 : {
9325 : /* We should have already generated an error for any use of
9326 : ms_hook on a nested function. */
9327 0 : gcc_checking_assert (!ix86_static_chain_on_stack);
9328 :
9329 : /* Check if profiling is active and we shall use profiling before
9330 : prologue variant. If so sorry. */
9331 0 : if (crtl->profile && flag_fentry != 0)
9332 0 : sorry ("%<ms_hook_prologue%> attribute is not compatible "
9333 : "with %<-mfentry%> for 32-bit");
9334 :
9335 : /* In ix86_asm_output_function_label we emitted:
9336 : 8b ff movl.s %edi,%edi
9337 : 55 push %ebp
9338 : 8b ec movl.s %esp,%ebp
9339 :
9340 : This matches the hookable function prologue in Win32 API
9341 : functions in Microsoft Windows XP Service Pack 2 and newer.
9342 : Wine uses this to enable Windows apps to hook the Win32 API
9343 : functions provided by Wine.
9344 :
9345 : What that means is that we've already set up the frame pointer. */
9346 :
9347 0 : if (frame_pointer_needed
9348 0 : && !(crtl->drap_reg && crtl->stack_realign_needed))
9349 : {
9350 0 : rtx push, mov;
9351 :
9352 : /* We've decided to use the frame pointer already set up.
9353 : Describe this to the unwinder by pretending that both
9354 : push and mov insns happen right here.
9355 :
9356 : Putting the unwind info here at the end of the ms_hook
9357 : is done so that we can make absolutely certain we get
9358 : the required byte sequence at the start of the function,
9359 : rather than relying on an assembler that can produce
9360 : the exact encoding required.
9361 :
9362 : However it does mean (in the unpatched case) that we have
9363 : a 1 insn window where the asynchronous unwind info is
9364 : incorrect. However, if we placed the unwind info at
9365 : its correct location we would have incorrect unwind info
9366 : in the patched case. Which is probably all moot since
9367 : I don't expect Wine generates dwarf2 unwind info for the
9368 : system libraries that use this feature. */
9369 :
9370 0 : insn = emit_insn (gen_blockage ());
9371 :
9372 0 : push = gen_push (hard_frame_pointer_rtx);
9373 0 : mov = gen_rtx_SET (hard_frame_pointer_rtx,
9374 : stack_pointer_rtx);
9375 0 : RTX_FRAME_RELATED_P (push) = 1;
9376 0 : RTX_FRAME_RELATED_P (mov) = 1;
9377 :
9378 0 : RTX_FRAME_RELATED_P (insn) = 1;
9379 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9380 : gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9381 :
9382 : /* Note that gen_push incremented m->fs.cfa_offset, even
9383 : though we didn't emit the push insn here. */
9384 0 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9385 0 : m->fs.fp_offset = m->fs.cfa_offset;
9386 0 : m->fs.fp_valid = true;
9387 0 : }
9388 : else
9389 : {
9390 : /* The frame pointer is not needed so pop %ebp again.
9391 : This leaves us with a pristine state. */
9392 0 : emit_insn (gen_pop (hard_frame_pointer_rtx));
9393 : }
9394 : }
9395 :
9396 : /* The first insn of a function that accepts its static chain on the
9397 : stack is to push the register that would be filled in by a direct
9398 : call. This insn will be skipped by the trampoline. */
9399 1532382 : else if (ix86_static_chain_on_stack)
9400 : {
9401 0 : static_chain = ix86_static_chain (cfun->decl, false);
9402 0 : insn = emit_insn (gen_push (static_chain));
9403 0 : emit_insn (gen_blockage ());
9404 :
9405 : /* We don't want to interpret this push insn as a register save,
9406 : only as a stack adjustment. The real copy of the register as
9407 : a save will be done later, if needed. */
9408 0 : t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
9409 0 : t = gen_rtx_SET (stack_pointer_rtx, t);
9410 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9411 0 : RTX_FRAME_RELATED_P (insn) = 1;
9412 : }
9413 :
9414 : /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9415 : DRAP is needed and stack realignment is really needed after reload. */
9416 1532382 : if (stack_realign_drap)
9417 : {
9418 7079 : int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9419 :
9420 : /* Can't use DRAP in interrupt function. */
9421 7079 : if (cfun->machine->func_type != TYPE_NORMAL)
9422 0 : sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
9423 : "in interrupt service routine. This may be worked "
9424 : "around by avoiding functions with aggregate return.");
9425 :
9426 : /* Only need to push parameter pointer reg if it is callee saved
   (the test below is for a register that is not call-used or fixed). */
9427 7079 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9428 : {
9429 : /* Push arg pointer reg */
9430 137 : insn = emit_insn (gen_push (crtl->drap_reg));
9431 137 : RTX_FRAME_RELATED_P (insn) = 1;
9432 : }
9433 :
9434 : /* Grab the argument pointer. */
9435 7364 : t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
9436 7079 : insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9437 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9438 7079 : m->fs.cfa_reg = crtl->drap_reg;
9439 7079 : m->fs.cfa_offset = 0;
9440 :
9441 : /* Align the stack. */
9442 7079 : insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
9443 7079 : GEN_INT (-align_bytes)));
9444 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9445 :
9446 : /* Replicate the return address on the stack so that return
9447 : address can be reached via (argp - 1) slot. This is needed
9448 : to implement macro RETURN_ADDR_RTX and intrinsic function
9449 : expand_builtin_return_addr etc. */
9450 7649 : t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
9451 7079 : t = gen_frame_mem (word_mode, t);
9452 7079 : insn = emit_insn (gen_push (t));
9453 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9454 :
9455 : /* For the purposes of frame and register save area addressing,
9456 : we've started over with a new frame. */
9457 7079 : m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9458 7079 : m->fs.realigned = true;
9459 :
9460 7079 : if (static_chain)
9461 : {
9462 : /* Replicate static chain on the stack so that static chain
9463 : can be reached via (argp - 2) slot. This is needed for
9464 : nested function with stack realignment. */
9465 0 : insn = emit_insn (gen_push (static_chain));
9466 0 : RTX_FRAME_RELATED_P (insn) = 1;
9467 : }
9468 : }
9469 :
  /* Track which save steps are still outstanding.  A step counts as
     already done when the frame has nothing of that kind to save.  The
     assertion requires that the out-of-line ms2sysv save stub is only
     needed when there are no SSE registers to save inline.  */
9470 1532382 : int_registers_saved = (frame.nregs == 0);
9471 1532382 : sse_registers_saved = (frame.nsseregs == 0);
9472 1532382 : save_stub_call_needed = (m->call_ms2sysv);
9473 1532382 : gcc_assert (sse_registers_saved || !save_stub_call_needed);
9474 :
9475 1532382 : if (frame_pointer_needed && !m->fs.fp_valid)
9476 : {
9477 : /* Note: AT&T enter does NOT have reversed args. Enter is probably
9478 : slower on all targets. Also sdb didn't like it. */
9479 491868 : insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9480 491868 : RTX_FRAME_RELATED_P (insn) = 1;
9481 :
9482 491868 : if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9483 : {
9484 491868 : insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9485 491868 : RTX_FRAME_RELATED_P (insn) = 1;
9486 :
9487 491868 : if (m->fs.cfa_reg == stack_pointer_rtx)
9488 484789 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9489 491868 : m->fs.fp_offset = m->fs.sp_offset;
9490 491868 : m->fs.fp_valid = true;
9491 : }
9492 : }
9493 :
9494 1532382 : if (!int_registers_saved)
9495 : {
9496 : /* If saving registers via PUSH, do so now. */
9497 471923 : if (!frame.save_regs_using_mov)
9498 : {
9499 427777 : ix86_emit_save_regs ();
9500 427777 : m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
9501 427777 : int_registers_saved = true;
9502 427777 : gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9503 : }
9504 :
9505 : /* When using red zone we may start register saving before allocating
9506 : the stack frame saving one cycle of the prologue. However, avoid
9507 : doing this if we have to probe the stack; at least on x86_64 the
9508 : stack probe can turn into a call that clobbers a red zone location. */
9509 44146 : else if (ix86_using_red_zone ()
9510 44146 : && (! TARGET_STACK_PROBE
9511 0 : || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9512 : {
9513 39699 : HOST_WIDE_INT allocate_offset;
9514 39699 : if (crtl->shrink_wrapped_separate)
9515 : {
9516 39643 : allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
9517 :
9518 : /* Adjust the total offset at the beginning of the function. */
9519 39643 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9520 : GEN_INT (allocate_offset), -1,
9521 39643 : m->fs.cfa_reg == stack_pointer_rtx);
9522 39643 : m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
9523 : }
9524 :
9525 39699 : ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9526 39699 : int_registers_saved = true;
9527 : }
9528 : }
9529 :
9530 1532382 : if (frame.red_zone_size != 0)
9531 143651 : cfun->machine->red_zone_used = true;
9532 :
9533 1532382 : if (stack_realign_fp)
9534 : {
9535 24750 : int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9536 25099 : gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9537 :
9538 : /* Record last valid frame pointer offset. */
9539 24750 : m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9540 :
9541 : /* The computation of the size of the re-aligned stack frame means
9542 : that we must allocate the size of the register save area before
9543 : performing the actual alignment. Otherwise we cannot guarantee
9544 : that there's enough storage above the realignment point. */
9545 24750 : allocate = frame.reg_save_offset - m->fs.sp_offset
9546 24750 : + frame.stack_realign_allocate;
9547 24750 : if (allocate)
9548 2691 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9549 : GEN_INT (-allocate), -1, false);
9550 :
9551 : /* Align the stack. */
9552 24750 : emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9553 24750 : m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9554 24750 : m->fs.sp_realigned_offset = m->fs.sp_offset
9555 24750 : - frame.stack_realign_allocate;
9556 : /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9557 : Beyond this point, stack access should be done via choose_baseaddr or
9558 : by using sp_valid_at and fp_valid_at to determine the correct base
9559 : register. Henceforth, any CFA offset should be thought of as logical
9560 : and not physical. */
9561 24750 : gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9562 24750 : gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9563 24750 : m->fs.sp_realigned = true;
9564 :
9565 : /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9566 : is needed to describe where a register is saved using a realigned
9567 : stack pointer, so we need to invalidate the stack pointer for that
9568 : target. */
9569 24750 : if (TARGET_SEH)
9570 : m->fs.sp_valid = false;
9571 :
9572 : /* If SP offset is non-immediate after allocation of the stack frame,
9573 : then emit SSE saves or stub call prior to allocating the rest of the
9574 : stack frame. This is less efficient for the out-of-line stub because
9575 : we can't combine allocations across the call barrier, but it's better
9576 : than using a scratch register. */
9577 24750 : else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9578 : - m->fs.sp_realigned_offset),
9579 24750 : Pmode))
9580 : {
9581 3 : if (!sse_registers_saved)
9582 : {
9583 1 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9584 1 : sse_registers_saved = true;
9585 : }
9586 2 : else if (save_stub_call_needed)
9587 : {
9588 1 : ix86_emit_outlined_ms2sysv_save (frame);
9589 1 : save_stub_call_needed = false;
9590 : }
9591 : }
9592 : }
9593 :
  /* What is still left to allocate to bring SP to its final offset.  */
9594 1532382 : allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9595 :
9596 1532382 : if (flag_stack_usage_info)
9597 : {
9598 : /* We start to count from ARG_POINTER. */
9599 355 : HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9600 :
9601 : /* If it was realigned, take into account the fake frame. */
9602 355 : if (stack_realign_drap)
9603 : {
9604 1 : if (ix86_static_chain_on_stack)
9605 0 : stack_size += UNITS_PER_WORD;
9606 :
9607 1 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9608 0 : stack_size += UNITS_PER_WORD;
9609 :
9610 : /* This over-estimates by 1 minimal-stack-alignment-unit but
9611 : mitigates that by counting in the new return address slot. */
9612 1 : current_function_dynamic_stack_size
9613 1 : += crtl->stack_alignment_needed / BITS_PER_UNIT;
9614 : }
9615 :
9616 355 : current_function_static_stack_size = stack_size;
9617 : }
9618 :
9619 : /* On SEH target with very large frame size, allocate an area to save
9620 : SSE registers (as the very large allocation won't be described). */
9621 1532382 : if (TARGET_SEH
9622 : && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9623 : && !sse_registers_saved)
9624 : {
9625 : HOST_WIDE_INT sse_size
9626 : = frame.sse_reg_save_offset - frame.reg_save_offset;
9627 :
9628 : gcc_assert (int_registers_saved);
9629 :
9630 : /* No need to do stack checking as the area will be immediately
9631 : written. */
9632 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9633 : GEN_INT (-sse_size), -1,
9634 : m->fs.cfa_reg == stack_pointer_rtx);
9635 : allocate -= sse_size;
9636 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9637 : sse_registers_saved = true;
9638 : }
9639 :
9640 : /* If stack clash protection is requested, then probe the stack, unless it
9641 : is already probed on the target. */
9642 1532382 : if (allocate >= 0
9643 1532378 : && flag_stack_clash_protection
9644 1532480 : && !ix86_target_stack_probe ())
9645 : {
9646 98 : ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9647 98 : allocate = 0;
9648 : }
9649 :
9650 : /* The stack has already been decremented by the instruction calling us
9651 : so probe if the size is non-negative to preserve the protection area. */
9652 1532284 : else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9653 : {
9654 46 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
9655 :
9656 46 : if (STACK_CHECK_MOVING_SP)
9657 : {
9658 46 : if (crtl->is_leaf
9659 19 : && !cfun->calls_alloca
9660 19 : && allocate <= probe_interval)
9661 : ;
9662 :
9663 : else
9664 : {
9665 28 : ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9666 28 : allocate = 0;
9667 : }
9668 : }
9669 :
9670 : else
9671 : {
9672 : HOST_WIDE_INT size = allocate;
9673 :
9674 : if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9675 : size = 0x80000000 - get_stack_check_protect () - 1;
9676 :
9677 : if (TARGET_STACK_PROBE)
9678 : {
9679 : if (crtl->is_leaf && !cfun->calls_alloca)
9680 : {
9681 : if (size > probe_interval)
9682 : ix86_emit_probe_stack_range (0, size, int_registers_saved);
9683 : }
9684 : else
9685 : ix86_emit_probe_stack_range (0,
9686 : size + get_stack_check_protect (),
9687 : int_registers_saved);
9688 : }
9689 : else
9690 : {
9691 : if (crtl->is_leaf && !cfun->calls_alloca)
9692 : {
9693 : if (size > probe_interval
9694 : && size > get_stack_check_protect ())
9695 : ix86_emit_probe_stack_range (get_stack_check_protect (),
9696 : (size
9697 : - get_stack_check_protect ()),
9698 : int_registers_saved);
9699 : }
9700 : else
9701 : ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9702 : int_registers_saved);
9703 : }
9704 : }
9705 : }
9706 :
  /* Nothing to allocate here when ALLOCATE is zero, either originally
     or because it was cleared after ix86_adjust_stack_and_probe above.  */
9707 1532378 : if (allocate == 0)
9708 : ;
9709 842808 : else if (!ix86_target_stack_probe ()
9710 842808 : || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9711 : {
9712 842763 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9713 : GEN_INT (-allocate), -1,
9714 842763 : m->fs.cfa_reg == stack_pointer_rtx);
9715 : }
9716 : else
9717 : {
      /* The target probes the stack and the frame is at least
	 CHECK_STACK_LIMIT: allocate through the probing worker, with
	 the size passed in AX.  AX and (on 64-bit, for functions with
	 a static chain) R10 are pushed first when live and reloaded
	 from the stack afterwards.  */
9718 45 : rtx eax = gen_rtx_REG (Pmode, AX_REG);
9719 45 : rtx r10 = NULL;
9720 45 : const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9721 45 : bool eax_live = ix86_eax_live_at_start_p ();
9722 45 : bool r10_live = false;
9723 :
9724 45 : if (TARGET_64BIT)
9725 45 : r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9726 :
9727 45 : if (eax_live)
9728 : {
9729 0 : insn = emit_insn (gen_push (eax));
9730 0 : allocate -= UNITS_PER_WORD;
9731 : /* Note that SEH directives need to continue tracking the stack
9732 : pointer even after the frame pointer has been set up. */
9733 0 : if (sp_is_cfa_reg || TARGET_SEH)
9734 : {
9735 0 : if (sp_is_cfa_reg)
9736 0 : m->fs.cfa_offset += UNITS_PER_WORD;
9737 0 : RTX_FRAME_RELATED_P (insn) = 1;
9738 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9739 0 : gen_rtx_SET (stack_pointer_rtx,
9740 : plus_constant (Pmode,
9741 : stack_pointer_rtx,
9742 : -UNITS_PER_WORD)));
9743 : }
9744 : }
9745 :
9746 45 : if (r10_live)
9747 : {
9748 0 : r10 = gen_rtx_REG (Pmode, R10_REG);
9749 0 : insn = emit_insn (gen_push (r10));
9750 0 : allocate -= UNITS_PER_WORD;
9751 0 : if (sp_is_cfa_reg || TARGET_SEH)
9752 : {
9753 0 : if (sp_is_cfa_reg)
9754 0 : m->fs.cfa_offset += UNITS_PER_WORD;
9755 0 : RTX_FRAME_RELATED_P (insn) = 1;
9756 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9757 0 : gen_rtx_SET (stack_pointer_rtx,
9758 : plus_constant (Pmode,
9759 : stack_pointer_rtx,
9760 : -UNITS_PER_WORD)));
9761 : }
9762 : }
9763 :
9764 45 : emit_move_insn (eax, GEN_INT (allocate));
9765 45 : emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9766 :
9767 : /* Use the fact that AX still contains ALLOCATE. */
9768 45 : insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9769 45 : (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9770 :
9771 45 : if (sp_is_cfa_reg || TARGET_SEH)
9772 : {
9773 37 : if (sp_is_cfa_reg)
9774 37 : m->fs.cfa_offset += allocate;
9775 37 : RTX_FRAME_RELATED_P (insn) = 1;
9776 37 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9777 37 : gen_rtx_SET (stack_pointer_rtx,
9778 : plus_constant (Pmode, stack_pointer_rtx,
9779 : -allocate)));
9780 : }
9781 45 : m->fs.sp_offset += allocate;
9782 :
9783 : /* Use stack_pointer_rtx for relative addressing so that code works for
9784 : realigned stack. But this means that we need a blockage to prevent
9785 : stores based on the frame pointer from being scheduled before. */
9786 45 : if (r10_live && eax_live)
9787 : {
9788 0 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9789 0 : emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9790 : gen_frame_mem (word_mode, t));
9791 0 : t = plus_constant (Pmode, t, UNITS_PER_WORD);
9792 0 : emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9793 : gen_frame_mem (word_mode, t));
9794 0 : emit_insn (gen_memory_blockage ());
9795 : }
9796 45 : else if (eax_live || r10_live)
9797 : {
9798 0 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9799 0 : emit_move_insn (gen_rtx_REG (word_mode,
9800 : (eax_live ? AX_REG : R10_REG)),
9801 : gen_frame_mem (word_mode, t));
9802 0 : emit_insn (gen_memory_blockage ());
9803 : }
9804 : }
9805 1532382 : gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9806 :
9807 : /* If we haven't already set up the frame pointer, do so now. */
9808 1532382 : if (frame_pointer_needed && !m->fs.fp_valid)
9809 : {
9810 0 : insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9811 0 : GEN_INT (frame.stack_pointer_offset
9812 : - frame.hard_frame_pointer_offset));
9813 0 : insn = emit_insn (insn);
9814 0 : RTX_FRAME_RELATED_P (insn) = 1;
9815 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9816 :
9817 0 : if (m->fs.cfa_reg == stack_pointer_rtx)
9818 0 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9819 0 : m->fs.fp_offset = frame.hard_frame_pointer_offset;
9820 0 : m->fs.fp_valid = true;
9821 : }
9822 :
  /* Emit whichever register saves were not already done above.  */
9823 1532382 : if (!int_registers_saved)
9824 4447 : ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9825 1532382 : if (!sse_registers_saved)
9826 33362 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9827 1499020 : else if (save_stub_call_needed)
9828 7044 : ix86_emit_outlined_ms2sysv_save (frame);
9829 :
9830 : /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
9831 : in PROLOGUE. */
9832 1532382 : if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9833 : {
9834 0 : rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9835 0 : insn = emit_insn (gen_set_got (pic));
9836 0 : RTX_FRAME_RELATED_P (insn) = 1;
9837 0 : add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9838 0 : emit_insn (gen_prologue_use (pic));
9839 : /* Deleting already emitted SET_GOT if exist and allocated to
9840 : REAL_PIC_OFFSET_TABLE_REGNUM. */
9841 0 : ix86_elim_entry_set_got (pic);
9842 : }
9843 :
9844 1532382 : if (crtl->drap_reg && !crtl->stack_realign_needed)
9845 : {
9846 : /* vDRAP is set up, but after reload it turns out stack realignment
9847 : isn't necessary; here we emit prologue code to set up DRAP
9848 : without the stack realignment adjustment. */
9849 181 : t = choose_baseaddr (0, NULL);
9850 181 : emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9851 : }
9852 :
9853 : /* Prevent instructions from being scheduled into register save push
9854 : sequence when access to the redzone area is done through frame pointer.
9855 : The offset between the frame pointer and the stack pointer is calculated
9856 : relative to the value of the stack pointer at the end of the function
9857 : prologue, and moving instructions that access redzone area via frame
9858 : pointer inside push sequence violates this assumption. */
9859 1532382 : if (frame_pointer_needed && frame.red_zone_size)
9860 132639 : emit_insn (gen_memory_blockage ());
9861 :
9862 : /* SEH requires that the prologue end within 256 bytes of the start of
9863 : the function. Prevent instruction schedules that would extend that.
9864 : Further, prevent alloca modifications to the stack pointer from being
9865 : combined with prologue modifications. */
9866 : if (TARGET_SEH)
9867 : emit_insn (gen_prologue_use (stack_pointer_rtx));
9868 : }
9869 :
9870 : /* Emit code to restore REG using a POP or POPP insn. */
9871 :
9872 : static void
9873 1468964 : ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9874 : {
9875 1468964 : struct machine_function *m = cfun->machine;
9876 1468964 : rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9877 :
9878 1468964 : ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9879 1468964 : m->fs.sp_offset -= UNITS_PER_WORD;
9880 :
9881 1468964 : if (m->fs.cfa_reg == crtl->drap_reg
9882 1468964 : && REGNO (reg) == REGNO (crtl->drap_reg))
9883 : {
9884 : /* Previously we'd represented the CFA as an expression
9885 : like *(%ebp - 8). We've just popped that value from
9886 : the stack, which means we need to reset the CFA to
9887 : the drap register. This will remain until we restore
9888 : the stack pointer. */
9889 4033 : add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9890 4033 : RTX_FRAME_RELATED_P (insn) = 1;
9891 :
9892 : /* This means that the DRAP register is valid for addressing too. */
9893 4033 : m->fs.drap_valid = true;
9894 4033 : return;
9895 : }
9896 :
9897 1464931 : if (m->fs.cfa_reg == stack_pointer_rtx)
9898 : {
9899 1372359 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9900 1009046 : x = gen_rtx_SET (stack_pointer_rtx, x);
9901 1009046 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9902 1009046 : RTX_FRAME_RELATED_P (insn) = 1;
9903 :
9904 1190695 : m->fs.cfa_offset -= UNITS_PER_WORD;
9905 : }
9906 :
9907 : /* When the frame pointer is the CFA, and we pop it, we are
9908 : swapping back to the stack pointer as the CFA. This happens
9909 : for stack frames that don't allocate other data, so we assume
9910 : the stack pointer is now pointing at the return address, i.e.
9911 : the function entry state, which makes the offset be 1 word. */
9912 1464931 : if (reg == hard_frame_pointer_rtx)
9913 : {
9914 245856 : m->fs.fp_valid = false;
9915 245856 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9916 : {
9917 241810 : m->fs.cfa_reg = stack_pointer_rtx;
9918 241810 : m->fs.cfa_offset -= UNITS_PER_WORD;
9919 :
9920 241810 : add_reg_note (insn, REG_CFA_DEF_CFA,
9921 241810 : plus_constant (Pmode, stack_pointer_rtx,
9922 241810 : m->fs.cfa_offset));
9923 241810 : RTX_FRAME_RELATED_P (insn) = 1;
9924 : }
9925 : }
9926 : }
9927 :
9928 : /* Emit code to restore REG using a POP2 insn. */
9929 : static void
9930 19 : ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9931 : {
9932 19 : struct machine_function *m = cfun->machine;
9933 19 : const int offset = UNITS_PER_WORD * 2;
9934 19 : rtx_insn *insn;
9935 :
9936 19 : rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9937 : stack_pointer_rtx));
9938 :
9939 19 : if (ppx_p)
9940 15 : insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9941 : else
9942 4 : insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9943 :
9944 19 : RTX_FRAME_RELATED_P (insn) = 1;
9945 :
9946 19 : rtx dwarf = NULL_RTX;
9947 19 : dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9948 19 : dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9949 19 : REG_NOTES (insn) = dwarf;
9950 19 : m->fs.sp_offset -= offset;
9951 :
9952 19 : if (m->fs.cfa_reg == crtl->drap_reg
9953 19 : && (REGNO (reg1) == REGNO (crtl->drap_reg)
9954 3 : || REGNO (reg2) == REGNO (crtl->drap_reg)))
9955 : {
9956 : /* Previously we'd represented the CFA as an expression
9957 : like *(%ebp - 8). We've just popped that value from
9958 : the stack, which means we need to reset the CFA to
9959 : the drap register. This will remain until we restore
9960 : the stack pointer. */
9961 1 : add_reg_note (insn, REG_CFA_DEF_CFA,
9962 1 : REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9963 1 : RTX_FRAME_RELATED_P (insn) = 1;
9964 :
9965 : /* This means that the DRAP register is valid for addressing too. */
9966 1 : m->fs.drap_valid = true;
9967 1 : return;
9968 : }
9969 :
9970 18 : if (m->fs.cfa_reg == stack_pointer_rtx)
9971 : {
9972 14 : rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9973 14 : x = gen_rtx_SET (stack_pointer_rtx, x);
9974 14 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9975 14 : RTX_FRAME_RELATED_P (insn) = 1;
9976 :
9977 14 : m->fs.cfa_offset -= offset;
9978 : }
9979 :
9980 : /* When the frame pointer is the CFA, and we pop it, we are
9981 : swapping back to the stack pointer as the CFA. This happens
9982 : for stack frames that don't allocate other data, so we assume
9983 : the stack pointer is now pointing at the return address, i.e.
9984 : the function entry state, which makes the offset be 1 word. */
9985 18 : if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9986 : {
9987 0 : m->fs.fp_valid = false;
9988 0 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9989 : {
9990 0 : m->fs.cfa_reg = stack_pointer_rtx;
9991 0 : m->fs.cfa_offset -= offset;
9992 :
9993 0 : add_reg_note (insn, REG_CFA_DEF_CFA,
9994 0 : plus_constant (Pmode, stack_pointer_rtx,
9995 0 : m->fs.cfa_offset));
9996 0 : RTX_FRAME_RELATED_P (insn) = 1;
9997 : }
9998 : }
9999 : }
10000 :
10001 : /* Emit code to restore saved registers using POP insns. */
10002 :
10003 : static void
10004 1357019 : ix86_emit_restore_regs_using_pop (bool ppx_p)
10005 : {
10006 1357019 : unsigned int regno;
10007 :
10008 126202767 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10009 124845748 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
10010 1222787 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
10011 1357019 : }
10012 :
10013 : /* Emit code to restore saved registers using POP2 insns. */
10014 :
10015 : static void
10016 563 : ix86_emit_restore_regs_using_pop2 (void)
10017 : {
10018 563 : int regno;
10019 563 : int regno_list[2];
10020 563 : regno_list[0] = regno_list[1] = -1;
10021 563 : int loaded_regnum = 0;
10022 563 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
10023 :
10024 52359 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10025 51796 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
10026 : {
10027 127 : if (aligned)
10028 : {
10029 120 : regno_list[loaded_regnum++] = regno;
10030 120 : if (loaded_regnum == 2)
10031 : {
10032 19 : gcc_assert (regno_list[0] != -1
10033 : && regno_list[1] != -1
10034 : && regno_list[0] != regno_list[1]);
10035 :
10036 19 : ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
10037 : regno_list[0]),
10038 : gen_rtx_REG (word_mode,
10039 : regno_list[1]),
10040 19 : TARGET_APX_PPX);
10041 19 : loaded_regnum = 0;
10042 19 : regno_list[0] = regno_list[1] = -1;
10043 : }
10044 : }
10045 : else
10046 : {
10047 14 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
10048 7 : TARGET_APX_PPX);
10049 7 : aligned = true;
10050 : }
10051 : }
10052 :
10053 563 : if (loaded_regnum == 1)
10054 82 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
10055 82 : TARGET_APX_PPX);
10056 563 : }
10057 :
10058 : /* Emit code and notes for the LEAVE instruction. If insn is non-null,
10059 : omits the emit and only attaches the notes. */
10060 :
10061 : static void
10062 247325 : ix86_emit_leave (rtx_insn *insn)
10063 : {
10064 247325 : struct machine_function *m = cfun->machine;
10065 :
10066 247325 : if (!insn)
10067 246354 : insn = emit_insn (gen_leave (word_mode));
10068 :
10069 247325 : ix86_add_queued_cfa_restore_notes (insn);
10070 :
10071 247325 : gcc_assert (m->fs.fp_valid);
10072 247325 : m->fs.sp_valid = true;
10073 247325 : m->fs.sp_realigned = false;
10074 247325 : m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10075 247325 : m->fs.fp_valid = false;
10076 :
10077 247325 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10078 : {
10079 244184 : m->fs.cfa_reg = stack_pointer_rtx;
10080 244184 : m->fs.cfa_offset = m->fs.sp_offset;
10081 :
10082 244184 : add_reg_note (insn, REG_CFA_DEF_CFA,
10083 244184 : plus_constant (Pmode, stack_pointer_rtx,
10084 244184 : m->fs.sp_offset));
10085 244184 : RTX_FRAME_RELATED_P (insn) = 1;
10086 : }
10087 247325 : ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10088 : m->fs.fp_offset);
10089 247325 : }
10090 :
10091 : /* Emit code to restore saved registers using MOV insns.
10092 : First register is restored from CFA - CFA_OFFSET. */
10093 : static void
10094 96126 : ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10095 : bool maybe_eh_return)
10096 : {
10097 96126 : struct machine_function *m = cfun->machine;
10098 96126 : unsigned int regno;
10099 :
10100 8939718 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10101 8843592 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
10102 : {
10103 :
10104 : /* Skip registers, already processed by shrink wrap separate. */
10105 262650 : if (!cfun->machine->reg_is_wrapped_separately[regno])
10106 : {
10107 140022 : rtx reg = gen_rtx_REG (word_mode, regno);
10108 140022 : rtx mem;
10109 140022 : rtx_insn *insn;
10110 :
10111 140022 : mem = choose_baseaddr (cfa_offset, NULL);
10112 140022 : mem = gen_frame_mem (word_mode, mem);
10113 140022 : insn = emit_move_insn (reg, mem);
10114 :
10115 140022 : if (m->fs.cfa_reg == crtl->drap_reg
10116 140022 : && regno == REGNO (crtl->drap_reg))
10117 : {
10118 : /* Previously we'd represented the CFA as an expression
10119 : like *(%ebp - 8). We've just popped that value from
10120 : the stack, which means we need to reset the CFA to
10121 : the drap register. This will remain until we restore
10122 : the stack pointer. */
10123 3141 : add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10124 3141 : RTX_FRAME_RELATED_P (insn) = 1;
10125 :
10126 : /* DRAP register is valid for addressing. */
10127 3141 : m->fs.drap_valid = true;
10128 : }
10129 : else
10130 136881 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
10131 : }
10132 283097 : cfa_offset -= UNITS_PER_WORD;
10133 : }
10134 96126 : }
10135 :
10136 : /* Emit code to restore saved registers using MOV insns.
10137 : First register is restored from CFA - CFA_OFFSET. */
10138 : static void
10139 33939 : ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10140 : bool maybe_eh_return)
10141 : {
10142 33939 : unsigned int regno;
10143 :
10144 3156327 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10145 3122388 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
10146 : {
10147 339417 : rtx reg = gen_rtx_REG (V4SFmode, regno);
10148 339417 : rtx mem;
10149 339417 : unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
10150 :
10151 339417 : mem = choose_baseaddr (cfa_offset, &align);
10152 339417 : mem = gen_rtx_MEM (V4SFmode, mem);
10153 :
10154 : /* The location alignment depends upon the base register. */
10155 339417 : align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
10156 339417 : gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
10157 339417 : set_mem_align (mem, align);
10158 339417 : emit_insn (gen_rtx_SET (reg, mem));
10159 :
10160 339417 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
10161 :
10162 339417 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
10163 : }
10164 33939 : }
10165 :
10166 : static void
10167 7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
10168 : bool use_call, int style)
10169 : {
10170 7621 : struct machine_function *m = cfun->machine;
10171 7621 : const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
10172 7621 : + m->call_ms2sysv_extra_regs;
10173 7621 : rtvec v;
10174 7621 : unsigned int elems_needed, align, i, vi = 0;
10175 7621 : rtx_insn *insn;
10176 7621 : rtx sym, tmp;
10177 7621 : rtx rsi = gen_rtx_REG (word_mode, SI_REG);
10178 7621 : rtx r10 = NULL_RTX;
10179 7621 : const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
10180 7621 : HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
10181 7621 : HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
10182 7621 : rtx rsi_frame_load = NULL_RTX;
10183 7621 : HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
10184 7621 : enum xlogue_stub stub;
10185 :
10186 7621 : gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
10187 :
10188 : /* If using a realigned stack, we should never start with padding. */
10189 7621 : gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
10190 :
10191 : /* Setup RSI as the stub's base pointer. */
10192 7621 : align = GET_MODE_ALIGNMENT (V4SFmode);
10193 7621 : tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
10194 7621 : gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
10195 :
10196 7621 : emit_insn (gen_rtx_SET (rsi, tmp));
10197 :
10198 : /* Get a symbol for the stub. */
10199 7621 : if (frame_pointer_needed)
10200 5955 : stub = use_call ? XLOGUE_STUB_RESTORE_HFP
10201 : : XLOGUE_STUB_RESTORE_HFP_TAIL;
10202 : else
10203 1666 : stub = use_call ? XLOGUE_STUB_RESTORE
10204 : : XLOGUE_STUB_RESTORE_TAIL;
10205 7621 : sym = xlogue.get_stub_rtx (stub);
10206 :
10207 7621 : elems_needed = ncregs;
10208 7621 : if (use_call)
10209 6498 : elems_needed += 1;
10210 : else
10211 1275 : elems_needed += frame_pointer_needed ? 5 : 3;
10212 7621 : v = rtvec_alloc (elems_needed);
10213 :
10214 : /* We call the epilogue stub when we need to pop incoming args or we are
10215 : doing a sibling call as the tail. Otherwise, we will emit a jmp to the
10216 : epilogue stub and it is the tail-call. */
10217 7621 : if (use_call)
10218 6498 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10219 : else
10220 : {
10221 1123 : RTVEC_ELT (v, vi++) = ret_rtx;
10222 1123 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10223 1123 : if (frame_pointer_needed)
10224 : {
10225 971 : rtx rbp = gen_rtx_REG (DImode, BP_REG);
10226 971 : gcc_assert (m->fs.fp_valid);
10227 971 : gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
10228 :
10229 971 : tmp = plus_constant (DImode, rbp, 8);
10230 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
10231 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
10232 971 : tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10233 971 : RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
10234 : }
10235 : else
10236 : {
10237 : /* If no hard frame pointer, we set R10 to the SP restore value. */
10238 152 : gcc_assert (!m->fs.fp_valid);
10239 152 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10240 152 : gcc_assert (m->fs.sp_valid);
10241 :
10242 152 : r10 = gen_rtx_REG (DImode, R10_REG);
10243 152 : tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
10244 152 : emit_insn (gen_rtx_SET (r10, tmp));
10245 :
10246 152 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
10247 : }
10248 : }
10249 :
10250 : /* Generate frame load insns and restore notes. */
10251 107954 : for (i = 0; i < ncregs; ++i)
10252 : {
10253 100333 : const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
10254 100333 : machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
10255 100333 : rtx reg, frame_load;
10256 :
10257 100333 : reg = gen_rtx_REG (mode, r.regno);
10258 100333 : frame_load = gen_frame_load (reg, rsi, r.offset);
10259 :
10260 : /* Save RSI frame load insn & note to add last. */
10261 100333 : if (r.regno == SI_REG)
10262 : {
10263 7621 : gcc_assert (!rsi_frame_load);
10264 7621 : rsi_frame_load = frame_load;
10265 7621 : rsi_restore_offset = r.offset;
10266 : }
10267 : else
10268 : {
10269 92712 : RTVEC_ELT (v, vi++) = frame_load;
10270 92712 : ix86_add_cfa_restore_note (NULL, reg, r.offset);
10271 : }
10272 : }
10273 :
10274 : /* Add RSI frame load & restore note at the end. */
10275 7621 : gcc_assert (rsi_frame_load);
10276 7621 : gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
10277 7621 : RTVEC_ELT (v, vi++) = rsi_frame_load;
10278 7621 : ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
10279 : rsi_restore_offset);
10280 :
10281 : /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
10282 7621 : if (!use_call && !frame_pointer_needed)
10283 : {
10284 152 : gcc_assert (m->fs.sp_valid);
10285 152 : gcc_assert (!m->fs.sp_realigned);
10286 :
10287 : /* At this point, R10 should point to frame.stack_realign_offset. */
10288 152 : if (m->fs.cfa_reg == stack_pointer_rtx)
10289 152 : m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
10290 152 : m->fs.sp_offset = frame.stack_realign_offset;
10291 : }
10292 :
10293 7621 : gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
10294 7621 : tmp = gen_rtx_PARALLEL (VOIDmode, v);
10295 7621 : if (use_call)
10296 6498 : insn = emit_insn (tmp);
10297 : else
10298 : {
10299 1123 : insn = emit_jump_insn (tmp);
10300 1123 : JUMP_LABEL (insn) = ret_rtx;
10301 :
10302 1123 : if (frame_pointer_needed)
10303 971 : ix86_emit_leave (insn);
10304 : else
10305 : {
10306 : /* Need CFA adjust note. */
10307 152 : tmp = gen_rtx_SET (stack_pointer_rtx, r10);
10308 152 : add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
10309 : }
10310 : }
10311 :
10312 7621 : RTX_FRAME_RELATED_P (insn) = true;
10313 7621 : ix86_add_queued_cfa_restore_notes (insn);
10314 :
10315 : /* If we're not doing a tail-call, we need to adjust the stack. */
10316 7621 : if (use_call && m->fs.sp_valid)
10317 : {
10318 3706 : HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
10319 3706 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10320 : GEN_INT (dealloc), style,
10321 3706 : m->fs.cfa_reg == stack_pointer_rtx);
10322 : }
10323 7621 : }
10324 :
10325 : /* Restore function stack, frame, and registers. */
10326 :
10327 : void
10328 1654442 : ix86_expand_epilogue (int style)
10329 : {
10330 1654442 : struct machine_function *m = cfun->machine;
10331 1654442 : struct machine_frame_state frame_state_save = m->fs;
10332 1654442 : bool restore_regs_via_mov;
10333 1654442 : bool using_drap;
10334 1654442 : bool restore_stub_is_tail = false;
10335 :
10336 1654442 : if (ix86_function_naked (current_function_decl))
10337 : {
10338 : /* The program should not reach this point. */
10339 74 : emit_insn (gen_ud2 ());
10340 122105 : return;
10341 : }
10342 :
10343 1654368 : ix86_finalize_stack_frame_flags ();
10344 1654368 : const struct ix86_frame &frame = cfun->machine->frame;
10345 :
10346 1654368 : m->fs.sp_realigned = stack_realign_fp;
10347 31971 : m->fs.sp_valid = stack_realign_fp
10348 1629572 : || !frame_pointer_needed
10349 2122780 : || crtl->sp_is_unchanging;
10350 1654368 : gcc_assert (!m->fs.sp_valid
10351 : || m->fs.sp_offset == frame.stack_pointer_offset);
10352 :
10353 : /* The FP must be valid if the frame pointer is present. */
10354 1654368 : gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10355 1654368 : gcc_assert (!m->fs.fp_valid
10356 : || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10357 :
10358 : /* We must have *some* valid pointer to the stack frame. */
10359 1654368 : gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10360 :
10361 : /* The DRAP is never valid at this point. */
10362 1654368 : gcc_assert (!m->fs.drap_valid);
10363 :
10364 : /* See the comment about red zone and frame
10365 : pointer usage in ix86_expand_prologue. */
10366 1654368 : if (frame_pointer_needed && frame.red_zone_size)
10367 132672 : emit_insn (gen_memory_blockage ());
10368 :
10369 1654368 : using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10370 7175 : gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10371 :
10372 : /* Determine the CFA offset of the end of the red-zone. */
10373 1654368 : m->fs.red_zone_offset = 0;
10374 1654368 : if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10375 : {
10376 : /* The red-zone begins below return address and error code in
10377 : exception handler. */
10378 1476908 : m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
10379 :
10380 : /* When the register save area is in the aligned portion of
10381 : the stack, determine the maximum runtime displacement that
10382 : matches up with the aligned frame. */
10383 1476908 : if (stack_realign_drap)
10384 8620 : m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10385 4310 : + UNITS_PER_WORD);
10386 : }
10387 :
10388 1654368 : HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
10389 :
10390 : /* Special care must be taken for the normal return case of a function
10391 : using eh_return: the eax and edx registers are marked as saved, but
10392 : not restored along this path. Adjust the save location to match. */
10393 1654368 : if (crtl->calls_eh_return && style != 2)
10394 37 : reg_save_offset -= 2 * UNITS_PER_WORD;
10395 :
10396 : /* EH_RETURN requires the use of moves to function properly. */
10397 1654368 : if (crtl->calls_eh_return)
10398 : restore_regs_via_mov = true;
10399 : /* SEH requires the use of pops to identify the epilogue. */
10400 1654310 : else if (TARGET_SEH)
10401 : restore_regs_via_mov = false;
10402 : /* If we already save reg with pushp, don't use move at epilogue. */
10403 1654310 : else if (m->fs.apx_ppx_used)
10404 : restore_regs_via_mov = false;
10405 : /* If we're only restoring one register and sp cannot be used then
10406 : using a move instruction to restore the register since it's
10407 : less work than reloading sp and popping the register. */
10408 1654223 : else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
10409 : restore_regs_via_mov = true;
10410 1593138 : else if (crtl->shrink_wrapped_separate
10411 1541636 : || (TARGET_EPILOGUE_USING_MOVE
10412 56735 : && cfun->machine->use_fast_prologue_epilogue
10413 56679 : && (frame.nregs > 1
10414 56666 : || m->fs.sp_offset != reg_save_offset)))
10415 : restore_regs_via_mov = true;
10416 1541401 : else if (frame_pointer_needed
10417 429514 : && !frame.nregs
10418 333251 : && m->fs.sp_offset != reg_save_offset)
10419 : restore_regs_via_mov = true;
10420 1388062 : else if (frame_pointer_needed
10421 276175 : && TARGET_USE_LEAVE
10422 276100 : && cfun->machine->use_fast_prologue_epilogue
10423 218824 : && frame.nregs == 1)
10424 : restore_regs_via_mov = true;
10425 : else
10426 1654368 : restore_regs_via_mov = false;
10427 :
10428 1654368 : if (crtl->shrink_wrapped_separate)
10429 51533 : gcc_assert (restore_regs_via_mov);
10430 :
10431 1602835 : if (restore_regs_via_mov || frame.nsseregs)
10432 : {
10433 : /* Ensure that the entire register save area is addressable via
10434 : the stack pointer, if we will restore SSE regs via sp. */
10435 330371 : if (TARGET_64BIT
10436 317771 : && m->fs.sp_offset > 0x7fffffff
10437 23 : && sp_valid_at (frame.stack_realign_offset + 1)
10438 330393 : && (frame.nsseregs + frame.nregs) != 0)
10439 : {
10440 6 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10441 6 : GEN_INT (m->fs.sp_offset
10442 : - frame.sse_reg_save_offset),
10443 : style,
10444 6 : m->fs.cfa_reg == stack_pointer_rtx);
10445 : }
10446 : }
10447 :
10448 : /* If there are any SSE registers to restore, then we have to do it
10449 : via moves, since there's obviously no pop for SSE regs. */
10450 1654368 : if (frame.nsseregs)
10451 33939 : ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10452 : style == 2);
10453 :
10454 1654368 : if (m->call_ms2sysv)
10455 : {
10456 7621 : int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
10457 :
10458 : /* We cannot use a tail-call for the stub if:
10459 : 1. We have to pop incoming args,
10460 : 2. We have additional int regs to restore, or
10461 : 3. A sibling call will be the tail-call, or
10462 : 4. We are emitting an eh_return_internal epilogue.
10463 :
10464 : TODO: Item 4 has not yet tested!
10465 :
10466 : If any of the above are true, we will call the stub rather than
10467 : jump to it. */
10468 7621 : restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
10469 7621 : ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
10470 : }
10471 :
10472 : /* If using out-of-line stub that is a tail-call, then...*/
10473 1654368 : if (m->call_ms2sysv && restore_stub_is_tail)
10474 : {
10475 : /* TODO: paranoid tests. (remove eventually) */
10476 1123 : gcc_assert (m->fs.sp_valid);
10477 1123 : gcc_assert (!m->fs.sp_realigned);
10478 1123 : gcc_assert (!m->fs.fp_valid);
10479 1123 : gcc_assert (!m->fs.realigned);
10480 1123 : gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
10481 1123 : gcc_assert (!crtl->drap_reg);
10482 1123 : gcc_assert (!frame.nregs);
10483 1123 : gcc_assert (!crtl->shrink_wrapped_separate);
10484 : }
10485 1653245 : else if (restore_regs_via_mov)
10486 : {
10487 295663 : rtx t;
10488 :
10489 295663 : if (frame.nregs)
10490 96126 : ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
10491 :
10492 : /* eh_return epilogues need %ecx added to the stack pointer. */
10493 295663 : if (style == 2)
10494 : {
10495 37 : rtx sa = EH_RETURN_STACKADJ_RTX;
10496 29 : rtx_insn *insn;
10497 :
10498 29 : gcc_assert (!crtl->shrink_wrapped_separate);
10499 :
10500 : /* Stack realignment doesn't work with eh_return. */
10501 29 : if (crtl->stack_realign_needed)
10502 0 : sorry ("Stack realignment not supported with "
10503 : "%<__builtin_eh_return%>");
10504 :
10505 : /* regparm nested functions don't work with eh_return. */
10506 29 : if (ix86_static_chain_on_stack)
10507 0 : sorry ("regparm nested function not supported with "
10508 : "%<__builtin_eh_return%>");
10509 :
10510 29 : if (frame_pointer_needed)
10511 : {
10512 35 : t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10513 43 : t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
10514 27 : emit_insn (gen_rtx_SET (sa, t));
10515 :
10516 : /* NB: eh_return epilogues must restore the frame pointer
10517 : in word_mode since the upper 32 bits of RBP register
10518 : can have any values. */
10519 27 : t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10520 27 : rtx frame_reg = gen_rtx_REG (word_mode,
10521 : HARD_FRAME_POINTER_REGNUM);
10522 27 : insn = emit_move_insn (frame_reg, t);
10523 :
10524 : /* Note that we use SA as a temporary CFA, as the return
10525 : address is at the proper place relative to it. We
10526 : pretend this happens at the FP restore insn because
10527 : prior to this insn the FP would be stored at the wrong
10528 : offset relative to SA, and after this insn we have no
10529 : other reasonable register to use for the CFA. We don't
10530 : bother resetting the CFA to the SP for the duration of
10531 : the return insn, unless the control flow instrumentation
10532 : is done. In this case the SP is used later and we have
10533 : to reset CFA to SP. */
10534 27 : add_reg_note (insn, REG_CFA_DEF_CFA,
10535 35 : plus_constant (Pmode, sa, UNITS_PER_WORD));
10536 27 : ix86_add_queued_cfa_restore_notes (insn);
10537 27 : add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10538 27 : RTX_FRAME_RELATED_P (insn) = 1;
10539 :
10540 27 : m->fs.cfa_reg = sa;
10541 27 : m->fs.cfa_offset = UNITS_PER_WORD;
10542 27 : m->fs.fp_valid = false;
10543 :
10544 27 : pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10545 : const0_rtx, style,
10546 27 : flag_cf_protection);
10547 : }
10548 : else
10549 : {
10550 2 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10551 2 : t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10552 2 : insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10553 2 : ix86_add_queued_cfa_restore_notes (insn);
10554 :
10555 2 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10556 2 : if (m->fs.cfa_offset != UNITS_PER_WORD)
10557 : {
10558 2 : m->fs.cfa_offset = UNITS_PER_WORD;
10559 2 : add_reg_note (insn, REG_CFA_DEF_CFA,
10560 2 : plus_constant (Pmode, stack_pointer_rtx,
10561 2 : UNITS_PER_WORD));
10562 2 : RTX_FRAME_RELATED_P (insn) = 1;
10563 : }
10564 : }
10565 29 : m->fs.sp_offset = UNITS_PER_WORD;
10566 29 : m->fs.sp_valid = true;
10567 29 : m->fs.sp_realigned = false;
10568 : }
10569 : }
10570 : else
10571 : {
10572 : /* SEH requires that the function end with (1) a stack adjustment
10573 : if necessary, (2) a sequence of pops, and (3) a return or
10574 : jump instruction. Prevent insns from the function body from
10575 : being scheduled into this sequence. */
10576 1357582 : if (TARGET_SEH)
10577 : {
10578 : /* Prevent a catch region from being adjacent to the standard
10579 : epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10580 : nor several other flags that would be interesting to test are
10581 : set up yet. */
10582 : if (flag_non_call_exceptions)
10583 : emit_insn (gen_nops (const1_rtx));
10584 : else
10585 : emit_insn (gen_blockage ());
10586 : }
10587 :
10588 : /* First step is to deallocate the stack frame so that we can
10589 : pop the registers. If the stack pointer was realigned, it needs
10590 : to be restored now. Also do it on SEH target for very large
10591 : frame as the emitted instructions aren't allowed by the ABI
10592 : in epilogues. */
10593 1357582 : if (!m->fs.sp_valid || m->fs.sp_realigned
10594 : || (TARGET_SEH
10595 : && (m->fs.sp_offset - reg_save_offset
10596 : >= SEH_MAX_FRAME_SIZE)))
10597 : {
10598 29805 : pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10599 29805 : GEN_INT (m->fs.fp_offset
10600 : - reg_save_offset),
10601 : style, false);
10602 : }
10603 1327777 : else if (m->fs.sp_offset != reg_save_offset)
10604 : {
10605 612145 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10606 : GEN_INT (m->fs.sp_offset
10607 : - reg_save_offset),
10608 : style,
10609 612145 : m->fs.cfa_reg == stack_pointer_rtx);
10610 : }
10611 :
10612 1357582 : if (TARGET_APX_PUSH2POP2
10613 566 : && ix86_can_use_push2pop2 ()
10614 1358146 : && m->func_type == TYPE_NORMAL)
10615 563 : ix86_emit_restore_regs_using_pop2 ();
10616 : else
10617 1357019 : ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10618 : }
10619 :
10620 : /* If we used a frame pointer and haven't already got rid of it,
10621 : then do so now. */
10622 1654368 : if (m->fs.fp_valid)
10623 : {
10624 : /* If the stack pointer is valid and pointing at the frame
10625 : pointer store address, then we only need a pop. */
10626 492210 : if (sp_valid_at (frame.hfp_save_offset)
10627 492210 : && m->fs.sp_offset == frame.hfp_save_offset)
10628 245844 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10629 : /* Leave results in shorter dependency chains on CPUs that are
10630 : able to grok it fast. */
10631 246366 : else if (TARGET_USE_LEAVE
10632 12 : || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10633 246378 : || !cfun->machine->use_fast_prologue_epilogue)
10634 246354 : ix86_emit_leave (NULL);
10635 : else
10636 : {
10637 12 : pro_epilogue_adjust_stack (stack_pointer_rtx,
10638 : hard_frame_pointer_rtx,
10639 12 : const0_rtx, style, !using_drap);
10640 12 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10641 : }
10642 : }
10643 :
10644 1654368 : if (using_drap)
10645 : {
10646 7175 : int param_ptr_offset = UNITS_PER_WORD;
10647 7175 : rtx_insn *insn;
10648 :
10649 7175 : gcc_assert (stack_realign_drap);
10650 :
10651 7175 : if (ix86_static_chain_on_stack)
10652 0 : param_ptr_offset += UNITS_PER_WORD;
10653 7175 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10654 232 : param_ptr_offset += UNITS_PER_WORD;
10655 :
10656 7480 : insn = emit_insn (gen_rtx_SET
10657 : (stack_pointer_rtx,
10658 : plus_constant (Pmode, crtl->drap_reg,
10659 : -param_ptr_offset)));
10660 7175 : m->fs.cfa_reg = stack_pointer_rtx;
10661 7175 : m->fs.cfa_offset = param_ptr_offset;
10662 7175 : m->fs.sp_offset = param_ptr_offset;
10663 7175 : m->fs.realigned = false;
10664 :
10665 7480 : add_reg_note (insn, REG_CFA_DEF_CFA,
10666 7175 : plus_constant (Pmode, stack_pointer_rtx,
10667 7175 : param_ptr_offset));
10668 7175 : RTX_FRAME_RELATED_P (insn) = 1;
10669 :
10670 7175 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10671 232 : ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10672 : }
10673 :
10674 : /* At this point the stack pointer must be valid, and we must have
10675 : restored all of the registers. We may not have deallocated the
10676 : entire stack frame. We've delayed this until now because it may
10677 : be possible to merge the local stack deallocation with the
10678 : deallocation forced by ix86_static_chain_on_stack. */
10679 1654368 : gcc_assert (m->fs.sp_valid);
10680 1654368 : gcc_assert (!m->fs.sp_realigned);
10681 1654368 : gcc_assert (!m->fs.fp_valid);
10682 1654368 : gcc_assert (!m->fs.realigned);
10683 1790076 : if (m->fs.sp_offset != UNITS_PER_WORD)
10684 : {
10685 49254 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10686 : GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10687 : style, true);
10688 : }
10689 : else
10690 1605114 : ix86_add_queued_cfa_restore_notes (get_last_insn ());
10691 :
10692 : /* Sibcall epilogues don't want a return instruction. */
10693 1654368 : if (style == 0)
10694 : {
10695 121957 : m->fs = frame_state_save;
10696 121957 : return;
10697 : }
10698 :
10699 1532411 : if (cfun->machine->func_type != TYPE_NORMAL)
10700 120 : emit_jump_insn (gen_interrupt_return ());
10701 1532291 : else if (crtl->args.pops_args && crtl->args.size)
10702 : {
10703 25992 : rtx popc = GEN_INT (crtl->args.pops_args);
10704 :
10705 : /* i386 can only pop 64K bytes. If asked to pop more, pop return
10706 : address, do explicit add, and jump indirectly to the caller. */
10707 :
10708 25992 : if (crtl->args.pops_args >= 65536)
10709 : {
10710 0 : rtx ecx = gen_rtx_REG (SImode, CX_REG);
10711 0 : rtx_insn *insn;
10712 :
10713 : /* There is no "pascal" calling convention in any 64bit ABI. */
10714 0 : gcc_assert (!TARGET_64BIT);
10715 :
10716 0 : insn = emit_insn (gen_pop (ecx));
10717 0 : m->fs.cfa_offset -= UNITS_PER_WORD;
10718 0 : m->fs.sp_offset -= UNITS_PER_WORD;
10719 :
10720 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10721 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
10722 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10723 0 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10724 0 : RTX_FRAME_RELATED_P (insn) = 1;
10725 :
10726 0 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10727 : popc, -1, true);
10728 0 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10729 : }
10730 : else
10731 25992 : emit_jump_insn (gen_simple_return_pop_internal (popc));
10732 : }
10733 1506299 : else if (!m->call_ms2sysv || !restore_stub_is_tail)
10734 : {
10735 : /* In case of return from EH a simple return cannot be used
10736 : as a return address will be compared with a shadow stack
10737 : return address. Use indirect jump instead. */
10738 1505176 : if (style == 2 && flag_cf_protection)
10739 : {
10740 : /* Register used in indirect jump must be in word_mode. But
10741 : Pmode may not be the same as word_mode for x32. */
10742 17 : rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10743 17 : rtx_insn *insn;
10744 :
10745 17 : insn = emit_insn (gen_pop (ecx));
10746 17 : m->fs.cfa_offset -= UNITS_PER_WORD;
10747 17 : m->fs.sp_offset -= UNITS_PER_WORD;
10748 :
10749 33 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10750 17 : x = gen_rtx_SET (stack_pointer_rtx, x);
10751 17 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10752 17 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10753 17 : RTX_FRAME_RELATED_P (insn) = 1;
10754 :
10755 17 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10756 17 : }
10757 : else
10758 1505159 : emit_jump_insn (gen_simple_return_internal ());
10759 : }
10760 :
10761 : /* Restore the state back to the state from the prologue,
10762 : so that it's correct for the next epilogue. */
10763 1532411 : m->fs = frame_state_save;
10764 : }
10765 :
10766 : /* Reset from the function's potential modifications. */
10767 :
10768 : static void
10769 1493745 : ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10770 : {
10771 1493745 : if (pic_offset_table_rtx
10772 1493745 : && !ix86_use_pseudo_pic_reg ())
10773 0 : SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10774 :
10775 1493745 : if (TARGET_MACHO)
10776 : {
10777 : rtx_insn *insn = get_last_insn ();
10778 : rtx_insn *deleted_debug_label = NULL;
10779 :
10780 : /* Mach-O doesn't support labels at the end of objects, so if
10781 : it looks like we might want one, take special action.
10782 : First, collect any sequence of deleted debug labels. */
10783 : while (insn
10784 : && NOTE_P (insn)
10785 : && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10786 : {
10787 : /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10788 : notes only, instead set their CODE_LABEL_NUMBER to -1,
10789 : otherwise there would be code generation differences
10790 : in between -g and -g0. */
10791 : if (NOTE_P (insn) && NOTE_KIND (insn)
10792 : == NOTE_INSN_DELETED_DEBUG_LABEL)
10793 : deleted_debug_label = insn;
10794 : insn = PREV_INSN (insn);
10795 : }
10796 :
10797 : /* If we have:
10798 : label:
10799 : barrier
10800 : then this needs to be detected, so skip past the barrier. */
10801 :
10802 : if (insn && BARRIER_P (insn))
10803 : insn = PREV_INSN (insn);
10804 :
10805 : /* Up to now we've only seen notes or barriers. */
10806 : if (insn)
10807 : {
10808 : if (LABEL_P (insn)
10809 : || (NOTE_P (insn)
10810 : && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10811 : /* Trailing label. */
10812 : fputs ("\tnop\n", file);
10813 : else if (cfun && ! cfun->is_thunk)
10814 : {
10815 : /* See if we have a completely empty function body, skipping
10816 : the special case of the picbase thunk emitted as asm. */
10817 : while (insn && ! INSN_P (insn))
10818 : insn = PREV_INSN (insn);
10819 : /* If we don't find any insns, we've got an empty function body;
10820 : I.e. completely empty - without a return or branch. This is
10821 : taken as the case where a function body has been removed
10822 : because it contains an inline __builtin_unreachable(). GCC
10823 : declares that reaching __builtin_unreachable() means UB so
10824 : we're not obliged to do anything special; however, we want
10825 : non-zero-sized function bodies. To meet this, and help the
10826 : user out, let's trap the case. */
10827 : if (insn == NULL)
10828 : fputs ("\tud2\n", file);
10829 : }
10830 : }
10831 : else if (deleted_debug_label)
10832 : for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10833 : if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10834 : CODE_LABEL_NUMBER (insn) = -1;
10835 : }
10836 1493745 : }
10837 :
10838 : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10839 :
10840 : void
10841 59 : ix86_print_patchable_function_entry (FILE *file,
10842 : unsigned HOST_WIDE_INT patch_area_size,
10843 : bool record_p)
10844 : {
10845 59 : if (cfun->machine->function_label_emitted)
10846 : {
10847 : /* NB: When ix86_print_patchable_function_entry is called after
10848 : function table has been emitted, we have inserted or queued
10849 : a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10850 : place. There is nothing to do here. */
10851 : return;
10852 : }
10853 :
10854 8 : default_print_patchable_function_entry (file, patch_area_size,
10855 : record_p);
10856 : }
10857 :
10858 : /* Output patchable area. NB: default_print_patchable_function_entry
10859 : isn't available in i386.md. */
10860 :
10861 : void
10862 51 : ix86_output_patchable_area (unsigned int patch_area_size,
10863 : bool record_p)
10864 : {
10865 51 : default_print_patchable_function_entry (asm_out_file,
10866 : patch_area_size,
10867 : record_p);
10868 51 : }
10869 :
10870 : /* Return a scratch register to use in the split stack prologue. The
10871 : split stack prologue is used for -fsplit-stack. It is the first
10872 : instructions in the function, even before the regular prologue.
10873 : The scratch register can be any caller-saved register which is not
10874 : used for parameters or for the static chain. */
10875 :
10876 : static unsigned int
10877 24613 : split_stack_prologue_scratch_regno (void)
10878 : {
10879 24613 : if (TARGET_64BIT)
10880 : return R11_REG;
10881 : else
10882 : {
10883 6946 : bool is_fastcall, is_thiscall;
10884 6946 : int regparm;
10885 :
10886 6946 : is_fastcall = (lookup_attribute ("fastcall",
10887 6946 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10888 : != NULL);
10889 6946 : is_thiscall = (lookup_attribute ("thiscall",
10890 6946 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10891 : != NULL);
10892 6946 : regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10893 :
10894 6946 : if (is_fastcall)
10895 : {
10896 0 : if (DECL_STATIC_CHAIN (cfun->decl))
10897 : {
10898 0 : sorry ("%<-fsplit-stack%> does not support fastcall with "
10899 : "nested function");
10900 0 : return INVALID_REGNUM;
10901 : }
10902 : return AX_REG;
10903 : }
10904 6946 : else if (is_thiscall)
10905 : {
10906 0 : if (!DECL_STATIC_CHAIN (cfun->decl))
10907 : return DX_REG;
10908 0 : return AX_REG;
10909 : }
10910 6946 : else if (regparm < 3)
10911 : {
10912 6946 : if (!DECL_STATIC_CHAIN (cfun->decl))
10913 : return CX_REG;
10914 : else
10915 : {
10916 459 : if (regparm >= 2)
10917 : {
10918 0 : sorry ("%<-fsplit-stack%> does not support 2 register "
10919 : "parameters for a nested function");
10920 0 : return INVALID_REGNUM;
10921 : }
10922 : return DX_REG;
10923 : }
10924 : }
10925 : else
10926 : {
10927 : /* FIXME: We could make this work by pushing a register
10928 : around the addition and comparison. */
10929 0 : sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10930 0 : return INVALID_REGNUM;
10931 : }
10932 : }
10933 : }
10934 :
10935 : /* A SYMBOL_REF for "__morestack", the function which allocates new
10936 : stackspace for -fsplit-stack. It is created on first use by
   ix86_expand_split_stack_prologue. */
10937 :
10938 : static GTY(()) rtx split_stack_fn;
10939 :
10940 : /* A SYMBOL_REF for "__morestack_large_model", the more stack function
   used when using the large model. It is created on first use by
   ix86_expand_split_stack_prologue. */
10941 :
10942 : static GTY(()) rtx split_stack_fn_large;
10943 :
10944 : /* Return location of the stack guard value in the TLS block. */
10945 :
10946 : rtx
10947 260053 : ix86_split_stack_guard (void)
10948 : {
10949 260053 : int offset;
10950 260053 : addr_space_t as = DEFAULT_TLS_SEG_REG;
10951 260053 : rtx r;
10952 :
10953 260053 : gcc_assert (flag_split_stack);
10954 :
10955 : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10956 260053 : offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10957 : #else
10958 : gcc_unreachable ();
10959 : #endif
10960 :
10961 260053 : r = GEN_INT (offset);
10962 358058 : r = gen_const_mem (Pmode, r);
10963 260053 : set_mem_addr_space (r, as);
10964 :
10965 260053 : return r;
10966 : }
10967 :
10968 : /* Handle -fsplit-stack. These are the first instructions in the
10969 : function, even before the regular prologue.

   The emitted sequence compares the stack pointer (or, for frames of
   at least SPLIT_STACK_AVAILABLE bytes, the stack pointer minus the
   frame size held in a scratch register) against the guard returned by
   ix86_split_stack_guard and branches over the call when there is
   enough room. Otherwise it calls __morestack (or
   __morestack_large_model) with the frame size and the argument size,
   followed by the special split-stack return. When the function uses
   va_start, both paths also leave a pointer to the stack arguments in
   the scratch register. */
10970 :
10971 : void
10972 260043 : ix86_expand_split_stack_prologue (void)
10973 : {
10974 260043 : HOST_WIDE_INT allocate;
10975 260043 : unsigned HOST_WIDE_INT args_size;
10976 260043 : rtx_code_label *label;
10977 260043 : rtx limit, current, allocate_rtx, call_fusage;
10978 260043 : rtx_insn *call_insn;
10979 260043 : unsigned int scratch_regno = INVALID_REGNUM;
10980 260043 : rtx scratch_reg = NULL_RTX;
10981 260043 : rtx_code_label *varargs_label = NULL;
10982 260043 : rtx fn;
10983 :
10984 260043 : gcc_assert (flag_split_stack && reload_completed);
10985 :
10986 260043 : ix86_finalize_stack_frame_flags ();
10987 260043 : struct ix86_frame &frame = cfun->machine->frame;
10988 260043 : allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10989 :
10990 : /* This is the label we will branch to if we have enough stack
10991 : space. We expect the basic block reordering pass to reverse this
10992 : branch if optimizing, so that we branch in the unlikely case. */
10993 260043 : label = gen_label_rtx ();
10994 :
10995 : /* We need to compare the stack pointer minus the frame size with
10996 : the stack boundary in the TCB. The stack boundary always gives
10997 : us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10998 : can compare directly. Otherwise we need to do an addition. */
10999 :
11000 260043 : limit = ix86_split_stack_guard ();
11001 :
/* A scratch register is needed either to hold the adjusted stack
   pointer or to hold the callee address when flag_force_indirect_call
   is set. split_stack_prologue_scratch_regno has already issued
   sorry () whenever it returns INVALID_REGNUM, so just give up
   without emitting anything. */
11002 260043 : if (allocate >= SPLIT_STACK_AVAILABLE
11003 235593 : || flag_force_indirect_call)
11004 : {
11005 24465 : scratch_regno = split_stack_prologue_scratch_regno ();
11006 24465 : if (scratch_regno == INVALID_REGNUM)
11007 0 : return;
11008 : }
11009 :
11010 260043 : if (allocate >= SPLIT_STACK_AVAILABLE)
11011 : {
11012 24450 : rtx offset;
11013 :
11014 : /* We need a scratch register to hold the stack pointer minus
11015 : the required frame size. Since this is the very start of the
11016 : function, the scratch register can be any caller-saved
11017 : register which is not used for parameters. */
11018 24450 : offset = GEN_INT (- allocate);
11019 :
11020 31342 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11021 24450 : if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11022 : {
11023 : /* We don't use gen_add in this case because it will
11024 : want to split to lea, but when not optimizing the insn
11025 : will not be split after this point. */
11026 31342 : emit_insn (gen_rtx_SET (scratch_reg,
11027 : gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11028 : offset)));
11029 : }
11030 : else
11031 : {
/* The offset is not a valid 64-bit immediate operand: load it into
   the scratch register first and then add the stack pointer. */
11032 0 : emit_move_insn (scratch_reg, offset);
11033 0 : emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
11034 : }
11035 : current = scratch_reg;
11036 : }
11037 : else
11038 235593 : current = stack_pointer_rtx;
11039 :
11040 260043 : ix86_expand_branch (GEU, current, limit, label);
11041 260043 : rtx_insn *jump_insn = get_last_insn ();
11042 260043 : JUMP_LABEL (jump_insn) = label;
11043 :
11044 : /* Mark the jump as very likely to be taken. */
11045 260043 : add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
11046 :
/* Create the "__morestack" SYMBOL_REF on first use and cache it in
   split_stack_fn. */
11047 260043 : if (split_stack_fn == NULL_RTX)
11048 : {
11049 5451 : split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11050 4347 : SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
11051 : }
11052 260043 : fn = split_stack_fn;
11053 :
11054 : /* Get more stack space. We pass in the desired stack space and the
11055 : size of the arguments to copy to the new stack. In 32-bit mode
11056 : we push the parameters; __morestack will return on a new stack
11057 : anyhow. In 64-bit mode we pass the parameters in r10 and
11058 : r11. */
11059 260043 : allocate_rtx = GEN_INT (allocate);
/* A negative crtl->args.size is treated as zero. */
11060 260043 : args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
11061 260043 : call_fusage = NULL_RTX;
11062 260043 : rtx pop = NULL_RTX;
11063 260043 : if (TARGET_64BIT)
11064 : {
11065 162038 : rtx reg10, reg11;
11066 :
11067 162038 : reg10 = gen_rtx_REG (DImode, R10_REG);
11068 162038 : reg11 = gen_rtx_REG (DImode, R11_REG);
11069 :
11070 : /* If this function uses a static chain, it will be in %r10.
11071 : Preserve it across the call to __morestack. */
11072 162038 : if (DECL_STATIC_CHAIN (cfun->decl))
11073 : {
11074 7505 : rtx rax;
11075 :
11076 7505 : rax = gen_rtx_REG (word_mode, AX_REG);
11077 7505 : emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11078 7505 : use_reg (&call_fusage, rax);
11079 : }
11080 :
11081 162038 : if (flag_force_indirect_call
11082 162023 : || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11083 : {
11084 16 : HOST_WIDE_INT argval;
11085 :
/* Create the "__morestack_large_model" SYMBOL_REF on first use and
   cache it in split_stack_fn_large. */
11086 16 : if (split_stack_fn_large == NULL_RTX)
11087 : {
11088 7 : split_stack_fn_large
11089 7 : = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11090 7 : SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
11091 : }
11092 :
11093 16 : fn = split_stack_fn_large;
11094 :
11095 16 : if (ix86_cmodel == CM_LARGE_PIC)
11096 : {
11097 3 : rtx_code_label *label;
11098 3 : rtx x;
11099 :
11100 3 : gcc_assert (Pmode == DImode);
11101 :
/* Compute the GOT address in %r10 (using %r11 as a temporary), then
   load the callee address from its GOT slot into %r11. */
11102 3 : label = gen_label_rtx ();
11103 3 : emit_label (label);
11104 3 : LABEL_PRESERVE_P (label) = 1;
11105 3 : emit_insn (gen_set_rip_rex64 (reg10, label));
11106 3 : emit_insn (gen_set_got_offset_rex64 (reg11, label));
11107 3 : emit_insn (gen_add2_insn (reg10, reg11));
11108 3 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
11109 3 : x = gen_rtx_CONST (Pmode, x);
11110 3 : emit_move_insn (reg11, x);
11111 3 : x = gen_rtx_PLUS (Pmode, reg10, reg11);
11112 3 : x = gen_const_mem (Pmode, x);
11113 3 : fn = copy_to_suggested_reg (x, reg11, Pmode);
11114 : }
11115 13 : else if (ix86_cmodel == CM_LARGE)
11116 1 : fn = copy_to_suggested_reg (fn, reg11, Pmode);
11117 :
11118 : /* When using the large model we need to load the address
11119 : into a register, and we've run out of registers. So we
11120 : switch to a different calling convention, and we call a
11121 : different function: __morestack_large_model. We pass the
11122 : argument size in the upper 32 bits of r10 and pass the
11123 : frame size in the lower 32 bits. */
11124 16 : gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
11125 16 : gcc_assert ((args_size & 0xffffffff) == args_size);
11126 :
11127 16 : argval = ((args_size << 16) << 16) + allocate;
11128 16 : emit_move_insn (reg10, GEN_INT (argval));
11129 16 : }
11130 : else
11131 : {
11132 162022 : emit_move_insn (reg10, allocate_rtx);
11133 162022 : emit_move_insn (reg11, GEN_INT (args_size));
11134 162022 : use_reg (&call_fusage, reg11);
11135 : }
11136 :
11137 162038 : use_reg (&call_fusage, reg10);
11138 : }
11139 : else
11140 : {
11141 98005 : if (flag_force_indirect_call && flag_pic)
11142 : {
11143 0 : rtx x;
11144 :
11145 0 : gcc_assert (Pmode == SImode);
11146 :
11147 0 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11148 :
/* Load the address of __morestack from its GOT slot into the
   scratch register. */
11149 0 : emit_insn (gen_set_got (scratch_reg));
11150 0 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
11151 : UNSPEC_GOT);
11152 0 : x = gen_rtx_CONST (Pmode, x);
11153 0 : x = gen_rtx_PLUS (Pmode, scratch_reg, x);
11154 0 : x = gen_const_mem (Pmode, x);
11155 0 : fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
11156 : }
11157 :
/* Push the argument size and then the frame size; the call below is
   told to pop these two words again via POP. */
11158 98005 : rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
11159 196010 : add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
11160 98005 : insn = emit_insn (gen_push (allocate_rtx));
11161 196010 : add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
11162 196010 : pop = GEN_INT (2 * UNITS_PER_WORD);
11163 : }
11164 :
/* When flag_force_indirect_call is set and FN is not yet a register,
   copy it into the scratch register in word_mode, zero-extending it
   if it is in a different mode. */
11165 260043 : if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
11166 : {
11167 12 : scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
11168 :
11169 12 : if (GET_MODE (fn) != word_mode)
11170 0 : fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
11171 :
11172 12 : fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
11173 : }
11174 :
11175 260043 : call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11176 260043 : GEN_INT (UNITS_PER_WORD), constm1_rtx,
11177 : pop, false);
11178 260043 : add_function_usage_to (call_insn, call_fusage);
11179 260043 : if (!TARGET_64BIT)
11180 98005 : add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
11181 : /* Indicate that this function can't jump to non-local gotos. */
11182 260043 : make_reg_eh_region_note_nothrow_nononlocal (call_insn);
11183 :
11184 : /* In order to make call/return prediction work right, we now need
11185 : to execute a return instruction. See
11186 : libgcc/config/i386/morestack.S for the details on how this works.
11187 :
11188 : For flow purposes gcc must not see this as a return
11189 : instruction--we need control flow to continue at the subsequent
11190 : label. Therefore, we use an unspec. */
11191 260043 : gcc_assert (crtl->args.pops_args < 65536);
11192 260043 : rtx_insn *ret_insn
11193 260043 : = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11194 :
11195 260043 : if ((flag_cf_protection & CF_BRANCH))
11196 : {
11197 : /* Insert ENDBR since __morestack will jump back here via indirect
11198 : call. */
11199 21 : rtx cet_eb = gen_nop_endbr ();
11200 21 : emit_insn_after (cet_eb, ret_insn);
11201 : }
11202 :
11203 : /* If we are in 64-bit mode and this function uses a static chain,
11204 : we saved %r10 in %rax before calling __morestack. */
11205 260043 : if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11206 7505 : emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11207 : gen_rtx_REG (word_mode, AX_REG));
11208 :
11209 : /* If this function calls va_start, we need to store a pointer to
11210 : the arguments on the old stack, because they may not have been
11211 : all copied to the new stack. At this point the old stack can be
11212 : found at the frame pointer value used by __morestack, because
11213 : __morestack has set that up before calling back to us. Here we
11214 : store that pointer in a scratch register, and in
11215 : ix86_expand_prologue we store the scratch register in a stack
11216 : slot. */
11217 260043 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11218 : {
11219 12 : rtx frame_reg;
11220 12 : int words;
11221 :
/* NOTE(review): split_stack_prologue_scratch_regno can return
   INVALID_REGNUM (after issuing sorry ()); unlike the call near the
   top of this function, the result is not checked here. Confirm
   whether that combination is reachable on this path. */
11222 12 : scratch_regno = split_stack_prologue_scratch_regno ();
11223 16 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11224 16 : frame_reg = gen_rtx_REG (Pmode, BP_REG);
11225 :
11226 : /* 64-bit:
11227 : fp -> old fp value
11228 : return address within this function
11229 : return address of caller of this function
11230 : stack arguments
11231 : So we add three words to get to the stack arguments.
11232 :
11233 : 32-bit:
11234 : fp -> old fp value
11235 : return address within this function
11236 : first argument to __morestack
11237 : second argument to __morestack
11238 : return address of caller of this function
11239 : stack arguments
11240 : So we add five words to get to the stack arguments.
11241 : */
11242 12 : words = TARGET_64BIT ? 3 : 5;
11243 20 : emit_insn (gen_rtx_SET (scratch_reg,
11244 : plus_constant (Pmode, frame_reg,
11245 : words * UNITS_PER_WORD)));
11246 :
/* Jump over the "enough stack" initialisation of the scratch
   register that is emitted after LABEL below. */
11247 12 : varargs_label = gen_label_rtx ();
11248 12 : emit_jump_insn (gen_jump (varargs_label));
11249 12 : JUMP_LABEL (get_last_insn ()) = varargs_label;
11250 :
11251 12 : emit_barrier ();
11252 : }
11253 :
11254 260043 : emit_label (label);
11255 260043 : LABEL_NUSES (label) = 1;
11256 :
11257 : /* If this function calls va_start, we now have to set the scratch
11258 : register for the case where we do not call __morestack. In this
11259 : case we need to set it based on the stack pointer. */
11260 260043 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11261 : {
11262 20 : emit_insn (gen_rtx_SET (scratch_reg,
11263 : plus_constant (Pmode, stack_pointer_rtx,
11264 : UNITS_PER_WORD)));
11265 :
11266 12 : emit_label (varargs_label);
11267 12 : LABEL_NUSES (varargs_label) = 1;
11268 : }
11269 : }
11270 :
11271 : /* We may have to tell the dataflow pass that the split stack prologue
11272 : is initializing a scratch register. */
11273 :
11274 : static void
11275 15893874 : ix86_live_on_entry (bitmap regs)
11276 : {
11277 15893874 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11278 : {
11279 124 : gcc_assert (flag_split_stack);
11280 124 : bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11281 : }
11282 15893874 : }
11283 :
11284 : /* Extract the parts of an RTL expression that is a valid memory address
11285 : for an instruction. Return false if the structure of the address is
11286 : grossly off.

   On success the base, index, displacement, scale and segment found in
   ADDR are stored in *OUT and true is returned. *OUT is only written
   on success. */
11287 :
11288 : bool
11289 4328599941 : ix86_decompose_address (rtx addr, struct ix86_address *out)
11290 : {
11291 4328599941 : rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11292 4328599941 : rtx base_reg, index_reg;
11293 4328599941 : HOST_WIDE_INT scale = 1;
11294 4328599941 : rtx scale_rtx = NULL_RTX;
11295 4328599941 : rtx tmp;
11296 4328599941 : addr_space_t seg = ADDR_SPACE_GENERIC;
11297 :
11298 : /* Allow zero-extended SImode addresses,
11299 : they will be emitted with addr32 prefix. */
11300 4328599941 : if (TARGET_64BIT && GET_MODE (addr) == DImode)
11301 : {
11302 2279648289 : if (GET_CODE (addr) == ZERO_EXTEND
11303 2170304 : && GET_MODE (XEXP (addr, 0)) == SImode)
11304 : {
11305 2075099 : addr = XEXP (addr, 0);
11306 2075099 : if (CONST_INT_P (addr))
11307 : return false;
11308 : }
11309 2277573190 : else if (GET_CODE (addr) == AND)
11310 : {
11311 2826049 : rtx mask = XEXP (addr, 1);
11312 2826049 : rtx shift_val;
11313 :
11314 2826049 : if (const_32bit_mask (mask, DImode)
11315 : /* For ASHIFT inside AND, combine will not generate
11316 : canonical zero-extend. Merge mask for AND and shift_count
11317 : to check if it is canonical zero-extend. */
11318 2826049 : || (CONST_INT_P (mask)
11319 1834920 : && GET_CODE (XEXP (addr, 0)) == ASHIFT
11320 142541 : && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
11321 139442 : && ((UINTVAL (mask)
11322 139442 : | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
11323 : == HOST_WIDE_INT_UC (0xffffffff))))
11324 : {
/* The AND acts as a zero-extension of the low 32 bits: continue with
   the SImode low part of its first operand. */
11325 82855 : addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
11326 82855 : if (addr == NULL_RTX)
11327 : return false;
11328 :
11329 82855 : if (CONST_INT_P (addr))
11330 : return false;
11331 : }
11332 : }
11333 : }
11334 :
11335 : /* Allow SImode subregs of DImode addresses,
11336 : they will be emitted with addr32 prefix. */
11337 4328599941 : if (TARGET_64BIT && GET_MODE (addr) == SImode)
11338 : {
11339 17222166 : if (SUBREG_P (addr)
11340 217253 : && GET_MODE (SUBREG_REG (addr)) == DImode)
11341 : {
11342 190276 : addr = SUBREG_REG (addr);
11343 190276 : if (CONST_INT_P (addr))
11344 : return false;
11345 : }
11346 : }
11347 :
/* Classify the (possibly unwrapped) address by its outermost code. */
11348 4328599941 : if (REG_P (addr))
11349 : base = addr;
11350 : else if (SUBREG_P (addr))
11351 : {
11352 458417 : if (REG_P (SUBREG_REG (addr)))
11353 : base = addr;
11354 : else
11355 : return false;
11356 : }
11357 : else if (GET_CODE (addr) == PLUS)
11358 : {
11359 : rtx addends[4], op;
11360 : int n = 0, i;
11361 :
/* Flatten the left-nested chain of PLUS operations into ADDENDS.
   A sum of more than four operands is rejected. */
11362 : op = addr;
11363 3164744291 : do
11364 : {
11365 3164744291 : if (n >= 4)
11366 643762767 : return false;
11367 3164738976 : addends[n++] = XEXP (op, 1);
11368 3164738976 : op = XEXP (op, 0);
11369 : }
11370 3164738976 : while (GET_CODE (op) == PLUS);
11371 3101459674 : if (n >= 4)
11372 : return false;
11373 3101452928 : addends[n] = op;
11374 :
/* Walk the operands starting from the innermost (leftmost) one and
   assign each to the base, index, displacement or segment slot. Any
   slot that would be filled twice makes the address invalid. */
11375 8083215547 : for (i = n; i >= 0; --i)
11376 : {
11377 5625513325 : op = addends[i];
11378 5625513325 : switch (GET_CODE (op))
11379 : {
11380 61215979 : case MULT:
11381 61215979 : if (index)
11382 : return false;
11383 61176425 : index = XEXP (op, 0);
11384 61176425 : scale_rtx = XEXP (op, 1);
11385 61176425 : break;
11386 :
11387 12696505 : case ASHIFT:
11388 12696505 : if (index)
11389 : return false;
11390 12623681 : index = XEXP (op, 0);
11391 12623681 : tmp = XEXP (op, 1);
11392 12623681 : if (!CONST_INT_P (tmp))
11393 : return false;
/* Only constant shift counts 0..3 are accepted; they become a scale
   of 1, 2, 4 or 8. */
11394 12609068 : scale = INTVAL (tmp);
11395 12609068 : if ((unsigned HOST_WIDE_INT) scale > 3)
11396 : return false;
11397 12201582 : scale = 1 << scale;
11398 12201582 : break;
11399 :
11400 1013003 : case ZERO_EXTEND:
11401 1013003 : op = XEXP (op, 0);
11402 1013003 : if (GET_CODE (op) != UNSPEC)
11403 : return false;
11404 : /* FALLTHRU */
11405 :
11406 698267 : case UNSPEC:
/* Only UNSPEC_TP is accepted, only with TARGET_TLS_DIRECT_SEG_REFS and
   only while no segment has been chosen yet; it is recorded as the
   DEFAULT_TLS_SEG_REG segment. */
11407 698267 : if (XINT (op, 1) == UNSPEC_TP
11408 689947 : && TARGET_TLS_DIRECT_SEG_REFS
11409 689947 : && seg == ADDR_SPACE_GENERIC)
11410 689947 : seg = DEFAULT_TLS_SEG_REG;
11411 : else
11412 : return false;
11413 : break;
11414 :
11415 505472 : case SUBREG:
11416 505472 : if (!REG_P (SUBREG_REG (op)))
11417 : return false;
11418 : /* FALLTHRU */
11419 :
11420 2528164786 : case REG:
/* The first register seen becomes the base, the second the index. */
11421 2528164786 : if (!base)
11422 : base = op;
11423 82216450 : else if (!index)
11424 : index = op;
11425 : else
11426 : return false;
11427 : break;
11428 :
11429 2380348892 : case CONST:
11430 2380348892 : case CONST_INT:
11431 2380348892 : case SYMBOL_REF:
11432 2380348892 : case LABEL_REF:
11433 2380348892 : if (disp)
11434 : return false;
11435 : disp = op;
11436 : break;
11437 :
11438 : default:
11439 : return false;
11440 : }
11441 : }
11442 : }
11443 : else if (GET_CODE (addr) == MULT)
11444 : {
11445 3719784 : index = XEXP (addr, 0); /* index*scale */
11446 3719784 : scale_rtx = XEXP (addr, 1);
11447 : }
11448 : else if (GET_CODE (addr) == ASHIFT)
11449 : {
11450 : /* We're called for lea too, which implements ashift on occasion. */
11451 3238325 : index = XEXP (addr, 0);
11452 3238325 : tmp = XEXP (addr, 1);
11453 3238325 : if (!CONST_INT_P (tmp))
11454 : return false;
11455 2852278 : scale = INTVAL (tmp);
11456 2852278 : if ((unsigned HOST_WIDE_INT) scale > 3)
11457 : return false;
11458 2115022 : scale = 1 << scale;
11459 : }
11460 : else
11461 : disp = addr; /* displacement */
11462 :
/* An index must be a register or a SUBREG of a register. */
11463 2463537028 : if (index)
11464 : {
11465 151844924 : if (REG_P (index))
11466 : ;
11467 4040348 : else if (SUBREG_P (index)
11468 257830 : && REG_P (SUBREG_REG (index)))
11469 : ;
11470 : else
11471 : return false;
11472 : }
11473 :
11474 : /* Extract the integral value of scale. The value is taken as is;
   no range check is applied to it in this function. */
11475 3679865220 : if (scale_rtx)
11476 : {
11477 56432837 : if (!CONST_INT_P (scale_rtx))
11478 : return false;
11479 55810074 : scale = INTVAL (scale_rtx);
11480 : }
11481 :
/* BASE_REG and INDEX_REG are the registers underneath any SUBREG, used
   for the register-number checks below. */
11482 3679242457 : base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
11483 3679242457 : index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
11484 :
11485 : /* Avoid useless 0 displacement. */
11486 3679242457 : if (disp == const0_rtx && (base || index))
11487 3679242457 : disp = NULL_RTX;
11488 :
11489 : /* When there is no scaling, an index that is the arg pointer, frame
   pointer or stack pointer is swapped with the base. */
11490 2685267914 : if (base_reg && index_reg && scale == 1
11491 3759978965 : && (REGNO (index_reg) == ARG_POINTER_REGNUM
11492 : || REGNO (index_reg) == FRAME_POINTER_REGNUM
11493 : || REGNO (index_reg) == SP_REG))
11494 : {
11495 : std::swap (base, index);
11496 : std::swap (base_reg, index_reg);
11497 : }
11498 :
11499 : /* Special case: rewrite index*1+disp into base+disp. */
11500 3679242457 : if (!base && index && scale == 1)
11501 4 : base = index, base_reg = index_reg, index = index_reg = NULL_RTX;
11502 :
11503 : /* Special case: %ebp cannot be encoded as a base without a displacement.
11504 : Similarly %r13. */
11505 323332547 : if (!disp && base_reg
11506 3998253540 : && (REGNO (base_reg) == ARG_POINTER_REGNUM
11507 : || REGNO (base_reg) == FRAME_POINTER_REGNUM
11508 : || REGNO (base_reg) == BP_REG
11509 : || REGNO (base_reg) == R13_REG))
11510 : disp = const0_rtx;
11511 :
11512 : /* Special case: on K6, [%esi] makes the instruction vector decoded.
11513 : Avoid this by transforming to [%esi+0].
11514 : Reload calls address legitimization without cfun defined, so we need
11515 : to test cfun for being non-NULL. */
11516 0 : if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
11517 0 : && base_reg && !index_reg && !disp
11518 3679242457 : && REGNO (base_reg) == SI_REG)
11519 0 : disp = const0_rtx;
11520 :
11521 : /* Special case: encode reg+reg instead of reg*2. */
11522 3679242457 : if (!base && index && scale == 2)
11523 993974539 : base = index, base_reg = index_reg, scale = 1;
11524 :
11525 : /* Special case: scaling cannot be encoded without base or displacement. */
11526 993974539 : if (!base && !disp && index && scale != 1)
11527 3437934 : disp = const0_rtx;
11528 :
11529 3679242457 : out->base = base;
11530 3679242457 : out->index = index;
11531 3679242457 : out->disp = disp;
11532 3679242457 : out->scale = scale;
11533 3679242457 : out->seg = seg;
11534 :
11535 3679242457 : return true;
11536 : }
11537 :
11538 : /* Return cost of the memory address x.
11539 : For i386, it is better to use a complex address than let gcc copy
11540 : the address into a reg and make a new pseudo. But not if the address
11541 : requires two regs - that would mean more pseudos with longer
11542 : lifetimes. */
11543 : static int
11544 11316720 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
11545 : {
11546 11316720 : struct ix86_address parts;
11547 11316720 : int cost = 1;
11548 11316720 : int ok = ix86_decompose_address (x, &parts);
11549 :
11550 11316720 : gcc_assert (ok);
11551 :
11552 11316720 : if (parts.base && SUBREG_P (parts.base))
11553 466 : parts.base = SUBREG_REG (parts.base);
11554 11316720 : if (parts.index && SUBREG_P (parts.index))
11555 20 : parts.index = SUBREG_REG (parts.index);
11556 :
11557 : /* Attempt to minimize number of registers in the address by increasing
11558 : address cost for each used register. We don't increase address cost
11559 : for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
11560 : is not invariant itself it most likely means that base or index is not
11561 : invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11562 : which is not profitable for x86. */
11563 11316720 : if (parts.base
11564 9910357 : && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11565 20931502 : && (current_pass->type == GIMPLE_PASS
11566 2678072 : || !pic_offset_table_rtx
11567 127287 : || !REG_P (parts.base)
11568 127287 : || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11569 : cost++;
11570 :
11571 11316720 : if (parts.index
11572 5488104 : && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11573 16790854 : && (current_pass->type == GIMPLE_PASS
11574 645653 : || !pic_offset_table_rtx
11575 55572 : || !REG_P (parts.index)
11576 55572 : || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11577 5472831 : cost++;
11578 :
11579 : /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11580 : since it's predecode logic can't detect the length of instructions
11581 : and it degenerates to vector decoded. Increase cost of such
11582 : addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11583 : to split such addresses or even refuse such addresses at all.
11584 :
11585 : Following addressing modes are affected:
11586 : [base+scale*index]
11587 : [scale*index+disp]
11588 : [base+index]
11589 :
11590 : The first and last case may be avoidable by explicitly coding the zero in
11591 : memory address, but I don't have AMD-K6 machine handy to check this
11592 : theory. */
11593 :
11594 11316720 : if (TARGET_CPU_P (K6)
11595 0 : && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11596 0 : || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11597 0 : || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11598 0 : cost += 10;
11599 :
11600 11316720 : return cost;
11601 : }
11602 :
11603 : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. Return true whenever
      : cfun->machine->by_pieces_in_use is set; otherwise pass SIZE, ALIGN, OP
      : and SPEED_P unchanged to default_use_by_pieces_infrastructure_p and
      : return its answer. */
11604 :
11605 : bool
11606 1179688 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
11607 : unsigned int align,
11608 : enum by_pieces_operation op,
11609 : bool speed_p)
11610 : {
11611 : /* Return true when we are currently expanding memcpy/memset epilogue
11612 : with move_by_pieces or store_by_pieces. */
11613 1179688 : if (cfun->machine->by_pieces_in_use)
11614 : return true;
11615 :
11616 1177579 : return default_use_by_pieces_infrastructure_p (size, align, op,
11617 1177579 : speed_p);
11618 : }
11619 :
11620 : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11621 : this is used to form addresses to local data when -fPIC is in
11622 : use. Return true iff DISP is an UNSPEC whose unspec number is
      : UNSPEC_MACHOPIC_OFFSET. */
11623 :
11624 : static bool
11625 0 : darwin_local_data_pic (rtx disp)
11626 : {
11627 0 : return (GET_CODE (disp) == UNSPEC
11628 0 : && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11629 : }
11630 :
11631 : /* True if the function symbol operand X should be loaded from GOT.
11632 : If CALL_P is true, X is a call operand.
11633 :
      : The result is always false unless X is a SYMBOL_REF that is not
      : SYMBOL_REF_LOCAL_P, the code model is neither CM_LARGE nor
      : CM_LARGE_PIC, and the target is neither PE-COFF nor Mach-O. Beyond
      : that, either X is a function symbol with PLT use disabled (no flag_plt
      : or a "noplt" attribute on its decl), or CALL_P is false and direct
      : extern access is disabled (globally or via a "nodirect_extern_access"
      : attribute on its decl).
      :
11634 : NB: -mno-direct-extern-access doesn't force load from GOT for
11635 : call.
11636 :
11637 : NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11638 : statements, since a PIC register could not be available at the
11639 : call site. */
11640 :
11641 : bool
11642 1839372862 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
11643 : {
11644 96344826 : return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11645 : && !TARGET_PECOFF && !TARGET_MACHO
11646 1836510826 : && (!flag_pic || this_is_asm_operands)
11647 1816219429 : && ix86_cmodel != CM_LARGE
11648 1816213430 : && ix86_cmodel != CM_LARGE_PIC
11649 1816213429 : && SYMBOL_REF_P (x)
11650 1816213427 : && ((!call_p
11651 1810788549 : && (!ix86_direct_extern_access
11652 1810786279 : || (SYMBOL_REF_DECL (x)
11653 1631693872 : && lookup_attribute ("nodirect_extern_access",
11654 1631693872 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11655 1816210703 : || (SYMBOL_REF_FUNCTION_P (x)
11656 685252434 : && (!flag_plt
11657 685248023 : || (SYMBOL_REF_DECL (x)
11658 685248023 : && lookup_attribute ("noplt",
11659 685248023 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11660 1839380396 : && !SYMBOL_REF_LOCAL_P (x));
11661 : }
11662 :
11663 : /* Determine if a given RTX is a valid constant. We already know this
11664 : satisfies CONSTANT_P. X is the constant; MODE is only consulted for
      : the scalar integer, CONST_VECTOR and CONST_DOUBLE cases. Return
      : true when X is acceptable and false when it must be rejected. */
11665 :
11666 : static bool
11667 1546395025 : ix86_legitimate_constant_p (machine_mode mode, rtx x)
11668 : {
11669 1546395025 : switch (GET_CODE (x))
11670 : {
11671 135897883 : case CONST:
11672 135897883 : x = XEXP (x, 0);
11673 :
11674 135897883 : if (GET_CODE (x) == PLUS)
11675 : {
11676 135780171 : if (!CONST_INT_P (XEXP (x, 1)))
11677 : return false;
11678 135780171 : x = XEXP (x, 0);
11679 : }
11680 :
11681 135897883 : if (TARGET_MACHO && darwin_local_data_pic (x))
11682 : return true;
11683 :
11684 : /* Only some unspecs are valid as "constants". */
11685 135897883 : if (GET_CODE (x) == UNSPEC)
11686 494976 : switch (XINT (x, 1))
11687 : {
11688 21143 : case UNSPEC_GOT:
11689 21143 : case UNSPEC_GOTOFF:
11690 21143 : case UNSPEC_PLTOFF:
11691 21143 : return TARGET_64BIT;
11692 473470 : case UNSPEC_TPOFF:
11693 473470 : case UNSPEC_NTPOFF:
11694 473470 : x = XVECEXP (x, 0, 0);
11695 473470 : return (SYMBOL_REF_P (x)
11696 473470 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11697 275 : case UNSPEC_DTPOFF:
11698 275 : x = XVECEXP (x, 0, 0);
11699 275 : return (SYMBOL_REF_P (x)
11700 275 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11701 0 : case UNSPEC_SECREL32:
11702 0 : x = XVECEXP (x, 0, 0);
11703 0 : return SYMBOL_REF_P (x);
11704 : default:
11705 : return false;
11706 : }
11707 :
11708 : /* We must have drilled down to a symbol. A label is accepted as is;
      : a symbol continues into the SYMBOL_REF checks below. */
11709 135402907 : if (LABEL_REF_P (x))
11710 : return true;
11711 135397633 : if (!SYMBOL_REF_P (x))
11712 : return false;
11713 : /* FALLTHRU */
11714 :
11715 922245957 : case SYMBOL_REF:
11716 : /* TLS symbols are never valid. */
11717 922245957 : if (SYMBOL_REF_TLS_MODEL (x))
11718 : return false;
11719 :
11720 : /* DLLIMPORT symbols are never valid. */
11721 922141485 : if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11722 : && SYMBOL_REF_DLLIMPORT_P (x))
11723 : return false;
11724 :
11725 : #if TARGET_MACHO
11726 : /* mdynamic-no-pic */
11727 : if (MACHO_DYNAMIC_NO_PIC_P)
11728 : return machopic_symbol_defined_p (x);
11729 : #endif
11730 :
11731 : /* External function address should be loaded
11732 : via the GOT slot to avoid PLT. */
11733 922141485 : if (ix86_force_load_from_GOT_p (x))
11734 : return false;
11735 :
11736 : break;
11737 :
11738 602684688 : CASE_CONST_SCALAR_INT:
11739 602684688 : if (ix86_endbr_immediate_operand (x, VOIDmode))
11740 : return false;
11741 :
11742 602684487 : switch (mode)
11743 : {
11744 1454356 : case E_TImode:
11745 1454356 : if (TARGET_64BIT)
11746 : return true;
11747 : /* FALLTHRU */
11748 26007 : case E_OImode:
11749 26007 : case E_XImode:
11750 26007 : if (!standard_sse_constant_p (x, mode)
11751 43218 : && GET_MODE_SIZE (TARGET_AVX512F
11752 : ? XImode
11753 : : (TARGET_AVX
11754 : ? OImode
11755 : : (TARGET_SSE2
11756 17211 : ? TImode : DImode))) < GET_MODE_SIZE (mode))
11757 : return false;
11758 : default:
11759 : break;
11760 : }
11761 : break;
11762 :
11763 8637727 : case CONST_VECTOR:
11764 8637727 : if (!standard_sse_constant_p (x, mode))
11765 : return false;
11766 : break;
11767 :
11768 7696204 : case CONST_DOUBLE:
11769 7696204 : if (mode == E_BFmode)
11770 : return false;
11771 :
11772 : default:
11773 : break;
11774 : }
11775 :
11776 : /* Otherwise we handle everything else in the move patterns. */
11777 : return true;
11778 : }
11779 :
11780 : /* Determine if it's legal to put X into the constant pool. This
11781 : is not possible for the address of thread-local symbols, which
11782 : ix86_legitimate_constant_p rejects. Return false (X may be forced
      : to memory) for any CASE_CONST_ANY code; otherwise return true exactly
      : when ix86_legitimate_constant_p (MODE, X) is false. */
11783 :
11784 : static bool
11785 61553751 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11786 : {
11787 : /* We can put any immediate constant in memory. */
11788 61553751 : switch (GET_CODE (x))
11789 : {
11790 : CASE_CONST_ANY:
11791 : return false;
11792 :
11793 1797151 : default:
11794 1797151 : break;
11795 : }
11796 :
11797 1797151 : return !ix86_legitimate_constant_p (mode, x);
11798 : }
11799 :
11800 : /* Return a unique alias set for the GOT. The set is obtained from
      : new_alias_set on the first call and cached in a function-local static,
      : so every later call returns the same value. */
11801 :
11802 : alias_set_type
11803 189283 : ix86_GOT_alias_set (void)
11804 : {
11805 189283 : static alias_set_type set = -1;
11806 189283 : if (set == -1)
11807 2988 : set = new_alias_set ();
11808 189283 : return set;
11809 : }
11810 :
11811 : /* Nonzero if the constant value X is a legitimate general operand
11812 : when generating PIC code. It is given that flag_pic is on and
11813 : that X satisfies CONSTANT_P. Codes other than CONST, SYMBOL_REF and
      : LABEL_REF are always accepted; symbolic operands are deferred to
      : legitimate_pic_address_disp_p unless they wrap an UNSPEC, which is
      : judged by its unspec number here. */
11814 :
11815 : bool
11816 126156698 : legitimate_pic_operand_p (rtx x)
11817 : {
11818 126156698 : rtx inner;
11819 :
11820 126156698 : switch (GET_CODE (x))
11821 : {
11822 2505949 : case CONST:
11823 2505949 : inner = XEXP (x, 0);
11824 2505949 : if (GET_CODE (inner) == PLUS
11825 358195 : && CONST_INT_P (XEXP (inner, 1)))
11826 358195 : inner = XEXP (inner, 0);
11827 :
11828 : /* Only some unspecs are valid as "constants". */
11829 2505949 : if (GET_CODE (inner) == UNSPEC)
11830 2255943 : switch (XINT (inner, 1))
11831 : {
11832 2195382 : case UNSPEC_GOT:
11833 2195382 : case UNSPEC_GOTOFF:
11834 2195382 : case UNSPEC_PLTOFF:
11835 2195382 : return TARGET_64BIT;
11836 0 : case UNSPEC_TPOFF:
11837 0 : x = XVECEXP (inner, 0, 0);
11838 0 : return (SYMBOL_REF_P (x)
11839 0 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11840 0 : case UNSPEC_SECREL32:
11841 0 : x = XVECEXP (inner, 0, 0);
11842 0 : return SYMBOL_REF_P (x);
11843 0 : case UNSPEC_MACHOPIC_OFFSET:
11844 0 : return legitimate_pic_address_disp_p (x);
11845 : default:
11846 : return false;
11847 : }
11848 : /* FALLTHRU */
11849 :
11850 6990801 : case SYMBOL_REF:
11851 6990801 : case LABEL_REF:
11852 6990801 : return legitimate_pic_address_disp_p (x);
11853 :
11854 : default:
11855 : return true;
11856 : }
11857 : }
11858 :
11859 : /* Determine if a given CONST RTX is a valid memory displacement
11860 : in PIC mode. DISP is the displacement to check. In 64bit mode some
      : LABEL_REF, SYMBOL_REF and CONST PLUS forms are accepted directly by
      : the first switch; anything that leaves that switch, and everything
      : in 32bit mode, must be a CONST wrapping an UNSPEC (in 32bit mode
      : optionally plus a CONST_INT) whose unspec number is checked below. */
11861 :
11862 : bool
11863 64749842 : legitimate_pic_address_disp_p (rtx disp)
11864 : {
11865 64749842 : bool saw_plus;
11866 :
11867 : /* In 64bit mode we can allow direct addresses of symbols and labels
11868 : when they are not dynamic symbols. */
11869 64749842 : if (TARGET_64BIT)
11870 : {
11871 39577700 : rtx op0 = disp, op1;
11872 :
11873 39577700 : switch (GET_CODE (disp))
11874 : {
11875 : case LABEL_REF:
11876 : return true;
11877 :
11878 10945329 : case CONST:
11879 10945329 : if (GET_CODE (XEXP (disp, 0)) != PLUS)
11880 : break;
11881 1177172 : op0 = XEXP (XEXP (disp, 0), 0);
11882 1177172 : op1 = XEXP (XEXP (disp, 0), 1);
11883 1177172 : if (!CONST_INT_P (op1))
11884 : break;
11885 1177172 : if (GET_CODE (op0) == UNSPEC
11886 296 : && (XINT (op0, 1) == UNSPEC_DTPOFF
11887 296 : || XINT (op0, 1) == UNSPEC_NTPOFF)
11888 1177468 : && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11889 : return true;
11890 1176876 : if (INTVAL (op1) >= 16*1024*1024
11891 1176876 : || INTVAL (op1) < -16*1024*1024)
11892 : break;
11893 1176788 : if (LABEL_REF_P (op0))
11894 : return true;
11895 1176788 : if (GET_CODE (op0) == CONST
11896 0 : && GET_CODE (XEXP (op0, 0)) == UNSPEC
11897 0 : && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11898 : return true;
11899 1176788 : if (GET_CODE (op0) == UNSPEC
11900 0 : && XINT (op0, 1) == UNSPEC_PCREL)
11901 : return true;
11902 1176788 : if (!SYMBOL_REF_P (op0))
11903 : break;
11904 : /* FALLTHRU */
11905 :
11906 29585603 : case SYMBOL_REF:
11907 : /* TLS references should always be enclosed in UNSPEC.
11908 : The dllimported symbol needs always to be resolved. */
11909 29585603 : if (SYMBOL_REF_TLS_MODEL (op0)
11910 : || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11911 : return false;
11912 :
11913 29429812 : if (TARGET_PECOFF)
11914 : {
11915 : #if TARGET_PECOFF
11916 : if (is_imported_p (op0))
11917 : return true;
11918 : #endif
11919 :
11920 : if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11921 : break;
11922 :
11923 : /* Non-external-weak function symbols need to be resolved only
11924 : for the large model. Non-external symbols don't need to be
11925 : resolved for large and medium models. For the small model,
11926 : we don't need to resolve anything here. */
11927 : if ((ix86_cmodel != CM_LARGE_PIC
11928 : && SYMBOL_REF_FUNCTION_P (op0)
11929 : && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11930 : || !SYMBOL_REF_EXTERNAL_P (op0)
11931 : || ix86_cmodel == CM_SMALL_PIC)
11932 : return true;
11933 : }
11934 29429812 : else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11935 29429808 : && (SYMBOL_REF_LOCAL_P (op0)
11936 17842659 : || ((ix86_direct_extern_access
11937 35513608 : && !(SYMBOL_REF_DECL (op0)
11938 17671112 : && lookup_attribute ("nodirect_extern_access",
11939 17671112 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11940 : && HAVE_LD_PIE_COPYRELOC
11941 17842333 : && flag_pie
11942 34047 : && !SYMBOL_REF_WEAK (op0)
11943 33659 : && !SYMBOL_REF_FUNCTION_P (op0)))
11944 41020694 : && ix86_cmodel != CM_LARGE_PIC)
11945 : return true;
11946 : break;
11947 :
11948 : default:
11949 : break;
11950 : }
11951 : }
11952 52783192 : if (GET_CODE (disp) != CONST)
11953 : return false;
11954 14966138 : disp = XEXP (disp, 0);
11955 :
11956 14966138 : if (TARGET_64BIT)
11957 : {
11958 : /* It is unsafe to allow PLUS expressions here: that limits the allowed
11959 : distance of GOT tables. We should not need these anyway. */
11960 9820639 : if (GET_CODE (disp) != UNSPEC
11961 9768157 : || (XINT (disp, 1) != UNSPEC_GOTPCREL
11962 9768157 : && XINT (disp, 1) != UNSPEC_GOTOFF
11963 : && XINT (disp, 1) != UNSPEC_PCREL
11964 : && XINT (disp, 1) != UNSPEC_PLTOFF))
11965 : return false;
11966 :
11967 9768157 : if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11968 9768157 : && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
11969 : return false;
11970 : return true;
11971 : }
11972 :
11973 5145499 : saw_plus = false;
11974 5145499 : if (GET_CODE (disp) == PLUS)
11975 : {
11976 589234 : if (!CONST_INT_P (XEXP (disp, 1)))
11977 : return false;
11978 589234 : disp = XEXP (disp, 0);
11979 589234 : saw_plus = true;
11980 : }
11981 :
11982 5145499 : if (TARGET_MACHO && darwin_local_data_pic (disp))
11983 : return true;
11984 :
11985 5145499 : if (GET_CODE (disp) != UNSPEC)
11986 : return false;
11987 :
11988 4980299 : switch (XINT (disp, 1))
11989 : {
11990 2265997 : case UNSPEC_GOT:
11991 2265997 : if (saw_plus)
11992 : return false;
11993 : /* We need to check for both symbols and labels because VxWorks loads
11994 : text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11995 : details. */
11996 2265996 : return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11997 2265996 : || LABEL_REF_P (XVECEXP (disp, 0, 0)));
11998 2714302 : case UNSPEC_GOTOFF:
11999 : /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12000 : While the ABI also specifies a 32bit relocation, we don't produce it
12001 : in the small PIC model at all. */
12002 2714302 : if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
12003 2714302 : || LABEL_REF_P (XVECEXP (disp, 0, 0)))
12004 : && !TARGET_64BIT)
12005 5428604 : return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12006 : return false;
12007 0 : case UNSPEC_GOTTPOFF:
12008 0 : case UNSPEC_GOTNTPOFF:
12009 0 : case UNSPEC_INDNTPOFF:
12010 0 : if (saw_plus)
12011 : return false;
12012 0 : disp = XVECEXP (disp, 0, 0);
12013 0 : return (SYMBOL_REF_P (disp)
12014 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12015 0 : case UNSPEC_NTPOFF:
12016 0 : disp = XVECEXP (disp, 0, 0);
12017 0 : return (SYMBOL_REF_P (disp)
12018 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12019 0 : case UNSPEC_DTPOFF:
12020 0 : disp = XVECEXP (disp, 0, 0);
12021 0 : return (SYMBOL_REF_P (disp)
12022 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12023 0 : case UNSPEC_SECREL32:
12024 0 : disp = XVECEXP (disp, 0, 0);
12025 0 : return SYMBOL_REF_P (disp);
12026 : }
12027 :
12028 : return false;
12029 : }
12030 :
12031 : /* Determine if OP is a suitable RTX for an address register.
12032 : Return naked register if a register or a register subreg is
12033 : found, otherwise return NULL_RTX. OP itself must have SImode or
      : DImode; for a SUBREG the inner REG is what gets returned. */
12034 :
12035 : static rtx
12036 1373338834 : ix86_validate_address_register (rtx op)
12037 : {
12038 1373338834 : machine_mode mode = GET_MODE (op);
12039 :
12040 : /* Only SImode or DImode registers can form the address. */
12041 1373338834 : if (mode != SImode && mode != DImode)
12042 : return NULL_RTX;
12043 :
12044 1373331923 : if (REG_P (op))
12045 : return op;
12046 694996 : else if (SUBREG_P (op))
12047 : {
12048 694996 : rtx reg = SUBREG_REG (op);
12049 :
12050 694996 : if (!REG_P (reg))
12051 : return NULL_RTX;
12052 :
12053 694996 : mode = GET_MODE (reg);
12054 :
12055 : /* Don't allow SUBREGs that span more than a word. It can
12056 : lead to spill failures when the register is one word out
12057 : of a two word structure. */
12058 1435356 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12059 : return NULL_RTX;
12060 :
12061 : /* Allow only SUBREGs of non-eliminable hard registers. */
12062 234033 : if (register_no_elim_operand (reg, mode))
12063 : return reg;
12064 : }
12065 :
12066 : /* Op is not a register. */
12067 : return NULL_RTX;
12068 : }
12069 :
12070 : /* Determine which memory address register set INSN can use. A null
      : INSN or one with a negative INSN_CODE yields ADDR_GPR32. An asm
      : yields ADDR_GPR32 or ADDR_GPR16 depending on
      : ix86_apx_inline_asm_use_gpr32. Otherwise the answer comes from
      : get_attr_addr, minimized over all enabled alternatives when
      : which_alternative is -1. recog_data and which_alternative are
      : saved before and restored after the attribute query. */
12071 :
12072 : static enum attr_addr
12073 253806684 : ix86_memory_address_reg_class (rtx_insn* insn)
12074 : {
12075 : /* LRA can do some initialization with NULL insn,
12076 : return maximum register class in this case. */
12077 253806684 : enum attr_addr addr_rclass = ADDR_GPR32;
12078 :
12079 253806684 : if (!insn)
12080 : return addr_rclass;
12081 :
12082 72377934 : if (asm_noperands (PATTERN (insn)) >= 0
12083 72377934 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)
12084 75424 : return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
12085 :
12086 : /* Return maximum register class for unrecognized instructions. */
12087 72340222 : if (INSN_CODE (insn) < 0)
12088 : return addr_rclass;
12089 :
12090 : /* Try to recognize the insn before calling get_attr_addr.
12091 : Save current recog_data and current alternative. */
12092 72340222 : struct recog_data_d saved_recog_data = recog_data;
12093 72340222 : int saved_alternative = which_alternative;
12094 :
12095 : /* Update recog_data for processing of alternatives. */
12096 72340222 : extract_insn_cached (insn);
12097 :
12098 : /* If current alternative is not set, loop through enabled
12099 : alternatives and get the most limited register class. */
12100 72340222 : if (saved_alternative == -1)
12101 : {
12102 72340222 : alternative_mask enabled = get_enabled_alternatives (insn);
12103 :
12104 1249068023 : for (int i = 0; i < recog_data.n_alternatives; i++)
12105 : {
12106 1176727801 : if (!TEST_BIT (enabled, i))
12107 349115972 : continue;
12108 :
12109 827611829 : which_alternative = i;
12110 827611829 : addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
12111 : }
12112 : }
12113 : else
12114 : {
12115 0 : which_alternative = saved_alternative;
12116 0 : addr_rclass = get_attr_addr (insn);
12117 : }
12118 :
12119 72340222 : recog_data = saved_recog_data;
12120 72340222 : which_alternative = saved_alternative;
12121 :
12122 72340222 : return addr_rclass;
12123 : }
12124 :
12125 : /* Return the base register class INSN can use in a memory address:
      : LEGACY_GENERAL_REGS for ADDR_GPR8, GENERAL_GPR16 for ADDR_GPR16 and
      : BASE_REG_CLASS for ADDR_GPR32, as classified by
      : ix86_memory_address_reg_class. */
12126 :
12127 : enum reg_class
12128 213290846 : ix86_insn_base_reg_class (rtx_insn* insn)
12129 : {
12130 213290846 : switch (ix86_memory_address_reg_class (insn))
12131 : {
12132 : case ADDR_GPR8:
12133 : return LEGACY_GENERAL_REGS;
12134 : case ADDR_GPR16:
12135 : return GENERAL_GPR16;
12136 : case ADDR_GPR32:
12137 : break;
12138 0 : default:
12139 0 : gcc_unreachable ();
12140 : }
12141 :
12142 : return BASE_REG_CLASS;
12143 : }
12144 :
/* Return true if register number REGNO may serve as a base register in a
   memory address of INSN.  The predicate applied depends on the class
   reported by ix86_memory_address_reg_class: LEGACY_INT_REGNO_P for
   ADDR_GPR8, GENERAL_GPR16_REGNO_P for ADDR_GPR16 and GENERAL_REGNO_P
   for ADDR_GPR32.  */
12145 : bool
12146 1240506 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
12147 : {
12148 1240506 : switch (ix86_memory_address_reg_class (insn))
12149 : {
12150 0 : case ADDR_GPR8:
12151 0 : return LEGACY_INT_REGNO_P (regno);
12152 0 : case ADDR_GPR16:
12153 0 : return GENERAL_GPR16_REGNO_P (regno);
12154 1240506 : case ADDR_GPR32:
12155 1240506 : break;
12156 0 : default:
12157 0 : gcc_unreachable ();
12158 : }
12159 :
12160 1240506 : return GENERAL_REGNO_P (regno);
12161 : }
12162 :
/* Return the index register class INSN can use in a memory address:
   LEGACY_INDEX_REGS for ADDR_GPR8, INDEX_GPR16 for ADDR_GPR16 and
   INDEX_REG_CLASS for ADDR_GPR32, as classified by
   ix86_memory_address_reg_class.  */
12163 : enum reg_class
12164 39275332 : ix86_insn_index_reg_class (rtx_insn* insn)
12165 : {
12166 39275332 : switch (ix86_memory_address_reg_class (insn))
12167 : {
12168 : case ADDR_GPR8:
12169 : return LEGACY_INDEX_REGS;
12170 : case ADDR_GPR16:
12171 : return INDEX_GPR16;
12172 : case ADDR_GPR32:
12173 : break;
12174 0 : default:
12175 0 : gcc_unreachable ();
12176 : }
12177 :
12178 : return INDEX_REG_CLASS;
12179 : }
12180 :
12181 : /* Recognizes RTL expressions that are valid memory addresses for an
12182 : instruction. The MODE argument is the machine mode for the MEM
12183 : expression that wants to use this address; it is unnamed and unused
      : here, as is the trailing code_helper argument. ADDR is the address
      : to check. STRICT selects the strict register-number predicates
      : (REGNO_OK_FOR_BASE_P / REGNO_OK_FOR_INDEX_P) instead of their
      : NONSTRICT variants. The address is split with ix86_decompose_address
      : and then base, index, segment, scale and displacement are validated
      : in that order; any failure returns false.
12184 :
12185 : It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12186 : convert common non-canonical forms to canonical form so that they will
12187 : be recognized. */
12188 :
12189 : static bool
12190 2242806018 : ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
12191 : code_helper = ERROR_MARK)
12192 : {
12193 2242806018 : struct ix86_address parts;
12194 2242806018 : rtx base, index, disp;
12195 2242806018 : HOST_WIDE_INT scale;
12196 2242806018 : addr_space_t seg;
12197 :
12198 2242806018 : if (ix86_decompose_address (addr, &parts) == 0)
12199 : /* Decomposition failed. */
12200 : return false;
12201 :
12202 2231252893 : base = parts.base;
12203 2231252893 : index = parts.index;
12204 2231252893 : disp = parts.disp;
12205 2231252893 : scale = parts.scale;
12206 2231252893 : seg = parts.seg;
12207 :
12208 : /* Validate base register. */
12209 2231252893 : if (base)
12210 : {
12211 1286095528 : rtx reg = ix86_validate_address_register (base);
12212 :
12213 1286095528 : if (reg == NULL_RTX)
12214 : return false;
12215 :
12216 1285664712 : unsigned int regno = REGNO (reg);
12217 1285664712 : if ((strict && !REGNO_OK_FOR_BASE_P (regno))
12218 1281239428 : || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
12219 : /* Base is not valid. */
12220 : return false;
12221 : }
12222 :
12223 : /* Validate index register. */
12224 2229489639 : if (index)
12225 : {
12226 87243306 : rtx reg = ix86_validate_address_register (index);
12227 :
12228 87243306 : if (reg == NULL_RTX)
12229 : return false;
12230 :
12231 87206073 : unsigned int regno = REGNO (reg);
12232 87206073 : if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
12233 87198126 : || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
12234 : /* Index is not valid. */
12235 : return false;
12236 : }
12237 :
12238 : /* Index and base should have the same mode. */
12239 2229450385 : if (base && index
12240 77615623 : && GET_MODE (base) != GET_MODE (index))
12241 : return false;
12242 :
12243 : /* Address override works only on the (%reg) part of %fs:(%reg). */
12244 2229190252 : if (seg != ADDR_SPACE_GENERIC
12245 2229190252 : && ((base && GET_MODE (base) != word_mode)
12246 339890 : || (index && GET_MODE (index) != word_mode)))
12247 : return false;
12248 :
12249 : /* Validate scale factor. */
12250 2229190223 : if (scale != 1)
12251 : {
12252 39989635 : if (!index)
12253 : /* Scale without index. */
12254 : return false;
12255 :
12256 39989635 : if (scale != 2 && scale != 4 && scale != 8)
12257 : /* Scale is not a valid multiplier. */
12258 : return false;
12259 : }
12260 :
12261 : /* Validate displacement. */
12262 2226031536 : if (disp)
12263 : {
12264 1999784338 : if (ix86_endbr_immediate_operand (disp, VOIDmode))
12265 : return false;
12266 :
12267 1999784295 : if (GET_CODE (disp) == CONST
12268 146428852 : && GET_CODE (XEXP (disp, 0)) == UNSPEC
12269 15401655 : && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12270 15401655 : switch (XINT (XEXP (disp, 0), 1))
12271 : {
12272 : /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
12273 : when used. While the ABI also specifies 32bit relocations, we
12274 : don't produce them at all and use IP relative instead.
12275 : Allow GOT in 32bit mode for both PIC and non-PIC if symbol
12276 : should be loaded via GOT. */
12277 2266055 : case UNSPEC_GOT:
12278 2266055 : if (!TARGET_64BIT
12279 2266055 : && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12280 0 : goto is_legitimate_pic;
12281 : /* FALLTHRU */
12282 4556468 : case UNSPEC_GOTOFF:
12283 4556468 : gcc_assert (flag_pic);
12284 4556468 : if (!TARGET_64BIT)
12285 4556265 : goto is_legitimate_pic;
12286 :
12287 : /* 64bit address unspec. */
12288 : return false;
12289 :
12290 9768129 : case UNSPEC_GOTPCREL:
12291 9768129 : if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12292 2534 : goto is_legitimate_pic;
12293 : /* FALLTHRU */
12294 9765595 : case UNSPEC_PCREL:
12295 9765595 : gcc_assert (flag_pic);
12296 9765595 : goto is_legitimate_pic;
12297 :
12298 : case UNSPEC_GOTTPOFF:
12299 : case UNSPEC_GOTNTPOFF:
12300 : case UNSPEC_INDNTPOFF:
12301 : case UNSPEC_NTPOFF:
12302 : case UNSPEC_DTPOFF:
12303 : case UNSPEC_SECREL32:
12304 : break;
12305 :
12306 : default:
12307 : /* Invalid address unspec. */
12308 : return false;
12309 : }
12310 :
12311 1261807855 : else if (SYMBOLIC_CONST (disp)
12312 2115409837 : && (flag_pic
12313 : #if TARGET_MACHO
12314 : || (MACHOPIC_INDIRECT
12315 : && !machopic_operand_p (disp))
12316 : #endif
12317 : ))
12318 : {
12319 :
12320 57597759 : is_legitimate_pic:
12321 57597759 : if (TARGET_64BIT && (index || base))
12322 : {
12323 : /* foo@dtpoff(%rX) is ok. */
12324 37405 : if (GET_CODE (disp) != CONST
12325 7108 : || GET_CODE (XEXP (disp, 0)) != PLUS
12326 7108 : || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12327 4637 : || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12328 4637 : || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12329 4637 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
12330 6 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
12331 : /* Non-constant pic memory reference. */
12332 : return false;
12333 : }
12334 57560354 : else if ((!TARGET_MACHO || flag_pic)
12335 57560354 : && ! legitimate_pic_address_disp_p (disp))
12336 : /* Displacement is an invalid pic construct. */
12337 : return false;
12338 : #if TARGET_MACHO
12339 : else if (MACHO_DYNAMIC_NO_PIC_P
12340 : && !ix86_legitimate_constant_p (Pmode, disp))
12341 : /* displacement must be referenced via non_lazy_pointer */
12342 : return false;
12343 : #endif
12344 :
12345 : /* This code used to verify that a symbolic pic displacement
12346 : includes the pic_offset_table_rtx register.
12347 :
12348 : While this is a good idea, unfortunately these constructs may
12349 : be created by "adds using lea" optimization for incorrect
12350 : code like:
12351 :
12352 : int a;
12353 : int foo(int i)
12354 : {
12355 : return *(&a+i);
12356 : }
12357 :
12358 : This code is nonsensical, but results in addressing
12359 : GOT table with pic_offset_table_rtx base. We can't
12360 : just refuse it easily, since it gets matched by
12361 : "addsi3" pattern, that later gets split to lea in the
12362 : case output register differs from input. While this
12363 : can be handled by separate addsi pattern for this case
12364 : that never results in lea, this seems to be easier and
12365 : correct fix for crash to disable this test. */
12366 : }
12367 1941109275 : else if (!LABEL_REF_P (disp)
12368 1940932751 : && !CONST_INT_P (disp)
12369 867390853 : && (GET_CODE (disp) != CONST
12370 132464446 : || !ix86_legitimate_constant_p (Pmode, disp))
12371 2679000186 : && (!SYMBOL_REF_P (disp)
12372 745781144 : || !ix86_legitimate_constant_p (Pmode, disp)))
12373 : /* Displacement is not constant. */
12374 57317512 : return false;
12375 1883791763 : else if (TARGET_64BIT
12376 1883791763 : && !x86_64_immediate_operand (disp, VOIDmode))
12377 : /* Displacement is out of range. */
12378 : return false;
12379 : /* In x32 mode, constant addresses are sign extended to 64bit, so
12380 : we have to prevent addresses from 0x80000000 to 0xffffffff. */
12381 45622 : else if (TARGET_X32 && !(index || base)
12382 17326 : && CONST_INT_P (disp)
12383 1883286380 : && val_signbit_known_set_p (SImode, INTVAL (disp)))
12384 : return false;
12385 : }
12386 :
12387 : /* Everything looks valid. */
12388 : return true;
12389 : }
12390 :
12391 : /* Determine if a given RTX is a valid constant address, i.e. X
      : satisfies CONSTANT_P and ix86_legitimate_address_p accepts it for
      : Pmode with the strict argument set to 1. */
12392 :
12393 : bool
12394 2775903345 : constant_address_p (rtx x)
12395 : {
12396 2856287337 : return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12397 : }
12398 :
12399 :
12400 : /* Return a legitimate reference for ORIG (an address) using the
12401 : register REG. If REG is 0, a new pseudo is generated.
12402 :
12403 : There are two types of references that must be handled:
12404 :
12405 : 1. Global data references must load the address from the GOT, via
12406 : the PIC reg. An insn is emitted to do this load, and the reg is
12407 : returned.
12408 :
12409 : 2. Static data references, constant pool addresses, and code labels
12410 : compute the address as an offset from the GOT, whose base is in
12411 : the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12412 : differentiate them from global data objects. The returned
12413 : address is the PIC reg + an unspec constant.
12414 :
12415 : TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12416 : reg also appears in the address. */
12417 :
12418 : rtx
12419 398533 : legitimize_pic_address (rtx orig, rtx reg)
12420 : {
12421 398533 : rtx addr = orig;
12422 398533 : rtx new_rtx = orig;
12423 :
12424 : #if TARGET_MACHO
12425 : if (TARGET_MACHO && !TARGET_64BIT)
12426 : {
12427 : if (reg == 0)
12428 : reg = gen_reg_rtx (Pmode);
12429 : /* Use the generic Mach-O PIC machinery. */
12430 : return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12431 : }
12432 : #endif
12433 :
12434 398533 : if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12435 : {
12436 : #if TARGET_PECOFF
12437 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12438 : if (tmp)
12439 : return tmp;
12440 : #endif
12441 : }
12442 :
12443 398533 : if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12444 : new_rtx = addr;
12445 302160 : else if ((!TARGET_64BIT
12446 102314 : || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
12447 : && !TARGET_PECOFF
12448 502103 : && gotoff_operand (addr, Pmode))
12449 : {
12450 : /* This symbol may be referenced via a displacement
12451 : from the PIC base address (@GOTOFF). */
12452 96408 : if (GET_CODE (addr) == CONST)
12453 3047 : addr = XEXP (addr, 0);
12454 :
12455 96408 : if (GET_CODE (addr) == PLUS)
12456 : {
12457 6094 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12458 : UNSPEC_GOTOFF);
12459 6094 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12460 : }
12461 : else
12462 186693 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12463 :
12464 192787 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12465 :
12466 96408 : if (TARGET_64BIT)
12467 29 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12468 :
12469 96408 : if (reg != 0)
12470 : {
12471 3 : gcc_assert (REG_P (reg));
12472 3 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12473 : new_rtx, reg, 1, OPTAB_DIRECT);
12474 : }
12475 : else
12476 192784 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12477 : }
12478 384201 : else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
12479 : /* We can't always use @GOTOFF for text labels
12480 : on VxWorks, see gotoff_operand. */
12481 205752 : || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
12482 : {
12483 : #if TARGET_PECOFF
12484 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12485 : if (tmp)
12486 : return tmp;
12487 : #endif
12488 :
12489 : /* For x64 PE-COFF there is no GOT table,
12490 : so we use address directly. */
12491 178446 : if (TARGET_64BIT && TARGET_PECOFF)
12492 : {
12493 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12494 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12495 : }
12496 178446 : else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12497 : {
12498 95044 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
12499 : UNSPEC_GOTPCREL);
12500 95044 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12501 95044 : new_rtx = gen_const_mem (Pmode, new_rtx);
12502 95041 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12503 : }
12504 : else
12505 : {
12506 : /* This symbol must be referenced via a load
12507 : from the Global Offset Table (@GOT). */
12508 166787 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12509 166787 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12510 :
12511 83405 : if (TARGET_64BIT)
12512 23 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12513 :
12514 83405 : if (reg != 0)
12515 : {
12516 0 : gcc_assert (REG_P (reg));
12517 0 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12518 : new_rtx, reg, 1, OPTAB_DIRECT);
12519 : }
12520 : else
12521 166787 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12522 :
12523 166787 : new_rtx = gen_const_mem (Pmode, new_rtx);
12524 83405 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12525 : }
12526 :
12527 261831 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12528 : }
12529 : else
12530 : {
12531 27306 : if (CONST_INT_P (addr)
12532 27306 : && !x86_64_immediate_operand (addr, VOIDmode))
12533 8 : new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
12534 27298 : else if (GET_CODE (addr) == CONST)
12535 : {
12536 16751 : addr = XEXP (addr, 0);
12537 :
12538 : /* We must match stuff we generate before. Assume the only
12539 : unspecs that can get here are ours. Not that we could do
12540 : anything with them anyway.... */
12541 16751 : if (GET_CODE (addr) == UNSPEC
12542 8970 : || (GET_CODE (addr) == PLUS
12543 8970 : && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12544 : return orig;
12545 6840 : gcc_assert (GET_CODE (addr) == PLUS);
12546 : }
12547 :
12548 17395 : if (GET_CODE (addr) == PLUS)
12549 : {
12550 8698 : rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12551 :
12552 : /* Check first to see if this is a constant
12553 : offset from a @GOTOFF symbol reference. */
12554 8698 : if (!TARGET_PECOFF
12555 13785 : && gotoff_operand (op0, Pmode)
12556 8698 : && CONST_INT_P (op1))
12557 : {
12558 4 : if (!TARGET_64BIT)
12559 : {
12560 0 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12561 : UNSPEC_GOTOFF);
12562 0 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12563 0 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12564 :
12565 0 : if (reg != 0)
12566 : {
12567 0 : gcc_assert (REG_P (reg));
12568 0 : new_rtx = expand_simple_binop (Pmode, PLUS,
12569 : pic_offset_table_rtx,
12570 : new_rtx, reg, 1,
12571 : OPTAB_DIRECT);
12572 : }
12573 : else
12574 0 : new_rtx
12575 0 : = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12576 : }
12577 : else
12578 : {
12579 4 : if (INTVAL (op1) < -16*1024*1024
12580 4 : || INTVAL (op1) >= 16*1024*1024)
12581 : {
12582 4 : if (!x86_64_immediate_operand (op1, Pmode))
12583 4 : op1 = force_reg (Pmode, op1);
12584 :
12585 4 : new_rtx
12586 4 : = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12587 : }
12588 : }
12589 : }
12590 : else
12591 : {
12592 8694 : rtx base = legitimize_pic_address (op0, reg);
12593 8694 : machine_mode mode = GET_MODE (base);
12594 8694 : new_rtx
12595 8694 : = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12596 :
12597 8694 : if (CONST_INT_P (new_rtx))
12598 : {
12599 6828 : if (INTVAL (new_rtx) < -16*1024*1024
12600 6828 : || INTVAL (new_rtx) >= 16*1024*1024)
12601 : {
12602 0 : if (!x86_64_immediate_operand (new_rtx, mode))
12603 0 : new_rtx = force_reg (mode, new_rtx);
12604 :
12605 0 : new_rtx
12606 0 : = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12607 : }
12608 : else
12609 6828 : new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12610 : }
12611 : else
12612 : {
12613 : /* For %rip addressing, we have to use
12614 : just disp32, not base nor index. */
12615 1866 : if (TARGET_64BIT
12616 101 : && (SYMBOL_REF_P (base)
12617 101 : || LABEL_REF_P (base)))
12618 7 : base = force_reg (mode, base);
12619 1866 : if (GET_CODE (new_rtx) == PLUS
12620 1745 : && CONSTANT_P (XEXP (new_rtx, 1)))
12621 : {
12622 1741 : base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12623 1741 : new_rtx = XEXP (new_rtx, 1);
12624 : }
12625 1866 : new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12626 : }
12627 : }
12628 : }
12629 : }
12630 : return new_rtx;
12631 : }
12632 :
12633 : /* Load the thread pointer. If TO_REG is true, force it into a register. */
12634 :
12635 : static rtx
12636 24541 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
12637 : {
12638 24541 : rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12639 :
12640 24541 : if (GET_MODE (tp) != tp_mode)
12641 : {
12642 11 : gcc_assert (GET_MODE (tp) == SImode);
12643 11 : gcc_assert (tp_mode == DImode);
12644 :
12645 11 : tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12646 : }
12647 :
12648 24541 : if (to_reg)
12649 8159 : tp = copy_to_mode_reg (tp_mode, tp);
12650 :
12651 24541 : return tp;
12652 : }
12653 :
12654 : /* Construct the SYMBOL_REF for the _tls_index symbol. */
12655 :
12656 : static GTY(()) rtx ix86_tls_index_symbol;
12657 :
12658 : static rtx
12659 0 : ix86_tls_index (void)
12660 : {
12661 0 : if (!ix86_tls_index_symbol)
12662 0 : ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12663 :
12664 0 : if (flag_pic)
12665 0 : return gen_rtx_CONST (Pmode,
12666 : gen_rtx_UNSPEC (Pmode,
12667 : gen_rtvec (1, ix86_tls_index_symbol),
12668 : UNSPEC_PCREL));
12669 : else
12670 0 : return ix86_tls_index_symbol;
12671 : }
12672 :
12673 : /* Construct the SYMBOL_REF for the tls_get_addr function. */
12674 :
12675 : static GTY(()) rtx ix86_tls_symbol;
12676 :
12677 : rtx
12678 6720 : ix86_tls_get_addr (void)
12679 : {
12680 6720 : if (cfun->machine->call_saved_registers
12681 6720 : == TYPE_NO_CALLER_SAVED_REGISTERS)
12682 : {
12683 : /* __tls_get_addr doesn't preserve vector registers. When a
12684 : function with no_caller_saved_registers attribute calls
12685 : __tls_get_addr, YMM and ZMM registers will be clobbered.
12686 : Issue an error and suggest -mtls-dialect=gnu2 in this case. */
12687 3 : if (cfun->machine->func_type == TYPE_NORMAL)
12688 1 : error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
12689 : " with the %<no_caller_saved_registers%> attribute"));
12690 : else
12691 3 : error (cfun->machine->func_type == TYPE_EXCEPTION
12692 : ? G_("%<-mtls-dialect=gnu2%> must be used with an"
12693 : " exception service routine")
12694 : : G_("%<-mtls-dialect=gnu2%> must be used with an"
12695 : " interrupt service routine"));
12696 : /* Don't issue the same error twice. */
12697 3 : cfun->machine->func_type = TYPE_NORMAL;
12698 3 : cfun->machine->call_saved_registers
12699 3 : = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
12700 : }
12701 :
12702 6720 : if (!ix86_tls_symbol)
12703 : {
12704 209 : const char *sym
12705 246 : = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12706 246 : ? "___tls_get_addr" : "__tls_get_addr");
12707 :
12708 283 : ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12709 : }
12710 :
12711 6720 : if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12712 : {
12713 2 : rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12714 : UNSPEC_PLTOFF);
12715 2 : return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12716 : gen_rtx_CONST (Pmode, unspec));
12717 : }
12718 :
12719 6718 : return ix86_tls_symbol;
12720 : }
12721 :
12722 : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12723 :
12724 : static GTY(()) rtx ix86_tls_module_base_symbol;
12725 :
12726 : rtx
12727 98 : ix86_tls_module_base (void)
12728 : {
12729 98 : if (!ix86_tls_module_base_symbol)
12730 : {
12731 11 : ix86_tls_module_base_symbol
12732 11 : = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12733 :
12734 11 : SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12735 11 : |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12736 : }
12737 :
12738 98 : return ix86_tls_module_base_symbol;
12739 : }
12740 :
12741 : /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12742 : false if we expect this to be used for a memory address and true if
12743 : we expect to load the address into a register.

   X is the TLS symbol and MODEL selects which access sequence to expand.
   Any insns needed are emitted as a side effect.  The global-dynamic and
   local-dynamic paths return a pseudo register holding the address.  The
   initial-exec and local-exec paths return a (plus BASE OFF) expression
   when TARGET_64BIT or TARGET_ANY_GNU_TLS holds (converted to Pmode for
   initial-exec when needed), and otherwise a pseudo set by
   gen_sub3_insn (dest, base, off).  The TARGET_WIN32_TLS path returns
   (plus BASE (const (unspec [X] UNSPEC_SECREL32))).  */
12744 :
12745 : rtx
12746 30969 : legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12747 : {
12748 30969 : rtx dest, base, off;
12749 30969 : rtx pic = NULL_RTX, tp = NULL_RTX;
12750 30969 : machine_mode tp_mode = Pmode;
12751 30969 : int type;
12752 :
12753 : /* Windows implements a single form of TLS. */
12754 30969 : if (TARGET_WIN32_TLS)
12755 : {
12756 : /* Load the 32-bit index. */
12757 : rtx ind = gen_const_mem (SImode, ix86_tls_index ());
12758 : set_mem_alias_set (ind, GOT_ALIAS_SET);
12759 : if (TARGET_64BIT)
12760 : ind = convert_to_mode (Pmode, ind, 1);
12761 : ind = force_reg (Pmode, ind);
12762 :
12763 : /* Add it to the thread pointer and load the base. */
12764 : tp = get_thread_pointer (Pmode, true);
12765 : rtx addr = gen_rtx_PLUS (Pmode, tp,
12766 : gen_rtx_MULT (Pmode, ind,
12767 : GEN_INT (UNITS_PER_WORD)));
12768 : base = gen_const_mem (Pmode, addr);
12769 : set_mem_alias_set (base, GOT_ALIAS_SET);
12770 :
12771 : /* Add the 32-bit section-relative offset to the base. */
12772 : base = force_reg (Pmode, base);
12773 : off = gen_rtx_CONST (Pmode,
12774 : gen_rtx_UNSPEC (SImode,
12775 : gen_rtvec (1, x),
12776 : UNSPEC_SECREL32));
12777 : return gen_rtx_PLUS (Pmode, base, off);
12778 : }
12779 :
12780 : /* Fall back to global dynamic model if tool chain cannot support local
12781 : dynamic. */
12782 30969 : if (TARGET_SUN_TLS && !TARGET_64BIT
12783 : && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12784 : && model == TLS_MODEL_LOCAL_DYNAMIC)
12785 : model = TLS_MODEL_GLOBAL_DYNAMIC;
12786 :
12787 30969 : switch (model)
12788 : {
12789 6121 : case TLS_MODEL_GLOBAL_DYNAMIC: /* 32-bit first picks a PIC register; then either the GNU2 sequence or a call via ix86_tls_get_addr is emitted. */
12790 6121 : if (!TARGET_64BIT)
12791 : {
12792 1930 : if (flag_pic && !TARGET_PECOFF)
12793 1930 : pic = pic_offset_table_rtx;
12794 : else
12795 : {
12796 0 : pic = gen_reg_rtx (Pmode);
12797 0 : emit_insn (gen_set_got (pic));
12798 : }
12799 : }
12800 :
12801 6121 : if (TARGET_GNU2_TLS)
12802 : {
12803 53 : dest = gen_reg_rtx (ptr_mode);
12804 53 : if (TARGET_64BIT)
12805 53 : emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12806 : else
12807 0 : emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12808 :
12809 53 : tp = get_thread_pointer (ptr_mode, true);
12810 53 : dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12811 61 : if (GET_MODE (dest) != Pmode)
12812 6 : dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12813 61 : dest = force_reg (Pmode, dest);
12814 :
12815 61 : if (GET_MODE (x) != Pmode)
12816 3 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12817 :
12818 53 : set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12819 : }
12820 : else
12821 : {
12822 6068 : rtx caddr = ix86_tls_get_addr ();
12823 :
12824 7998 : dest = gen_reg_rtx (Pmode);
12825 6068 : if (TARGET_64BIT)
12826 : {
12827 4138 : rtx rax = gen_rtx_REG (Pmode, AX_REG);
12828 4138 : rtx rdi = gen_rtx_REG (Pmode, DI_REG);
12829 4138 : rtx_insn *insns;
12830 :
12831 4138 : start_sequence ();
12832 4138 : emit_call_insn
12833 4138 : (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
12834 4138 : insns = end_sequence ();
12835 :
12836 4138 : if (GET_MODE (x) != Pmode)
12837 1 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12838 :
12839 4138 : RTL_CONST_CALL_P (insns) = 1;
12840 4138 : emit_libcall_block (insns, dest, rax, x);
12841 : }
12842 : else
12843 1930 : emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12844 : }
12845 : break;
12846 :
12847 386 : case TLS_MODEL_LOCAL_DYNAMIC: /* Compute the module base into BASE, then add X's UNSPEC_DTPOFF offset. */
12848 386 : if (!TARGET_64BIT)
12849 : {
12850 92 : if (flag_pic)
12851 92 : pic = pic_offset_table_rtx;
12852 : else
12853 : {
12854 0 : pic = gen_reg_rtx (Pmode);
12855 0 : emit_insn (gen_set_got (pic));
12856 : }
12857 : }
12858 :
12859 386 : if (TARGET_GNU2_TLS)
12860 : {
12861 26 : rtx tmp = ix86_tls_module_base ();
12862 :
12863 26 : base = gen_reg_rtx (ptr_mode);
12864 26 : if (TARGET_64BIT)
12865 26 : emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12866 : else
12867 0 : emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12868 :
12869 26 : tp = get_thread_pointer (ptr_mode, true);
12870 32 : if (GET_MODE (base) != Pmode)
12871 2 : base = gen_rtx_ZERO_EXTEND (Pmode, base);
12872 32 : base = force_reg (Pmode, base);
12873 : }
12874 : else
12875 : {
12876 360 : rtx caddr = ix86_tls_get_addr ();
12877 :
12878 452 : base = gen_reg_rtx (Pmode);
12879 360 : if (TARGET_64BIT)
12880 : {
12881 268 : rtx rax = gen_rtx_REG (Pmode, AX_REG);
12882 268 : rtx rdi = gen_rtx_REG (Pmode, DI_REG);
12883 268 : rtx_insn *insns;
12884 268 : rtx eqv;
12885 :
12886 268 : start_sequence ();
12887 268 : emit_call_insn
12888 268 : (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
12889 268 : insns = end_sequence ();
12890 :
12891 : /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12892 : share the LD_BASE result with other LD model accesses. */
12893 268 : eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12894 : UNSPEC_TLS_LD_BASE);
12895 :
12896 268 : RTL_CONST_CALL_P (insns) = 1;
12897 268 : emit_libcall_block (insns, base, rax, eqv);
12898 : }
12899 : else
12900 92 : emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12901 : }
12902 :
12903 484 : off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12904 484 : off = gen_rtx_CONST (Pmode, off);
12905 :
12906 582 : dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12907 :
12908 386 : if (TARGET_GNU2_TLS)
12909 : {
12910 32 : if (GET_MODE (tp) != Pmode)
12911 : {
12912 2 : dest = lowpart_subreg (ptr_mode, dest, Pmode);
12913 2 : dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12914 2 : dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12915 : }
12916 : else
12917 24 : dest = gen_rtx_PLUS (Pmode, tp, dest);
12918 32 : dest = force_reg (Pmode, dest);
12919 :
12920 32 : if (GET_MODE (x) != Pmode)
12921 1 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12922 :
12923 26 : set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12924 : }
12925 : break;
12926 :
12927 10817 : case TLS_MODEL_INITIAL_EXEC: /* Choose the UNSPEC kind and optional PIC base, then read the offset from a constant MEM in GOT_ALIAS_SET. */
12928 10817 : if (TARGET_64BIT)
12929 : {
12930 : /* Generate DImode references to avoid %fs:(%reg32)
12931 : problems and linker IE->LE relaxation bug. */
12932 : tp_mode = DImode;
12933 : pic = NULL;
12934 : type = UNSPEC_GOTNTPOFF;
12935 : }
12936 764 : else if (flag_pic)
12937 : {
12938 763 : pic = pic_offset_table_rtx;
12939 763 : type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12940 : }
12941 1 : else if (!TARGET_ANY_GNU_TLS)
12942 : {
12943 0 : pic = gen_reg_rtx (Pmode);
12944 0 : emit_insn (gen_set_got (pic));
12945 0 : type = UNSPEC_GOTTPOFF;
12946 : }
12947 : else
12948 : {
12949 : pic = NULL;
12950 : type = UNSPEC_INDNTPOFF;
12951 : }
12952 :
12953 10817 : off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12954 10817 : off = gen_rtx_CONST (tp_mode, off);
12955 10817 : if (pic)
12956 763 : off = gen_rtx_PLUS (tp_mode, pic, off);
12957 10817 : off = gen_const_mem (tp_mode, off);
12958 10817 : set_mem_alias_set (off, GOT_ALIAS_SET);
12959 :
12960 10817 : if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12961 : {
12962 10817 : base = get_thread_pointer (tp_mode,
12963 10817 : for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12964 10817 : off = force_reg (tp_mode, off);
12965 10817 : dest = gen_rtx_PLUS (tp_mode, base, off);
12966 11585 : if (tp_mode != Pmode)
12967 4 : dest = convert_to_mode (Pmode, dest, 1);
12968 : }
12969 : else
12970 : {
12971 0 : base = get_thread_pointer (Pmode, true);
12972 0 : dest = gen_reg_rtx (Pmode);
12973 0 : emit_insn (gen_sub3_insn (dest, base, off));
12974 : }
12975 : break;
12976 :
12977 13645 : case TLS_MODEL_LOCAL_EXEC: /* The offset is a CONST-wrapped UNSPEC of X; no memory load is generated on this path. */
12978 28061 : off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12979 : (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12980 : ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12981 14416 : off = gen_rtx_CONST (Pmode, off);
12982 :
12983 13645 : if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12984 : {
12985 14416 : base = get_thread_pointer (Pmode,
12986 13645 : for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12987 14416 : return gen_rtx_PLUS (Pmode, base, off);
12988 : }
12989 : else
12990 : {
12991 0 : base = get_thread_pointer (Pmode, true);
12992 0 : dest = gen_reg_rtx (Pmode);
12993 0 : emit_insn (gen_sub3_insn (dest, base, off));
12994 : }
12995 0 : break;
12996 :
12997 0 : default:
12998 0 : gcc_unreachable ();
12999 : }
13000 :
13001 : return dest;
13002 : }
13003 :
13004 : /* Return true if the TLS address requires insn using integer registers.
13005 : It's used to prevent KMOV/VMOV in TLS code sequences which require integer
13006 : MOV instructions, refer to PR103275. */
13007 : bool
13008 15131374 : ix86_gpr_tls_address_pattern_p (rtx mem)
13009 : {
13010 15131374 : gcc_assert (MEM_P (mem));
13011 :
13012 15131374 : rtx addr = XEXP (mem, 0);
13013 15131374 : subrtx_var_iterator::array_type array;
13014 52610383 : FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
13015 : {
13016 37486594 : rtx op = *iter;
13017 37486594 : if (GET_CODE (op) == UNSPEC)
13018 201262 : switch (XINT (op, 1))
13019 : {
13020 : case UNSPEC_GOTNTPOFF:
13021 7585 : return true;
13022 0 : case UNSPEC_TPOFF:
13023 0 : if (!TARGET_64BIT)
13024 : return true;
13025 : break;
13026 : default:
13027 : break;
13028 : }
13029 : }
13030 :
13031 15123789 : return false;
13032 15131374 : }
13033 :
13034 : /* Return true if OP refers to a TLS address. */
13035 : bool
13036 232068861 : ix86_tls_address_pattern_p (rtx op)
13037 : {
13038 232068861 : subrtx_var_iterator::array_type array;
13039 1381116263 : FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
13040 : {
13041 1149065518 : rtx op = *iter;
13042 1149065518 : if (MEM_P (op))
13043 : {
13044 104887277 : rtx *x = &XEXP (op, 0);
13045 165938522 : while (GET_CODE (*x) == PLUS)
13046 : {
13047 : int i;
13048 183171874 : for (i = 0; i < 2; i++)
13049 : {
13050 122120629 : rtx u = XEXP (*x, i);
13051 122120629 : if (GET_CODE (u) == ZERO_EXTEND)
13052 111164 : u = XEXP (u, 0);
13053 122120629 : if (GET_CODE (u) == UNSPEC
13054 18148 : && XINT (u, 1) == UNSPEC_TP)
13055 18116 : return true;
13056 : }
13057 61051245 : x = &XEXP (*x, 0);
13058 : }
13059 :
13060 104869161 : iter.skip_subrtxes ();
13061 : }
13062 : }
13063 :
13064 232050745 : return false;
13065 232068861 : }
13066 :
13067 : /* Rewrite *LOC so that it refers to a default TLS address space. */
13068 : static void
13069 18116 : ix86_rewrite_tls_address_1 (rtx *loc)
13070 : {
13071 18116 : subrtx_ptr_iterator::array_type array;
13072 53756 : FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
13073 : {
13074 53756 : rtx *loc = *iter;
13075 53756 : if (MEM_P (*loc))
13076 : {
13077 18303 : rtx addr = XEXP (*loc, 0);
13078 18303 : rtx *x = &addr;
13079 23147 : while (GET_CODE (*x) == PLUS)
13080 : {
13081 : int i;
13082 32671 : for (i = 0; i < 2; i++)
13083 : {
13084 27827 : rtx u = XEXP (*x, i);
13085 27827 : if (GET_CODE (u) == ZERO_EXTEND)
13086 19 : u = XEXP (u, 0);
13087 27827 : if (GET_CODE (u) == UNSPEC
13088 18116 : && XINT (u, 1) == UNSPEC_TP)
13089 : {
13090 : /* NB: Since address override only applies to the
13091 : (reg32) part in fs:(reg32), return if address
13092 : override is used. */
13093 19747 : if (Pmode != word_mode
13094 18116 : && REG_P (XEXP (*x, 1 - i)))
13095 18116 : return;
13096 :
13097 18114 : addr_space_t as = DEFAULT_TLS_SEG_REG;
13098 :
13099 18114 : *x = XEXP (*x, 1 - i);
13100 :
13101 18114 : *loc = replace_equiv_address_nv (*loc, addr, true);
13102 18114 : set_mem_addr_space (*loc, as);
13103 18114 : return;
13104 : }
13105 : }
13106 4844 : x = &XEXP (*x, 0);
13107 : }
13108 :
13109 187 : iter.skip_subrtxes ();
13110 : }
13111 : }
13112 18116 : }
13113 :
13114 : /* Rewrite instruction pattern involvning TLS address
13115 : so that it refers to a default TLS address space. */
13116 : rtx
13117 18116 : ix86_rewrite_tls_address (rtx pattern)
13118 : {
13119 18116 : pattern = copy_insn (pattern);
13120 18116 : ix86_rewrite_tls_address_1 (&pattern);
13121 18116 : return pattern;
13122 : }
13123 :
13124 : /* Try machine-dependent ways of modifying an illegitimate address
13125 : to be legitimate. If we find one, return the new, valid address.
13126 : This macro is used in only one place: `memory_address' in explow.cc.
13127 :
13128 : OLDX is the address as it was before break_out_memory_refs was called.
13129 : In some cases it is useful to look at this to decide what needs to be done.
13130 :
13131 : It is always safe for this macro to do nothing. It exists to recognize
13132 : opportunities to optimize the output.
13133 :
13134 : For the 80386, we handle X+REG by loading X into a register R and
13135 : using R+REG. R will go in a general reg and indexing will be used.
13136 : However, if REG is a broken-out memory address or multiplication,
13137 : nothing needs to be done because REG can certainly go in a general reg.
13138 :
13139 : When -fpic is used, special handling is needed for symbolic references.
13140 : See comments by legitimize_pic_address in i386.cc for details.

   X is the address to legitimize and MODE is passed on to
   ix86_legitimate_address_p when re-checking a rewritten address.  The
   second parameter (OLDX) is unnamed and not used by this implementation.
   TLS symbols, dllimport symbols and PIC symbolic constants are delegated
   to their dedicated helpers first.  Otherwise shifts by 0..3 are rewritten
   as multiplies, PLUS operands are reordered and reassociated, and
   remaining non-register operands are forced into pseudos.  A PLUS rtx X
   may be modified in place.  */
13141 :
13142 : static rtx
13143 664871 : ix86_legitimize_address (rtx x, rtx, machine_mode mode)
13144 : {
13145 664871 : bool changed = false;
13146 664871 : unsigned log;
13147 :
13148 664871 : log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0; /* LOG holds the TLS model here and is reused as a shift count below. */
13149 151784 : if (log)
13150 20803 : return legitimize_tls_address (x, (enum tls_model) log, false);
13151 644068 : if (GET_CODE (x) == CONST
13152 508 : && GET_CODE (XEXP (x, 0)) == PLUS
13153 508 : && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13154 644576 : && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13155 : {
13156 4 : rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13157 : (enum tls_model) log, false);
13158 5 : return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13159 : }
13160 :
13161 644064 : if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13162 : {
13163 : #if TARGET_PECOFF
13164 : rtx tmp = legitimize_pe_coff_symbol (x, true);
13165 : if (tmp)
13166 : return tmp;
13167 : #endif
13168 : }
13169 :
13170 644064 : if (flag_pic && SYMBOLIC_CONST (x))
13171 131364 : return legitimize_pic_address (x, 0);
13172 :
13173 : #if TARGET_MACHO
13174 : if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13175 : return machopic_indirect_data_reference (x, 0);
13176 : #endif
13177 :
13178 : /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13179 512700 : if (GET_CODE (x) == ASHIFT
13180 0 : && CONST_INT_P (XEXP (x, 1))
13181 0 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13182 : {
13183 0 : changed = true;
13184 0 : log = INTVAL (XEXP (x, 1));
13185 0 : x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13186 : GEN_INT (1 << log));
13187 : }
13188 :
13189 512700 : if (GET_CODE (x) == PLUS)
13190 : {
13191 : /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13192 :
13193 172312 : if (GET_CODE (XEXP (x, 0)) == ASHIFT
13194 622 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13195 622 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13196 : {
13197 622 : changed = true;
13198 622 : log = INTVAL (XEXP (XEXP (x, 0), 1));
13199 1822 : XEXP (x, 0) = gen_rtx_MULT (Pmode,
13200 : force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13201 : GEN_INT (1 << log));
13202 : }
13203 :
13204 172312 : if (GET_CODE (XEXP (x, 1)) == ASHIFT
13205 0 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13206 0 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13207 : {
13208 0 : changed = true;
13209 0 : log = INTVAL (XEXP (XEXP (x, 1), 1));
13210 0 : XEXP (x, 1) = gen_rtx_MULT (Pmode,
13211 : force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13212 : GEN_INT (1 << log));
13213 : }
13214 :
13215 : /* Put multiply first if it isn't already. */
13216 172312 : if (GET_CODE (XEXP (x, 1)) == MULT)
13217 : {
13218 0 : std::swap (XEXP (x, 0), XEXP (x, 1));
13219 0 : changed = true;
13220 : }
13221 :
13222 : /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13223 : into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13224 : created by virtual register instantiation, register elimination, and
13225 : similar optimizations. */
13226 172312 : if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13227 : {
13228 9473 : changed = true;
13229 15081 : x = gen_rtx_PLUS (Pmode,
13230 : gen_rtx_PLUS (Pmode, XEXP (x, 0),
13231 : XEXP (XEXP (x, 1), 0)),
13232 : XEXP (XEXP (x, 1), 1));
13233 : }
13234 :
13235 : /* Canonicalize
13236 : (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13237 : into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13238 162839 : else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13239 100995 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13240 50960 : && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13241 0 : && CONSTANT_P (XEXP (x, 1)))
13242 : {
13243 0 : rtx constant;
13244 0 : rtx other = NULL_RTX;
13245 :
13246 0 : if (CONST_INT_P (XEXP (x, 1)))
13247 : {
13248 0 : constant = XEXP (x, 1);
13249 0 : other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13250 : }
13251 0 : else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13252 : {
13253 : constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13254 : other = XEXP (x, 1);
13255 : }
13256 : else
13257 : constant = 0;
13258 :
13259 0 : if (constant)
13260 : {
13261 0 : changed = true;
13262 0 : x = gen_rtx_PLUS (Pmode,
13263 : gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13264 : XEXP (XEXP (XEXP (x, 0), 1), 0)),
13265 : plus_constant (Pmode, other,
13266 : INTVAL (constant)));
13267 : }
13268 : }
13269 :
13270 172312 : if (changed && ix86_legitimate_address_p (mode, x, false))
13271 9509 : return x;
13272 :
13273 162803 : if (GET_CODE (XEXP (x, 0)) == MULT)
13274 : {
13275 19765 : changed = true;
13276 19765 : XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13277 : }
13278 :
13279 162803 : if (GET_CODE (XEXP (x, 1)) == MULT)
13280 : {
13281 0 : changed = true;
13282 0 : XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13283 : }
13284 :
13285 162803 : if (changed
13286 19773 : && REG_P (XEXP (x, 1))
13287 16178 : && REG_P (XEXP (x, 0)))
13288 : return x;
13289 :
13290 146625 : if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13291 : {
13292 1858 : changed = true;
13293 1858 : x = legitimize_pic_address (x, 0);
13294 : }
13295 :
13296 146625 : if (changed && ix86_legitimate_address_p (mode, x, false))
13297 3868 : return x;
13298 :
13299 142757 : if (REG_P (XEXP (x, 0))) /* Operand 0 is already a register: compute operand 1 into a fresh pseudo. */
13300 : {
13301 44058 : rtx temp = gen_reg_rtx (Pmode);
13302 41301 : rtx val = force_operand (XEXP (x, 1), temp);
13303 41301 : if (val != temp)
13304 : {
13305 31960 : val = convert_to_mode (Pmode, val, 1);
13306 31667 : emit_move_insn (temp, val);
13307 : }
13308 :
13309 41301 : XEXP (x, 1) = temp;
13310 41301 : return x;
13311 : }
13312 :
13313 101456 : else if (REG_P (XEXP (x, 1))) /* Mirror case: operand 1 is a register, so compute operand 0 into a pseudo. */
13314 : {
13315 3291 : rtx temp = gen_reg_rtx (Pmode);
13316 2657 : rtx val = force_operand (XEXP (x, 0), temp);
13317 2657 : if (val != temp)
13318 : {
13319 0 : val = convert_to_mode (Pmode, val, 1);
13320 0 : emit_move_insn (temp, val);
13321 : }
13322 :
13323 2657 : XEXP (x, 0) = temp;
13324 2657 : return x;
13325 : }
13326 : }
13327 :
13328 : return x;
13329 : }
13330 :
13331 : /* Print an integer constant expression in assembler syntax. Addition
13332 : and subtraction are the only arithmetic that may appear in these
13333 : expressions. FILE is the stdio stream to write to, X is the rtx, and
13334 : CODE is the operand print code from the output string. */
13335 :
13336 : static void
13337 3681555 : output_pic_addr_const (FILE *file, rtx x, int code)
13338 : {
13339 3911497 : char buf[256];
13340 :
13341 3911497 : switch (GET_CODE (x))
13342 : {
13343 0 : case PC:
13344 0 : gcc_assert (flag_pic);
13345 0 : putc ('.', file);
13346 0 : break;
13347 :
13348 861658 : case SYMBOL_REF:
13349 861658 : if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
13350 861658 : output_addr_const (file, x);
13351 : else
13352 : {
13353 : const char *name = XSTR (x, 0);
13354 :
13355 : /* Mark the decl as referenced so that cgraph will
13356 : output the function. */
13357 : if (SYMBOL_REF_DECL (x))
13358 : mark_decl_referenced (SYMBOL_REF_DECL (x));
13359 :
13360 : #if TARGET_MACHO
13361 : if (MACHOPIC_INDIRECT
13362 : && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13363 : name = machopic_indirection_name (x, /*stub_p=*/true);
13364 : #endif
13365 : assemble_name (file, name);
13366 : }
13367 861658 : if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13368 861658 : && code == 'P' && ix86_call_use_plt_p (x))
13369 391109 : fputs ("@PLT", file);
13370 : break;
13371 :
13372 2706 : case LABEL_REF:
13373 2706 : x = XEXP (x, 0);
13374 : /* FALLTHRU */
13375 2706 : case CODE_LABEL:
13376 2706 : ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13377 2706 : assemble_name (asm_out_file, buf);
13378 2706 : break;
13379 :
13380 2616021 : CASE_CONST_SCALAR_INT:
13381 2616021 : output_addr_const (file, x);
13382 2616021 : break;
13383 :
13384 210822 : case CONST:
13385 : /* This used to output parentheses around the expression,
13386 : but that does not work on the 386 (either ATT or BSD assembler). */
13387 210822 : output_pic_addr_const (file, XEXP (x, 0), code);
13388 210822 : break;
13389 :
13390 0 : case CONST_DOUBLE:
13391 : /* We can't handle floating point constants;
13392 : TARGET_PRINT_OPERAND must handle them. */
13393 0 : output_operand_lossage ("floating constant misused");
13394 0 : break;
13395 :
13396 19120 : case PLUS:
13397 : /* Some assemblers need integer constants to appear first. */
13398 19120 : if (CONST_INT_P (XEXP (x, 0)))
13399 : {
13400 0 : output_pic_addr_const (file, XEXP (x, 0), code);
13401 0 : putc ('+', file);
13402 0 : output_pic_addr_const (file, XEXP (x, 1), code);
13403 : }
13404 : else
13405 : {
13406 19120 : gcc_assert (CONST_INT_P (XEXP (x, 1)));
13407 19120 : output_pic_addr_const (file, XEXP (x, 1), code);
13408 19120 : putc ('+', file);
13409 19120 : output_pic_addr_const (file, XEXP (x, 0), code);
13410 : }
13411 : break;
13412 :
13413 0 : case MINUS:
13414 0 : if (!TARGET_MACHO)
13415 0 : putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13416 0 : output_pic_addr_const (file, XEXP (x, 0), code);
13417 0 : putc ('-', file);
13418 0 : output_pic_addr_const (file, XEXP (x, 1), code);
13419 0 : if (!TARGET_MACHO)
13420 0 : putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13421 0 : break;
13422 :
13423 201170 : case UNSPEC:
13424 201170 : gcc_assert (XVECLEN (x, 0) == 1);
13425 201170 : output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13426 201170 : switch (XINT (x, 1))
13427 : {
13428 43320 : case UNSPEC_GOT:
13429 43320 : fputs ("@GOT", file);
13430 43320 : break;
13431 77894 : case UNSPEC_GOTOFF:
13432 77894 : fputs ("@GOTOFF", file);
13433 77894 : break;
13434 36 : case UNSPEC_PLTOFF:
13435 36 : fputs ("@PLTOFF", file);
13436 36 : break;
13437 0 : case UNSPEC_PCREL:
13438 0 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13439 : "(%rip)" : "[rip]", file);
13440 0 : break;
13441 75732 : case UNSPEC_GOTPCREL:
13442 75732 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13443 : "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13444 75732 : break;
13445 0 : case UNSPEC_GOTTPOFF:
13446 : /* FIXME: This might be @TPOFF in Sun ld too. */
13447 0 : fputs ("@gottpoff", file);
13448 0 : break;
13449 0 : case UNSPEC_TPOFF:
13450 0 : fputs ("@tpoff", file);
13451 0 : break;
13452 1459 : case UNSPEC_NTPOFF:
13453 1459 : if (TARGET_64BIT)
13454 1459 : fputs ("@tpoff", file);
13455 : else
13456 0 : fputs ("@ntpoff", file);
13457 : break;
13458 315 : case UNSPEC_DTPOFF:
13459 315 : fputs ("@dtpoff", file);
13460 315 : break;
13461 2414 : case UNSPEC_GOTNTPOFF:
13462 2414 : if (TARGET_64BIT)
13463 2150 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13464 : "@gottpoff(%rip)": "@gottpoff[rip]", file);
13465 : else
13466 264 : fputs ("@gotntpoff", file);
13467 : break;
13468 0 : case UNSPEC_INDNTPOFF:
13469 0 : fputs ("@indntpoff", file);
13470 0 : break;
13471 0 : case UNSPEC_SECREL32:
13472 0 : fputs ("@secrel32", file);
13473 0 : break;
13474 : #if TARGET_MACHO
13475 : case UNSPEC_MACHOPIC_OFFSET:
13476 : putc ('-', file);
13477 : machopic_output_function_base_name (file);
13478 : break;
13479 : #endif
13480 0 : default:
13481 0 : output_operand_lossage ("invalid UNSPEC as operand");
13482 0 : break;
13483 : }
13484 : break;
13485 :
13486 0 : default:
13487 0 : output_operand_lossage ("invalid expression as operand");
13488 : }
13489 3681555 : }
13490 :
13491 : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13492 : We need to emit DTP-relative relocations. */
/* Writes to FILE: ASM_LONG, then X printed by output_addr_const, then a
   relocation suffix ("@secrel32" when TARGET_WIN32_TLS, "@dtpoff"
   otherwise).  SIZE must be 4 or 8; any other value reaches
   gcc_unreachable.  For SIZE 8 the text ", 0" is appended after the
   relocated value (presumably supplying the upper half of the 8-byte
   field -- confirm against the definition of ASM_LONG).  */
13493 :
13494 : static void ATTRIBUTE_UNUSED
13495 694 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13496 : {
13497 694 : fputs (ASM_LONG, file);
13498 694 : output_addr_const (file, x);
13499 : #if TARGET_WIN32_TLS
13500 : fputs ("@secrel32", file);
13501 : #else
13502 694 : fputs ("@dtpoff", file);
13503 : #endif
/* Nothing more to print for a 4-byte value; an 8-byte value gets a
   trailing ", 0".  */
13504 694 : switch (size)
13505 : {
13506 : case 4:
13507 : break;
13508 555 : case 8:
13509 555 : fputs (", 0", file);
13510 555 : break;
13511 0 : default:
13512 0 : gcc_unreachable ();
13513 : }
13514 694 : }
13515 :
13516 : /* Return true if X is a representation of the PIC register. This copes
13517 : with calls from ix86_find_base_term, where the register might have
13518 : been replaced by a cselib value. */
/* The checks below are tried in order:
   - a VALUE with a cselib entry is compared against pic_offset_table_rtx
     using rtx_equal_for_cselib_p (false when pic_offset_table_rtx is null);
   - an UNSPEC_SET_GOT unspec is always accepted;
   - anything else that is not a REG is rejected;
   - a REG is compared by register number against pic_offset_table_rtx, or
     against PIC_OFFSET_TABLE_REGNUM when pic_offset_table_rtx is null.  */
13519 :
13520 : static bool
13521 26858218 : ix86_pic_register_p (rtx x)
13522 : {
13523 26858218 : if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13524 753736 : return (pic_offset_table_rtx
13525 753736 : && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13526 26104482 : else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13527 : return true;
13528 26099904 : else if (!REG_P (x))
13529 : return false;
13530 25493696 : else if (pic_offset_table_rtx)
13531 : {
13532 25474487 : if (REGNO (x) == REGNO (pic_offset_table_rtx))
13533 : return true;
/* X is a hard register while pic_offset_table_rtx is not: accept X when
   its ORIGINAL_REGNO is the PIC register's number.  */
13534 403994 : if (HARD_REGISTER_P (x)
13535 382104 : && !HARD_REGISTER_P (pic_offset_table_rtx)
13536 786098 : && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13537 : return true;
13538 : return false;
13539 : }
13540 : else
13541 19209 : return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13542 : }
13543 :
13544 : /* Helper function for ix86_delegitimize_address.
13545 : Attempt to delegitimize TLS local-exec accesses. */
/* ORIG_X is returned unchanged whenever the pattern is not recognized.
   The recognized form is a Pmode PLUS address (optionally wrapped in a MEM)
   that ix86_decompose_address splits into segment DEFAULT_TLS_SEG_REG and
   a CONST displacement containing an UNSPEC_NTPOFF, optionally plus a
   CONST_INT.  The result is rebuilt as
     [base +] [index [* scale] +] symbol [+ const_int]
   and, if ORIG_X was a MEM, wrapped back into it with
   replace_equiv_address_nv.  */
13546 :
13547 : static rtx
13548 3509788824 : ix86_delegitimize_tls_address (rtx orig_x)
13549 : {
13550 3509788824 : rtx x = orig_x, unspec;
13551 3509788824 : struct ix86_address addr;
13552 :
13553 3509788824 : if (!TARGET_TLS_DIRECT_SEG_REFS)
13554 : return orig_x;
/* Work on the address inside a MEM.  */
13555 3509788824 : if (MEM_P (x))
13556 42597995 : x = XEXP (x, 0);
13557 5044059545 : if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13558 : return orig_x;
/* Require a decomposable address in the TLS segment whose displacement
   is a CONST.  */
13559 1687472408 : if (ix86_decompose_address (x, &addr) == 0
13560 1951430359 : || addr.seg != DEFAULT_TLS_SEG_REG
13561 268427 : || addr.disp == NULL_RTX
13562 1687688800 : || GET_CODE (addr.disp) != CONST)
13563 : return orig_x;
/* Look through an optional (plus UNSPEC const_int) to find the unspec.  */
13564 111900 : unspec = XEXP (addr.disp, 0);
13565 111900 : if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13566 65400 : unspec = XEXP (unspec, 0);
13567 111900 : if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13568 : return orig_x;
13569 111837 : x = XVECEXP (unspec, 0, 0);
13570 111837 : gcc_assert (SYMBOL_REF_P (x));
/* If a const_int was stripped above, add it back onto the symbol.  */
13571 111837 : if (unspec != XEXP (addr.disp, 0))
13572 86432 : x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Re-attach the index (scaled when the scale is not 1) and the base.  */
13573 111837 : if (addr.index)
13574 : {
13575 185 : rtx idx = addr.index;
13576 185 : if (addr.scale != 1)
13577 185 : idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13578 185 : x = gen_rtx_PLUS (Pmode, idx, x);
13579 : }
13580 111837 : if (addr.base)
13581 2 : x = gen_rtx_PLUS (Pmode, addr.base, x);
13582 111837 : if (MEM_P (orig_x))
13583 198 : x = replace_equiv_address_nv (orig_x, x);
13584 : return x;
13585 : }
13586 :
13587 : /* In the name of slightly smaller debug output, and to cater to
13588 : general assembler lossage, recognize PIC+GOTOFF and turn it back
13589 : into a direct symbol reference.
13590 :
13591 : On Darwin, this is necessary to avoid a crash, because Darwin
13592 : has a different PIC label for each routine but the DWARF debugging
13593 : information is not associated with any particular routine, so it's
13594 : necessary to remove references to the PIC label from RTL stored by
13595 : the DWARF output code.
13596 :
13597 : This helper is used in the normal ix86_delegitimize_address
13598 : entrypoint (e.g. used in the target delegitimization hook) and
13599 : in ix86_find_base_term. As compile time memory optimization, we
13600 : avoid allocating rtxes that will not change anything on the outcome
13601 : of the callers (find_base_value and find_base_term). */
/* X is the rtx to delegitimize.  BASE_TERM_P is true when called from
   ix86_find_base_term and false when called from
   ix86_delegitimize_address; it only selects which rtxes are worth
   building.  Returns the delegitimized rtx, or the result of
   delegitimize_mem_from_attrs (X) (possibly passed through
   ix86_delegitimize_tls_address) when no PIC pattern is recognized.  */
13602 :
13603 : static inline rtx
13604 3534622840 : ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13605 : {
13606 3534622840 : rtx orig_x = delegitimize_mem_from_attrs (x);
13607 : /* addend is NULL or some rtx if x is something+GOTOFF where
13608 : something doesn't include the PIC register. */
13609 3534622840 : rtx addend = NULL_RTX;
13610 : /* reg_addend is NULL or a multiple of some register. */
13611 3534622840 : rtx reg_addend = NULL_RTX;
13612 : /* const_addend is NULL or a const_int. */
13613 3534622840 : rtx const_addend = NULL_RTX;
13614 : /* This is the result, or NULL. */
13615 3534622840 : rtx result = NULL_RTX;
13616 :
13617 3534622840 : x = orig_x;
13618 :
/* From here on X is the address being examined; ORIG_X keeps the
   original (possibly MEM) rtx for the fallback returns.  */
13619 3534622840 : if (MEM_P (x))
13620 61798783 : x = XEXP (x, 0);
13621 :
13622 3534622840 : if (TARGET_64BIT)
13623 : {
/* 64-bit case 1: (const (plus (unspec [sym] UNSPEC_PCREL) const_int))
   becomes (plus const_int sym).  */
13624 249973723 : if (GET_CODE (x) == CONST
13625 8614279 : && GET_CODE (XEXP (x, 0)) == PLUS
13626 6673786 : && GET_MODE (XEXP (x, 0)) == Pmode
13627 6673737 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13628 6673737 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13629 249977868 : && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13630 : {
13631 : /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13632 : base. A CONST can't be arg_pointer_rtx based. */
13633 0 : if (base_term_p && MEM_P (orig_x))
13634 : return orig_x;
13635 0 : rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13636 0 : x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13637 0 : if (MEM_P (orig_x))
13638 0 : x = replace_equiv_address_nv (orig_x, x);
13639 0 : return x;
13640 : }
13641 :
/* 64-bit case 2: (const (unspec [sym] ...)) collapses to sym for
   UNSPEC_PCREL always, and for UNSPEC_GOTPCREL only when ORIG_X is a
   MEM.  A MEM whose mode differs from the operand's mode gets a lowpart
   subreg; if that cannot be formed, ORIG_X is returned.  */
13642 249973723 : if (GET_CODE (x) == CONST
13643 8614279 : && GET_CODE (XEXP (x, 0)) == UNSPEC
13644 1940542 : && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13645 655038 : || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13646 1285504 : && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13647 : {
13648 294674 : x = XVECEXP (XEXP (x, 0), 0, 0);
13649 294674 : if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13650 : {
13651 9 : x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13652 9 : if (x == NULL_RTX)
13653 : return orig_x;
13654 : }
13655 294674 : return x;
13656 : }
13657 :
13658 249679049 : if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13659 249677388 : return ix86_delegitimize_tls_address (orig_x);
13660 :
13661 : /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13662 : and -mcmodel=medium -fpic. */
13663 : }
13664 :
/* The shared path only handles (plus SOMETHING (const ...)); everything
   else is handed to the TLS delegitimizer.  */
13665 3284650778 : if (GET_CODE (x) != PLUS
13666 1558809835 : || GET_CODE (XEXP (x, 1)) != CONST)
13667 3258344203 : return ix86_delegitimize_tls_address (orig_x);
13668 :
/* Classify the first operand of the PLUS: the PIC register alone, the
   PIC register plus a register term (kept in reg_addend), or something
   that does not involve the PIC register (kept in addend).  */
13669 26306575 : if (ix86_pic_register_p (XEXP (x, 0)))
13670 : /* %ebx + GOT/GOTOFF */
13671 : ;
13672 1280086 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
13673 : {
13674 : /* %ebx + %reg * scale + GOT/GOTOFF */
13675 473596 : reg_addend = XEXP (x, 0);
13676 473596 : if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13677 395549 : reg_addend = XEXP (reg_addend, 1);
13678 78047 : else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13679 46661 : reg_addend = XEXP (reg_addend, 0);
13680 : else
13681 : {
13682 31386 : reg_addend = NULL_RTX;
13683 31386 : addend = XEXP (x, 0);
13684 : }
13685 : }
13686 : else
13687 : addend = XEXP (x, 0);
13688 :
/* Now look inside the CONST; peel off an optional const_int offset.  */
13689 26306575 : x = XEXP (XEXP (x, 1), 0);
13690 26306575 : if (GET_CODE (x) == PLUS
13691 1447840 : && CONST_INT_P (XEXP (x, 1)))
13692 : {
13693 1447840 : const_addend = XEXP (x, 1);
13694 1447840 : x = XEXP (x, 0);
13695 : }
13696 :
/* Accept the unspec's operand as the result for:
   - UNSPEC_GOT, only for a MEM with no addend;
   - UNSPEC_GOTOFF, only for a non-MEM;
   - UNSPEC_PLTOFF, only for a non-MEM with no addend under
     CM_LARGE_PIC.  */
13697 26306575 : if (GET_CODE (x) == UNSPEC
13698 25631934 : && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13699 6725820 : || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13700 1092596 : || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13701 4 : && !MEM_P (orig_x) && !addend)))
13702 24539342 : result = XVECEXP (x, 0, 0);
13703 :
/* 32-bit Mach-O: also accept what darwin_local_data_pic recognizes,
   for a non-MEM.  */
13704 24539342 : if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13705 : && !MEM_P (orig_x))
13706 : result = XVECEXP (x, 0, 0);
13707 :
13708 24539342 : if (! result)
13709 1767233 : return ix86_delegitimize_tls_address (orig_x);
13710 :
13711 : /* For (PLUS something CONST_INT) both find_base_{value,term} just
13712 : recurse on the first operand. */
13713 24539342 : if (const_addend && !base_term_p)
13714 355425 : result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13715 24539342 : if (reg_addend)
13716 859412 : result = gen_rtx_PLUS (Pmode, reg_addend, result);
13717 24539342 : if (addend)
13718 : {
13719 : /* If the rest of original X doesn't involve the PIC register, add
13720 : addend and subtract pic_offset_table_rtx. This can happen e.g.
13721 : for code like:
13722 : leal (%ebx, %ecx, 4), %ecx
13723 : ...
13724 : movl foo@GOTOFF(%ecx), %edx
13725 : in which case we return (%ecx - %ebx) + foo
13726 : or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13727 : and reload has completed. Don't do the latter for debug,
13728 : as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13729 135856 : if (pic_offset_table_rtx
13730 135856 : && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13731 2370 : result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13732 : pic_offset_table_rtx),
13733 : result);
13734 135066 : else if (base_term_p
13735 128758 : && pic_offset_table_rtx
13736 : && !TARGET_MACHO
13737 : && !TARGET_VXWORKS_VAROFF)
13738 : {
13739 257516 : rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13740 257516 : tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13741 257516 : result = gen_rtx_PLUS (Pmode, tmp, result);
13742 128758 : }
13743 : else
13744 : return orig_x;
13745 : }
/* A MEM in a mode other than Pmode gets a lowpart subreg of the result;
   if that cannot be formed, give up and return ORIG_X.  */
13746 49065983 : if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13747 : {
13748 0 : result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13749 0 : if (result == NULL_RTX)
13750 : return orig_x;
13751 : }
13752 : return result;
13753 : }
13754 :
13755 : /* The normal instantiation of the above template. */
/* That is: delegitimize X by calling ix86_delegitimize_address_1 with
   base_term_p set to false.  */
13756 :
13757 : static rtx
13758 320677177 : ix86_delegitimize_address (rtx x)
13759 : {
13760 320677177 : return ix86_delegitimize_address_1 (x, false);
13761 : }
13762 :
13763 : /* If X is a machine specific address (i.e. a symbol or label being
13764 : referenced as a displacement from the GOT implemented using an
13765 : UNSPEC), then return the base term. Otherwise return X. */
/* For TARGET_64BIT only a CONST is examined: an optional const_int addend
   is skipped and the operand of an UNSPEC_GOTPCREL or UNSPEC_PCREL is
   returned.  Otherwise the work is done by ix86_delegitimize_address_1
   with base_term_p set to true.  */
13766 :
13767 : rtx
13768 6683261833 : ix86_find_base_term (rtx x)
13769 : {
13770 6683261833 : rtx term;
13771 :
13772 6683261833 : if (TARGET_64BIT)
13773 : {
13774 3469316170 : if (GET_CODE (x) != CONST)
13775 : return x;
/* Skip (plus TERM const_int) to reach the candidate unspec.  */
13776 38640994 : term = XEXP (x, 0);
13777 38640994 : if (GET_CODE (term) == PLUS
13778 38626109 : && CONST_INT_P (XEXP (term, 1)))
13779 38626109 : term = XEXP (term, 0);
13780 38640994 : if (GET_CODE (term) != UNSPEC
13781 40591 : || (XINT (term, 1) != UNSPEC_GOTPCREL
13782 40591 : && XINT (term, 1) != UNSPEC_PCREL))
13783 : return x;
13784 :
13785 0 : return XVECEXP (term, 0, 0);
13786 : }
13787 :
13788 3213945663 : return ix86_delegitimize_address_1 (x, true);
13789 : }
13790 :
13791 : /* Return true if X shouldn't be emitted into the debug info.
13792 : Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13793 : symbol easily into the .debug_info section, so we must not
13794 : delegitimize, but instead assemble as @gotoff.
13795 : Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13796 : assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
13797 :
13798 : static bool
13799 1743576 : ix86_const_not_ok_for_debug_p (rtx x)
13800 : {
/* Any UNSPEC except UNSPEC_GOTOFF is rejected.  */
13801 1743576 : if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13802 : return true;
13803 :
/* A SYMBOL_REF whose name equals GOT_SYMBOL_NAME is rejected.  */
13804 1743556 : if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13805 0 : return true;
13806 :
13807 : return false;
13808 : }
13809 :
/* Write to FILE the condition-code suffix for comparison CODE evaluated
   in flags mode MODE.

   A MODE of CCFPmode is first converted: CODE goes through
   ix86_fp_compare_code_to_integer and MODE becomes CCmode.  If REVERSE is
   true, CODE is then replaced by reverse_condition (CODE).  FP selects
   alternative spellings for some conditions ("nbe" instead of "a", "b"/"nb"
   instead of "c"/"nc", "u"/"nu" instead of "p"/"np").

   Combinations of CODE and MODE that are not listed below hit
   gcc_assert or gcc_unreachable.  */
13810 : static void
13811 7106781 : put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13812 : bool fp, FILE *file)
13813 : {
13814 7106781 : const char *suffix;
13815 :
13816 7106781 : if (mode == CCFPmode)
13817 : {
13818 565676 : code = ix86_fp_compare_code_to_integer (code);
13819 565676 : mode = CCmode;
13820 : }
13821 7106781 : if (reverse)
13822 209421 : code = reverse_condition (code);
13823 :
13824 7106781 : switch (code)
13825 : {
/* EQ: the single-flag modes CCA/CCC/CCO/CCP/CCS map to "a", "c", "o",
   "p" and "s"; every other mode prints "e".  CCGZmode is asserted
   against.  */
13826 2746891 : case EQ:
13827 2746891 : gcc_assert (mode != CCGZmode);
13828 2746891 : switch (mode)
13829 : {
13830 : case E_CCAmode:
13831 : suffix = "a";
13832 : break;
13833 : case E_CCCmode:
13834 26378 : suffix = "c";
13835 : break;
13836 : case E_CCOmode:
13837 7106781 : suffix = "o";
13838 : break;
13839 : case E_CCPmode:
13840 234093 : suffix = "p";
13841 : break;
13842 : case E_CCSmode:
13843 121626 : suffix = "s";
13844 : break;
13845 2727163 : default:
13846 2727163 : suffix = "e";
13847 2727163 : break;
13848 : }
13849 : break;
/* NE: the negated counterparts of the EQ table ("na", "nc", "no", "np",
   "ns", default "ne").  */
13850 2315036 : case NE:
13851 2315036 : gcc_assert (mode != CCGZmode);
13852 2315036 : switch (mode)
13853 : {
13854 : case E_CCAmode:
13855 : suffix = "na";
13856 : break;
13857 : case E_CCCmode:
13858 12016 : suffix = "nc";
13859 : break;
13860 10766 : case E_CCOmode:
13861 10766 : suffix = "no";
13862 10766 : break;
13863 : case E_CCPmode:
13864 4443 : suffix = "np";
13865 : break;
13866 : case E_CCSmode:
13867 50707 : suffix = "ns";
13868 : break;
13869 2302384 : default:
13870 2302384 : suffix = "ne";
13871 2302384 : break;
13872 : }
13873 : break;
13874 256217 : case GT:
13875 256217 : gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13876 : suffix = "g";
13877 : break;
13878 172715 : case GTU:
13879 : /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13880 : Those same assemblers have the same but opposite lossage on cmov. */
13881 172715 : if (mode == CCmode)
13882 172777 : suffix = fp ? "nbe" : "a";
13883 : else
13884 0 : gcc_unreachable ();
13885 : break;
/* LT: CCNOmode and CCGOCmode print "s"; CCmode, CCGCmode and CCGZmode
   print "l".  */
13886 236850 : case LT:
13887 236850 : switch (mode)
13888 : {
13889 : case E_CCNOmode:
13890 : case E_CCGOCmode:
13891 : suffix = "s";
13892 : break;
13893 :
13894 : case E_CCmode:
13895 : case E_CCGCmode:
13896 : case E_CCGZmode:
13897 7106781 : suffix = "l";
13898 : break;
13899 :
13900 0 : default:
13901 0 : gcc_unreachable ();
13902 : }
13903 : break;
13904 442094 : case LTU:
13905 442094 : if (mode == CCmode || mode == CCGZmode)
13906 : suffix = "b";
13907 25047 : else if (mode == CCCmode)
13908 26378 : suffix = fp ? "b" : "c";
13909 : else
13910 0 : gcc_unreachable ();
13911 : break;
/* GE: CCNOmode and CCGOCmode print "ns"; CCmode, CCGCmode and CCGZmode
   print "ge".  */
13912 145286 : case GE:
13913 145286 : switch (mode)
13914 : {
13915 : case E_CCNOmode:
13916 : case E_CCGOCmode:
13917 : suffix = "ns";
13918 : break;
13919 :
13920 : case E_CCmode:
13921 : case E_CCGCmode:
13922 : case E_CCGZmode:
13923 7106781 : suffix = "ge";
13924 : break;
13925 :
13926 0 : default:
13927 0 : gcc_unreachable ();
13928 : }
13929 : break;
13930 188608 : case GEU:
13931 188608 : if (mode == CCmode || mode == CCGZmode)
13932 : suffix = "nb";
13933 10150 : else if (mode == CCCmode)
13934 12016 : suffix = fp ? "nb" : "nc";
13935 : else
13936 0 : gcc_unreachable ();
13937 : break;
13938 247320 : case LE:
13939 247320 : gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13940 : suffix = "le";
13941 : break;
13942 117226 : case LEU:
13943 117226 : if (mode == CCmode)
13944 : suffix = "be";
13945 : else
13946 0 : gcc_unreachable ();
13947 : break;
/* UNORDERED and ORDERED do not look at MODE; only FP picks the
   spelling.  */
13948 234093 : case UNORDERED:
13949 234100 : suffix = fp ? "u" : "p";
13950 : break;
13951 4445 : case ORDERED:
13952 4450 : suffix = fp ? "nu" : "np";
13953 : break;
13954 0 : default:
13955 0 : gcc_unreachable ();
13956 : }
13957 7106781 : fputs (suffix, file);
13958 7106781 : }
13959 :
13960 : /* Print the name of register X to FILE based on its machine mode and number.
13961 : If CODE is 'w', pretend the mode is HImode.
13962 : If CODE is 'b', pretend the mode is QImode.
13963 : If CODE is 'k', pretend the mode is SImode.
13964 : If CODE is 'q', pretend the mode is DImode.
13965 : If CODE is 'x', pretend the mode is V4SFmode.
13966 : If CODE is 't', pretend the mode is V8SFmode.
13967 : If CODE is 'g', pretend the mode is V16SFmode.
13968 : If CODE is 'h', pretend the reg is the 'high' byte register.
13969 : If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13970 : If CODE is 'd', duplicate the operand for AVX instruction.
13971 : If CODE is 'V', print naked full integer register name without %.
13972 : */
13973 :
13974 : void
13975 123330134 : print_reg (rtx x, int code, FILE *file)
13976 : {
13977 123330134 : const char *reg;
13978 123330134 : int msize;
13979 123330134 : unsigned int regno;
13980 123330134 : bool duplicated;
13981 :
/* AT&T syntax prefixes register names with '%', except for code 'V'.  */
13982 123330134 : if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13983 123327681 : putc ('%', file);
13984 :
/* pc_rtx is printed as "rip"; it is only expected for TARGET_64BIT.  */
13985 123330134 : if (x == pc_rtx)
13986 : {
13987 5732733 : gcc_assert (TARGET_64BIT);
13988 5732733 : fputs ("rip", file);
13989 5732733 : return;
13990 : }
13991 :
13992 117597401 : if (code == 'y' && STACK_TOP_P (x))
13993 : {
13994 289965 : fputs ("st(0)", file);
13995 289965 : return;
13996 : }
13997 :
/* Pick the operand size in bytes that selects the register name: forced
   by CODE when it is one of the size letters (0 stands for the 'h' high
   byte form), otherwise taken from the mode of X.  */
13998 117307436 : if (code == 'w')
13999 : msize = 2;
14000 : else if (code == 'b')
14001 : msize = 1;
14002 : else if (code == 'k')
14003 : msize = 4;
14004 : else if (code == 'q')
14005 : msize = 8;
14006 : else if (code == 'h')
14007 : msize = 0;
14008 : else if (code == 'x')
14009 : msize = 16;
14010 : else if (code == 't')
14011 : msize = 32;
14012 : else if (code == 'g')
14013 : msize = 64;
14014 : else
14015 200503384 : msize = GET_MODE_SIZE (GET_MODE (x));
14016 :
14017 117307436 : regno = REGNO (x);
14018 :
/* The argument pointer, frame pointer, FPSR and flags registers are
   reported through output_operand_lossage instead of being printed.  */
14019 117307436 : if (regno == ARG_POINTER_REGNUM
14020 117307436 : || regno == FRAME_POINTER_REGNUM
14021 117307436 : || regno == FPSR_REG)
14022 : {
14023 0 : output_operand_lossage
14024 0 : ("invalid use of register '%s'", reg_names[regno]);
14025 0 : return;
14026 : }
14027 117307436 : else if (regno == FLAGS_REG)
14028 : {
14029 1 : output_operand_lossage ("invalid use of asm flag output");
14030 1 : return;
14031 : }
14032 :
/* 'V' overrides the size with that of word_mode for general registers
   and is an error for any other register.  */
14033 117307435 : if (code == 'V')
14034 : {
14035 1 : if (GENERAL_REGNO_P (regno))
14036 2 : msize = GET_MODE_SIZE (word_mode);
14037 : else
14038 0 : error ("%<V%> modifier on non-integer register");
14039 : }
14040 :
14041 117307435 : duplicated = code == 'd' && TARGET_AVX;
14042 :
/* Select the name.  Sizes 16/12/8 warn when a general register is wider
   than word_mode, then fall into size 4, which prints an 'r' (size above
   4 on TARGET_64BIT) or 'e' prefix for legacy integer registers, then
   into the common hi_reg_name lookup.  Sizes 1 and 0 use the byte-name
   tables when REGNO is within them.  Sizes 32/64 print 'y' or 'z' for
   SSE registers followed by hi_reg_name with its first character skipped
   (presumably the leading 'x' of "xmmN" -- confirm against
   hi_reg_name).  */
14043 117307435 : switch (msize)
14044 : {
14045 77818215 : case 16:
14046 77818215 : case 12:
14047 77818215 : case 8:
14048 145570853 : if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
14049 5 : warning (0, "unsupported size for integer register");
14050 : /* FALLTHRU */
14051 113877268 : case 4:
14052 113877268 : if (LEGACY_INT_REGNO_P (regno))
14053 123269204 : putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
14054 : /* FALLTHRU */
14055 114757936 : case 2:
14056 22193275 : normal:
14057 114757936 : reg = hi_reg_name[regno];
14058 114757936 : break;
14059 2283053 : case 1:
14060 2283053 : if (regno >= ARRAY_SIZE (qi_reg_name))
14061 273789 : goto normal;
14062 2009264 : if (!ANY_QI_REGNO_P (regno))
14063 0 : error ("unsupported size for integer register");
14064 2009264 : reg = qi_reg_name[regno];
14065 2009264 : break;
14066 27105 : case 0:
14067 27105 : if (regno >= ARRAY_SIZE (qi_high_reg_name))
14068 0 : goto normal;
14069 27105 : reg = qi_high_reg_name[regno];
14070 27105 : break;
14071 513130 : case 32:
14072 513130 : case 64:
14073 513130 : if (SSE_REGNO_P (regno))
14074 : {
14075 513130 : gcc_assert (!duplicated);
14076 718784 : putc (msize == 32 ? 'y' : 'z', file);
14077 513130 : reg = hi_reg_name[regno] + 1;
14078 513130 : break;
14079 : }
14080 0 : goto normal;
14081 0 : default:
14082 0 : gcc_unreachable ();
14083 : }
14084 :
14085 117307435 : fputs (reg, file);
14086 :
14087 : /* Irritatingly, AMD extended registers use
14088 : different naming convention: "r%d[bwd]" */
14089 117307435 : if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
14090 : {
14091 10398674 : gcc_assert (TARGET_64BIT);
14092 10398674 : switch (msize)
14093 : {
14094 0 : case 0:
14095 0 : error ("extended registers have no high halves");
14096 0 : break;
14097 181509 : case 1:
14098 181509 : putc ('b', file);
14099 181509 : break;
14100 27729 : case 2:
14101 27729 : putc ('w', file);
14102 27729 : break;
14103 2535580 : case 4:
14104 2535580 : putc ('d', file);
14105 2535580 : break;
14106 : case 8:
14107 : /* no suffix */
14108 : break;
14109 0 : default:
14110 0 : error ("unsupported operand size for extended register");
14111 0 : break;
14112 : }
14113 10398674 : return;
14114 : }
14115 :
/* For 'd' with TARGET_AVX, print the register a second time after a
   comma, with a '%' in AT&T syntax.  */
14116 106908761 : if (duplicated)
14117 : {
14118 16919 : if (ASSEMBLER_DIALECT == ASM_ATT)
14119 16898 : fprintf (file, ", %%%s", reg);
14120 : else
14121 21 : fprintf (file, ", %s", reg);
14122 : }
14123 : }
14124 :
14125 : /* Meaning of CODE:
14126 : L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14127 : C -- print opcode suffix for set/cmov insn.
14128 : c -- like C, but print reversed condition
14129 : F,f -- likewise, but for floating-point.
14130 : O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14131 : otherwise nothing
14132 : R -- print embedded rounding and sae.
14133 : r -- print only sae.
14134 : z -- print the opcode suffix for the size of the current operand.
14135 : Z -- likewise, with special suffixes for x87 instructions.
14136 : * -- print a star (in certain assembler syntax)
14137 : A -- print an absolute memory reference.
14138 : E -- print address with DImode register names if TARGET_64BIT.
14139 : w -- print the operand as if it's a "word" (HImode) even if it isn't.
14140 : s -- print a shift double count, followed by the assemblers argument
14141 : delimiter.
14142 : b -- print the QImode name of the register for the indicated operand.
14143 : %b0 would print %al if operands[0] is reg 0.
14144 : w -- likewise, print the HImode name of the register.
14145 : k -- likewise, print the SImode name of the register.
14146 : q -- likewise, print the DImode name of the register.
14147 : x -- likewise, print the V4SFmode name of the register.
14148 : t -- likewise, print the V8SFmode name of the register.
14149 : g -- likewise, print the V16SFmode name of the register.
14150 : h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14151 : y -- print "st(0)" instead of "st" as a register.
14152 : d -- print duplicated register operand for AVX instruction.
14153 : D -- print condition for SSE cmp instruction.
14154 : P -- if PIC, print an @PLT suffix. For -fno-plt, load function
14155 : address from GOT.
14156 : p -- print raw symbol name.
14157 : X -- don't print any sort of PIC '@' suffix for a symbol.
14158 : & -- print some in-use local-dynamic symbol name.
14159 : H -- print a memory address offset by 8; used for sse high-parts
14160 : Y -- print condition for XOP pcom* instruction.
14161 : V -- print naked full integer register name without %.
14162 : v -- print segment override prefix
14163 : + -- print a branch hint as 'cs' or 'ds' prefix
14164 : ; -- print a semicolon (after prefixes due to bug in older gas).
14165 : ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14166 : ^ -- print addr32 prefix if Pmode != word_mode
14167 : M -- print addr32 prefix for TARGET_X32 with VSIB address.
14168 : ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
14169 : N -- print maskz if it's constant 0 operand.
14170 : G -- print embedded flag for ccmp/ctest.
14171 : */
14172 :
14173 : void
14174 176569534 : ix86_print_operand (FILE *file, rtx x, int code)
14175 : {
14176 176759063 : if (code)
14177 : {
14178 61916564 : switch (code)
14179 : {
14180 189525 : case 'A':
14181 189525 : switch (ASSEMBLER_DIALECT)
14182 : {
14183 189525 : case ASM_ATT:
14184 189525 : putc ('*', file);
14185 189525 : break;
14186 :
14187 0 : case ASM_INTEL:
14188 : /* Intel syntax. For absolute addresses, registers should not
14189 : be surrounded by braces. */
14190 0 : if (!REG_P (x))
14191 : {
14192 0 : putc ('[', file);
14193 0 : ix86_print_operand (file, x, 0);
14194 0 : putc (']', file);
14195 0 : return;
14196 : }
14197 : break;
14198 :
14199 0 : default:
14200 0 : gcc_unreachable ();
14201 : }
14202 :
14203 189525 : ix86_print_operand (file, x, 0);
14204 189525 : return;
14205 :
14206 3529794 : case 'E':
14207 : /* Wrap address in an UNSPEC to declare special handling. */
14208 3529794 : if (TARGET_64BIT)
14209 3045556 : x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14210 :
14211 3529794 : output_address (VOIDmode, x);
14212 3529794 : return;
14213 :
14214 0 : case 'L':
14215 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14216 0 : putc ('l', file);
14217 0 : return;
14218 :
14219 0 : case 'W':
14220 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14221 0 : putc ('w', file);
14222 0 : return;
14223 :
14224 0 : case 'B':
14225 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14226 0 : putc ('b', file);
14227 0 : return;
14228 :
14229 0 : case 'Q':
14230 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14231 0 : putc ('l', file);
14232 0 : return;
14233 :
14234 0 : case 'S':
14235 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14236 0 : putc ('s', file);
14237 0 : return;
14238 :
14239 0 : case 'T':
14240 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14241 0 : putc ('t', file);
14242 0 : return;
14243 :
14244 : case 'O':
14245 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14246 : if (ASSEMBLER_DIALECT != ASM_ATT)
14247 : return;
14248 :
14249 : switch (GET_MODE_SIZE (GET_MODE (x)))
14250 : {
14251 : case 2:
14252 : putc ('w', file);
14253 : break;
14254 :
14255 : case 4:
14256 : putc ('l', file);
14257 : break;
14258 :
14259 : case 8:
14260 : putc ('q', file);
14261 : break;
14262 :
14263 : default:
14264 : output_operand_lossage ("invalid operand size for operand "
14265 : "code 'O'");
14266 : return;
14267 : }
14268 :
14269 : putc ('.', file);
14270 : #endif
14271 : return;
14272 :
14273 38044 : case 'z':
14274 38044 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14275 : {
14276 : /* Opcodes don't get size suffixes if using Intel opcodes. */
14277 38042 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14278 : return;
14279 :
14280 76084 : switch (GET_MODE_SIZE (GET_MODE (x)))
14281 : {
14282 6 : case 1:
14283 6 : putc ('b', file);
14284 6 : return;
14285 :
14286 6 : case 2:
14287 6 : putc ('w', file);
14288 6 : return;
14289 :
14290 37549 : case 4:
14291 37549 : putc ('l', file);
14292 37549 : return;
14293 :
14294 481 : case 8:
14295 481 : putc ('q', file);
14296 481 : return;
14297 :
14298 0 : default:
14299 0 : output_operand_lossage ("invalid operand size for operand "
14300 : "code 'z'");
14301 0 : return;
14302 : }
14303 : }
14304 :
14305 2 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14306 : {
14307 1 : if (this_is_asm_operands)
14308 1 : warning_for_asm (this_is_asm_operands,
14309 : "non-integer operand used with operand code %<z%>");
14310 : else
14311 0 : warning (0, "non-integer operand used with operand code %<z%>");
14312 : }
14313 : /* FALLTHRU */
14314 :
14315 378797 : case 'Z':
14316 : /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14317 378797 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14318 : return;
14319 :
14320 378797 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14321 : {
14322 29330 : switch (GET_MODE_SIZE (GET_MODE (x)))
14323 : {
14324 3525 : case 2:
14325 : #ifdef HAVE_AS_IX86_FILDS
14326 3525 : putc ('s', file);
14327 : #endif
14328 3525 : return;
14329 :
14330 3944 : case 4:
14331 3944 : putc ('l', file);
14332 3944 : return;
14333 :
14334 7196 : case 8:
14335 : #ifdef HAVE_AS_IX86_FILDQ
14336 7196 : putc ('q', file);
14337 : #else
14338 : fputs ("ll", file);
14339 : #endif
14340 7196 : return;
14341 :
14342 : default:
14343 : break;
14344 : }
14345 : }
14346 364132 : else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14347 : {
14348 : /* 387 opcodes don't get size suffixes
14349 : if the operands are registers. */
14350 364130 : if (STACK_REG_P (x))
14351 : return;
14352 :
14353 683708 : switch (GET_MODE_SIZE (GET_MODE (x)))
14354 : {
14355 23309 : case 4:
14356 23309 : putc ('s', file);
14357 23309 : return;
14358 :
14359 32699 : case 8:
14360 32699 : putc ('l', file);
14361 32699 : return;
14362 :
14363 285844 : case 12:
14364 285844 : case 16:
14365 285844 : putc ('t', file);
14366 285844 : return;
14367 :
14368 : default:
14369 : break;
14370 : }
14371 : }
14372 : else
14373 : {
14374 2 : output_operand_lossage ("invalid operand type used with "
14375 : "operand code '%c'", code);
14376 2 : return;
14377 : }
14378 :
14379 2 : output_operand_lossage ("invalid operand size for operand code '%c'",
14380 : code);
14381 2 : return;
14382 :
14383 : case 'd':
14384 : case 'b':
14385 : case 'w':
14386 : case 'k':
14387 : case 'q':
14388 : case 'h':
14389 : case 't':
14390 : case 'g':
14391 : case 'y':
14392 : case 'x':
14393 : case 'X':
14394 : case 'P':
14395 : case 'p':
14396 : case 'V':
14397 : break;
14398 :
14399 0 : case 's':
14400 0 : if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14401 : {
14402 0 : ix86_print_operand (file, x, 0);
14403 0 : fputs (", ", file);
14404 : }
14405 0 : return;
14406 :
14407 494 : case 'Y':
14408 494 : switch (GET_CODE (x))
14409 : {
14410 182 : case NE:
14411 182 : fputs ("neq", file);
14412 182 : break;
14413 32 : case EQ:
14414 32 : fputs ("eq", file);
14415 32 : break;
14416 64 : case GE:
14417 64 : case GEU:
14418 64 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14419 64 : break;
14420 40 : case GT:
14421 40 : case GTU:
14422 40 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14423 40 : break;
14424 64 : case LE:
14425 64 : case LEU:
14426 64 : fputs ("le", file);
14427 64 : break;
14428 112 : case LT:
14429 112 : case LTU:
14430 112 : fputs ("lt", file);
14431 112 : break;
14432 0 : case UNORDERED:
14433 0 : fputs ("unord", file);
14434 0 : break;
14435 0 : case ORDERED:
14436 0 : fputs ("ord", file);
14437 0 : break;
14438 0 : case UNEQ:
14439 0 : fputs ("ueq", file);
14440 0 : break;
14441 0 : case UNGE:
14442 0 : fputs ("nlt", file);
14443 0 : break;
14444 0 : case UNGT:
14445 0 : fputs ("nle", file);
14446 0 : break;
14447 0 : case UNLE:
14448 0 : fputs ("ule", file);
14449 0 : break;
14450 0 : case UNLT:
14451 0 : fputs ("ult", file);
14452 0 : break;
14453 0 : case LTGT:
14454 0 : fputs ("une", file);
14455 0 : break;
14456 0 : default:
14457 0 : output_operand_lossage ("operand is not a condition code, "
14458 : "invalid operand code 'Y'");
14459 0 : return;
14460 : }
14461 494 : return;
14462 :
14463 9339 : case 'D':
14464 : /* Little bit of braindamage here. The SSE compare instructions
14465 : use completely different names for the comparisons than the
14466 : fp conditional moves. */
14467 9339 : switch (GET_CODE (x))
14468 : {
14469 3 : case UNEQ:
14470 3 : if (TARGET_AVX)
14471 : {
14472 3 : fputs ("eq_us", file);
14473 3 : break;
14474 : }
14475 : /* FALLTHRU */
14476 4635 : case EQ:
14477 4635 : fputs ("eq", file);
14478 4635 : break;
14479 0 : case UNLT:
14480 0 : if (TARGET_AVX)
14481 : {
14482 0 : fputs ("nge", file);
14483 0 : break;
14484 : }
14485 : /* FALLTHRU */
14486 1637 : case LT:
14487 1637 : fputs ("lt", file);
14488 1637 : break;
14489 0 : case UNLE:
14490 0 : if (TARGET_AVX)
14491 : {
14492 0 : fputs ("ngt", file);
14493 0 : break;
14494 : }
14495 : /* FALLTHRU */
14496 795 : case LE:
14497 795 : fputs ("le", file);
14498 795 : break;
14499 95 : case UNORDERED:
14500 95 : fputs ("unord", file);
14501 95 : break;
14502 24 : case LTGT:
14503 24 : if (TARGET_AVX)
14504 : {
14505 24 : fputs ("neq_oq", file);
14506 24 : break;
14507 : }
14508 : /* FALLTHRU */
14509 898 : case NE:
14510 898 : fputs ("neq", file);
14511 898 : break;
14512 0 : case GE:
14513 0 : if (TARGET_AVX)
14514 : {
14515 0 : fputs ("ge", file);
14516 0 : break;
14517 : }
14518 : /* FALLTHRU */
14519 402 : case UNGE:
14520 402 : fputs ("nlt", file);
14521 402 : break;
14522 0 : case GT:
14523 0 : if (TARGET_AVX)
14524 : {
14525 0 : fputs ("gt", file);
14526 0 : break;
14527 : }
14528 : /* FALLTHRU */
14529 767 : case UNGT:
14530 767 : fputs ("nle", file);
14531 767 : break;
14532 83 : case ORDERED:
14533 83 : fputs ("ord", file);
14534 83 : break;
14535 0 : default:
14536 0 : output_operand_lossage ("operand is not a condition code, "
14537 : "invalid operand code 'D'");
14538 0 : return;
14539 : }
14540 9339 : return;
14541 :
14542 7106781 : case 'F':
14543 7106781 : case 'f':
14544 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14545 : if (ASSEMBLER_DIALECT == ASM_ATT)
14546 : putc ('.', file);
14547 : gcc_fallthrough ();
14548 : #endif
14549 :
14550 7106781 : case 'C':
14551 7106781 : case 'c':
14552 7106781 : if (!COMPARISON_P (x))
14553 : {
14554 0 : output_operand_lossage ("operand is not a condition code, "
14555 : "invalid operand code '%c'", code);
14556 0 : return;
14557 : }
14558 7106781 : put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14559 7106781 : code == 'c' || code == 'f',
14560 7106781 : code == 'F' || code == 'f',
14561 : file);
14562 7106781 : return;
14563 :
14564 21 : case 'G':
14565 21 : {
14566 21 : int dfv = INTVAL (x);
14567 21 : const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14568 21 : fputs (dfv_suffix, file);
14569 : }
14570 21 : return;
14571 :
14572 1286 : case 'H':
14573 1286 : if (!offsettable_memref_p (x))
14574 : {
14575 1 : output_operand_lossage ("operand is not an offsettable memory "
14576 : "reference, invalid operand code 'H'");
14577 1 : return;
14578 : }
14579 : /* It doesn't actually matter what mode we use here, as we're
14580 : only going to use this for printing. */
14581 1285 : x = adjust_address_nv (x, DImode, 8);
14582 : /* Output 'qword ptr' for intel assembler dialect. */
14583 1285 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14584 0 : code = 'q';
14585 : break;
14586 :
14587 75584 : case 'K':
14588 75584 : if (!CONST_INT_P (x))
14589 : {
14590 1 : output_operand_lossage ("operand is not an integer, invalid "
14591 : "operand code 'K'");
14592 1 : return;
14593 : }
14594 :
14595 75583 : if (INTVAL (x) & IX86_HLE_ACQUIRE)
14596 : #ifdef HAVE_AS_IX86_HLE
14597 22 : fputs ("xacquire ", file);
14598 : #else
14599 : fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14600 : #endif
14601 75561 : else if (INTVAL (x) & IX86_HLE_RELEASE)
14602 : #ifdef HAVE_AS_IX86_HLE
14603 24 : fputs ("xrelease ", file);
14604 : #else
14605 : fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14606 : #endif
14607 : /* We do not want to print value of the operand. */
14608 75583 : return;
14609 :
14610 43238 : case 'N':
14611 43238 : if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14612 15513 : fputs ("{z}", file);
14613 43238 : return;
14614 :
14615 4117 : case 'r':
14616 4117 : if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14617 : {
14618 2 : output_operand_lossage ("operand is not a specific integer, "
14619 : "invalid operand code 'r'");
14620 2 : return;
14621 : }
14622 :
14623 4115 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14624 1 : fputs (", ", file);
14625 :
14626 4115 : fputs ("{sae}", file);
14627 :
14628 4115 : if (ASSEMBLER_DIALECT == ASM_ATT)
14629 4114 : fputs (", ", file);
14630 :
14631 4115 : return;
14632 :
14633 6097 : case 'R':
14634 6097 : if (!CONST_INT_P (x))
14635 : {
14636 1 : output_operand_lossage ("operand is not an integer, invalid "
14637 : "operand code 'R'");
14638 1 : return;
14639 : }
14640 :
14641 6096 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14642 6 : fputs (", ", file);
14643 :
14644 6096 : switch (INTVAL (x))
14645 : {
14646 5281 : case ROUND_NEAREST_INT | ROUND_SAE:
14647 5281 : fputs ("{rn-sae}", file);
14648 5281 : break;
14649 637 : case ROUND_NEG_INF | ROUND_SAE:
14650 637 : fputs ("{rd-sae}", file);
14651 637 : break;
14652 56 : case ROUND_POS_INF | ROUND_SAE:
14653 56 : fputs ("{ru-sae}", file);
14654 56 : break;
14655 121 : case ROUND_ZERO | ROUND_SAE:
14656 121 : fputs ("{rz-sae}", file);
14657 121 : break;
14658 1 : default:
14659 1 : output_operand_lossage ("operand is not a specific integer, "
14660 : "invalid operand code 'R'");
14661 : }
14662 :
14663 6096 : if (ASSEMBLER_DIALECT == ASM_ATT)
14664 6090 : fputs (", ", file);
14665 :
14666 6096 : return;
14667 :
14668 10281 : case 'v':
14669 10281 : if (MEM_P (x))
14670 : {
14671 10400 : switch (MEM_ADDR_SPACE (x))
14672 : {
14673 : case ADDR_SPACE_GENERIC:
14674 : break;
14675 0 : case ADDR_SPACE_SEG_FS:
14676 0 : fputs ("fs ", file);
14677 0 : break;
14678 0 : case ADDR_SPACE_SEG_GS:
14679 0 : fputs ("gs ", file);
14680 0 : break;
14681 0 : default:
14682 0 : gcc_unreachable ();
14683 : }
14684 : }
14685 : else
14686 0 : output_operand_lossage ("operand is not a memory reference, "
14687 : "invalid operand code 'v'");
14688 10281 : return;
14689 :
14690 0 : case '*':
14691 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14692 0 : putc ('*', file);
14693 0 : return;
14694 :
14695 202 : case '&':
14696 202 : {
14697 202 : const char *name = get_some_local_dynamic_name ();
14698 202 : if (name == NULL)
14699 1 : output_operand_lossage ("'%%&' used without any "
14700 : "local dynamic TLS references");
14701 : else
14702 201 : assemble_name (file, name);
14703 202 : return;
14704 : }
14705 :
14706 6457293 : case '+':
14707 6457293 : {
14708 6457293 : rtx x;
14709 :
14710 6457293 : if (!optimize
14711 5031381 : || optimize_function_for_size_p (cfun)
14712 11300134 : || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14713 4842841 : && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14714 6457293 : return;
14715 :
14716 0 : x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14717 0 : if (x)
14718 : {
14719 0 : int pred_val = profile_probability::from_reg_br_prob_note
14720 0 : (XINT (x, 0)).to_reg_br_prob_base ();
14721 :
14722 0 : bool taken = pred_val > REG_BR_PROB_BASE / 2;
14723 : /* We use 3e (DS) prefix for taken branches and
14724 : 2e (CS) prefix for not taken branches. */
14725 0 : if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14726 0 : fputs ("ds ; ", file);
14727 0 : else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14728 0 : fputs ("cs ; ", file);
14729 : }
14730 0 : return;
14731 : }
14732 :
14733 : case ';':
14734 : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14735 : putc (';', file);
14736 : #endif
14737 : return;
14738 :
14739 3267 : case '~':
14740 3267 : putc (TARGET_AVX2 ? 'i' : 'f', file);
14741 3267 : return;
14742 :
14743 1675 : case 'M':
14744 1675 : if (TARGET_X32)
14745 : {
14746 : /* NB: 32-bit indices in VSIB address are sign-extended
14747 : to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
14748 : sign-extended to 0xfffffffff7fa3010 which is invalid
14749 : address. Add addr32 prefix if there is no base
14750 : register nor symbol. */
14751 40 : bool ok;
14752 40 : struct ix86_address parts;
14753 40 : ok = ix86_decompose_address (x, &parts);
14754 40 : gcc_assert (ok && parts.index == NULL_RTX);
14755 40 : if (parts.base == NULL_RTX
14756 40 : && (parts.disp == NULL_RTX
14757 34 : || !symbolic_operand (parts.disp,
14758 34 : GET_MODE (parts.disp))))
14759 34 : fputs ("addr32 ", file);
14760 : }
14761 1675 : return;
14762 :
14763 22156 : case '^':
14764 25346 : if (Pmode != word_mode)
14765 0 : fputs ("addr32 ", file);
14766 22156 : return;
14767 :
14768 14773479 : case '!':
14769 14773479 : if (ix86_notrack_prefixed_insn_p (current_output_insn))
14770 4579 : fputs ("notrack ", file);
14771 14773479 : return;
14772 :
14773 1 : default:
14774 1 : output_operand_lossage ("invalid operand code '%c'", code);
14775 : }
14776 : }
14777 :
14778 143707757 : if (REG_P (x))
14779 85613092 : print_reg (x, code, file);
14780 :
14781 58094665 : else if (MEM_P (x))
14782 : {
14783 33274130 : rtx addr = XEXP (x, 0);
14784 :
14785 : /* No `byte ptr' prefix for call instructions ... */
14786 33274130 : if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14787 : {
14788 321 : machine_mode mode = GET_MODE (x);
14789 321 : const char *size;
14790 :
14791 : /* Check for explicit size override codes. */
14792 321 : if (code == 'b')
14793 : size = "BYTE";
14794 : else if (code == 'w')
14795 : size = "WORD";
14796 : else if (code == 'k')
14797 : size = "DWORD";
14798 : else if (code == 'q')
14799 : size = "QWORD";
14800 : else if (code == 'x')
14801 : size = "XMMWORD";
14802 : else if (code == 't')
14803 : size = "YMMWORD";
14804 : else if (code == 'g')
14805 : size = "ZMMWORD";
14806 235 : else if (mode == BLKmode)
14807 : /* ... or BLKmode operands, when not overridden. */
14808 : size = NULL;
14809 : else
14810 466 : switch (GET_MODE_SIZE (mode))
14811 : {
14812 : case 1: size = "BYTE"; break;
14813 : case 2: size = "WORD"; break;
14814 : case 4: size = "DWORD"; break;
14815 : case 8: size = "QWORD"; break;
14816 : case 12: size = "TBYTE"; break;
14817 7 : case 16:
14818 7 : if (mode == XFmode)
14819 : size = "TBYTE";
14820 : else
14821 : size = "XMMWORD";
14822 : break;
14823 : case 32: size = "YMMWORD"; break;
14824 : case 64: size = "ZMMWORD"; break;
14825 0 : default:
14826 0 : gcc_unreachable ();
14827 : }
14828 : if (size)
14829 : {
14830 319 : fputs (size, file);
14831 319 : fputs (" PTR ", file);
14832 : }
14833 : }
14834 :
14835 33274130 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14836 0 : output_operand_lossage ("invalid constraints for operand");
14837 : else
14838 33274130 : ix86_print_operand_address_as
14839 33910120 : (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14840 : }
14841 :
14842 24820535 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14843 : {
14844 767 : long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14845 767 : REAL_MODE_FORMAT (HFmode));
14846 767 : if (ASSEMBLER_DIALECT == ASM_ATT)
14847 767 : putc ('$', file);
14848 767 : fprintf (file, "0x%04x", (unsigned int) l);
14849 767 : }
14850 :
14851 24819768 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14852 : {
14853 20697 : long l;
14854 :
14855 20697 : REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14856 :
14857 20697 : if (ASSEMBLER_DIALECT == ASM_ATT)
14858 20697 : putc ('$', file);
14859 : /* Sign extend 32bit SFmode immediate to 8 bytes. */
14860 20697 : if (code == 'q')
14861 327 : fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14862 : (unsigned long long) (int) l);
14863 : else
14864 20370 : fprintf (file, "0x%08x", (unsigned int) l);
14865 : }
14866 :
14867 24799071 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14868 : {
14869 3281 : long l[2];
14870 :
14871 3281 : REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14872 :
14873 3281 : if (ASSEMBLER_DIALECT == ASM_ATT)
14874 3281 : putc ('$', file);
14875 3281 : fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14876 3281 : }
14877 :
14878 : /* These float cases don't actually occur as immediate operands. */
14879 24795790 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14880 : {
14881 0 : char dstr[30];
14882 :
14883 0 : real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14884 0 : fputs (dstr, file);
14885 0 : }
14886 :
14887 : /* Print bcst_mem_operand. */
14888 24795790 : else if (GET_CODE (x) == VEC_DUPLICATE)
14889 : {
14890 314 : machine_mode vmode = GET_MODE (x);
14891 : /* Must be bcst_memory_operand. */
14892 314 : gcc_assert (bcst_mem_operand (x, vmode));
14893 :
14894 314 : rtx mem = XEXP (x,0);
14895 314 : ix86_print_operand (file, mem, 0);
14896 :
14897 314 : switch (vmode)
14898 : {
14899 28 : case E_V2DImode:
14900 28 : case E_V2DFmode:
14901 28 : fputs ("{1to2}", file);
14902 28 : break;
14903 74 : case E_V4SImode:
14904 74 : case E_V4SFmode:
14905 74 : case E_V4DImode:
14906 74 : case E_V4DFmode:
14907 74 : fputs ("{1to4}", file);
14908 74 : break;
14909 94 : case E_V8SImode:
14910 94 : case E_V8SFmode:
14911 94 : case E_V8DFmode:
14912 94 : case E_V8DImode:
14913 94 : case E_V8HFmode:
14914 94 : fputs ("{1to8}", file);
14915 94 : break;
14916 110 : case E_V16SFmode:
14917 110 : case E_V16SImode:
14918 110 : case E_V16HFmode:
14919 110 : fputs ("{1to16}", file);
14920 110 : break;
14921 8 : case E_V32HFmode:
14922 8 : fputs ("{1to32}", file);
14923 8 : break;
14924 0 : default:
14925 0 : gcc_unreachable ();
14926 : }
14927 : }
14928 :
14929 : else
14930 : {
14931 : /* We have patterns that allow zero sets of memory, for instance.
14932 : In 64-bit mode, we should probably support all 8-byte vectors,
14933 : since we can in fact encode that into an immediate. */
14934 24795476 : if (CONST_VECTOR_P (x))
14935 : {
14936 3258 : if (x != CONST0_RTX (GET_MODE (x)))
14937 2 : output_operand_lossage ("invalid vector immediate");
14938 3258 : x = const0_rtx;
14939 : }
14940 :
14941 24795476 : if (code == 'P')
14942 : {
14943 5912944 : if (ix86_force_load_from_GOT_p (x, true))
14944 : {
14945 : /* For inline assembly statement, load function address
14946 : from GOT with 'P' operand modifier to avoid PLT. */
14947 4 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14948 : (TARGET_64BIT
14949 : ? UNSPEC_GOTPCREL
14950 : : UNSPEC_GOT));
14951 4 : x = gen_rtx_CONST (Pmode, x);
14952 4 : x = gen_const_mem (Pmode, x);
14953 4 : ix86_print_operand (file, x, 'A');
14954 4 : return;
14955 : }
14956 : }
14957 18882532 : else if (code != 'p')
14958 : {
14959 18882423 : if (CONST_INT_P (x))
14960 : {
14961 15599638 : if (ASSEMBLER_DIALECT == ASM_ATT)
14962 15599410 : putc ('$', file);
14963 : }
14964 3282785 : else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
14965 9409 : || LABEL_REF_P (x))
14966 : {
14967 3282783 : if (ASSEMBLER_DIALECT == ASM_ATT)
14968 3282759 : putc ('$', file);
14969 : else
14970 24 : fputs ("OFFSET FLAT:", file);
14971 : }
14972 : }
14973 24795472 : if (CONST_INT_P (x))
14974 15599724 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14975 9195748 : else if (flag_pic || MACHOPIC_INDIRECT)
14976 522767 : output_pic_addr_const (file, x, code);
14977 : else
14978 8672981 : output_addr_const (file, x);
14979 : }
14980 : }
14981 :
/* Return true if CODE is one of the punctuation characters '*', '+',
   '&', ';', '~', '^' or '!', and false for any other character.  The
   same seven characters appear as operand-less cases in the
   operand-printing switch earlier in this file.
   NOTE(review): presumably this is the TARGET_PRINT_OPERAND_PUNCT_VALID_P
   hook; the hook registration is not visible here -- confirm.  */
14982 : static bool
14983 21336742 : ix86_print_operand_punct_valid_p (unsigned char code)
14984 : {
14985 21336742 : return (code == '*' || code == '+' || code == '&' || code == ';'
14986 14795635 : || code == '~' || code == '^' || code == '!');
14987 : }
14988 :
14989 : /* Print a memory operand whose address is ADDR to FILE.

   AS is the address space of the access.  When AS is generic, the
   segment recorded in the decomposed address (parts.seg) is used
   instead; when AS is not generic, the decomposed address is asserted
   to carry no segment of its own.

   RAW, when true, suppresses both the segment-override prefix and the
   substitution of pc_rtx as base for 64-bit addresses that have
   neither base nor index.

   ADDR may be an UNSPEC_VSIBADDR (index and scale are then taken from
   the unspec's operands 1 and 2) or an UNSPEC_LEA_ADDR (the register
   print code then starts out as 'q').  The function asserts that the
   address decomposes successfully.  Output follows ASSEMBLER_DIALECT:
   DISP(BASE,INDEX,SCALE) for AT&T, SYMBOL[BASE+OFFSET+INDEX*SCALE] for
   Intel.  */
14990 :
14991 : static void
14992 36806190 : ix86_print_operand_address_as (FILE *file, rtx addr,
14993 : addr_space_t as, bool raw)
14994 : {
14995 36806190 : struct ix86_address parts;
14996 36806190 : rtx base, index, disp;
14997 36806190 : int scale;
14998 36806190 : int ok;
14999 36806190 : bool vsib = false;
15000 36806190 : int code = 0;
15001 :
/* VSIB form: operand 0 of the unspec is decomposed as an ordinary
   address (which must have no index), operand 1 supplies the index
   and operand 2 the scale.  */
15002 36806190 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15003 : {
15004 1675 : ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15005 1675 : gcc_assert (parts.index == NULL_RTX);
15006 1675 : parts.index = XVECEXP (addr, 0, 1);
15007 1675 : parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15008 1675 : addr = XVECEXP (addr, 0, 0);
15009 1675 : vsib = true;
15010 : }
15011 36804515 : else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15012 : {
/* Only expected on 64-bit targets; registers start out being printed
   with the 'q' code.  */
15013 3045556 : gcc_assert (TARGET_64BIT);
15014 3045556 : ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15015 3045556 : code = 'q';
15016 : }
15017 : else
15018 33758959 : ok = ix86_decompose_address (addr, &parts);
15019 :
15020 36806190 : gcc_assert (ok);
15021 :
15022 36806190 : base = parts.base;
15023 36806190 : index = parts.index;
15024 36806190 : disp = parts.disp;
15025 36806190 : scale = parts.scale;
15026 :
/* With a generic AS, fall back to the segment found in the address;
   otherwise the address itself must not carry a segment.  */
15027 36806190 : if (ADDR_SPACE_GENERIC_P (as))
15028 36524289 : as = parts.seg;
15029 : else
15030 281901 : gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
15031 :
/* Segment override ("fs:" or "gs:"), preceded by '%' in AT&T syntax.
   Skipped entirely when RAW.  */
15032 36806190 : if (!ADDR_SPACE_GENERIC_P (as) && !raw)
15033 : {
15034 281916 : if (ASSEMBLER_DIALECT == ASM_ATT)
15035 281914 : putc ('%', file);
15036 :
15037 281916 : switch (as)
15038 : {
15039 182225 : case ADDR_SPACE_SEG_FS:
15040 182225 : fputs ("fs:", file);
15041 182225 : break;
15042 99691 : case ADDR_SPACE_SEG_GS:
15043 99691 : fputs ("gs:", file);
15044 99691 : break;
15045 0 : default:
15046 0 : gcc_unreachable ();
15047 : }
15048 : }
15049 :
15050 : /* Use one byte shorter RIP relative addressing for 64bit mode. */
15051 36806190 : if (TARGET_64BIT && !base && !index && !raw)
15052 : {
15053 5994273 : rtx symbol = disp;
15054 :
/* Look through (const (plus SYMBOL CONST_INT)) to reach the symbol.  */
15055 5994273 : if (GET_CODE (disp) == CONST
15056 2173578 : && GET_CODE (XEXP (disp, 0)) == PLUS
15057 2088445 : && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15058 2088445 : symbol = XEXP (XEXP (disp, 0), 0);
15059 :
/* Labels, and symbols whose TLS model is 0, get pc_rtx as base.  */
15060 5994273 : if (LABEL_REF_P (symbol)
15061 5994273 : || (SYMBOL_REF_P (symbol)
15062 5732831 : && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15063 5732733 : base = pc_rtx;
15064 : }
15065 :
15066 36806190 : if (!base && !index)
15067 : {
15068 : /* Displacement only requires special attention. */
15069 601147 : if (CONST_INT_P (disp))
15070 : {
/* In Intel syntax a bare constant address is prefixed with "ds:" when
   AS is generic.  */
15071 269387 : if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
15072 0 : fputs ("ds:", file);
15073 269387 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15074 : }
15075 : /* Load the external function address via the GOT slot to avoid PLT. */
15076 331760 : else if (GET_CODE (disp) == CONST
15077 113436 : && GET_CODE (XEXP (disp, 0)) == UNSPEC
15078 85372 : && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
15079 9640 : || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
15080 407492 : && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15081 24 : output_pic_addr_const (file, disp, 0);
15082 331736 : else if (flag_pic)
15083 114778 : output_pic_addr_const (file, disp, 0);
15084 : else
15085 216958 : output_addr_const (file, disp);
15086 : }
15087 : else
15088 : {
15089 : /* Print SImode register names to force addr32 prefix. */
15090 36205043 : if (SImode_address_operand (addr, VOIDmode))
15091 : {
15092 37 : if (flag_checking)
15093 : {
15094 37 : gcc_assert (TARGET_64BIT);
15095 37 : switch (GET_CODE (addr))
15096 : {
15097 0 : case SUBREG:
15098 0 : gcc_assert (GET_MODE (addr) == SImode);
15099 0 : gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15100 : break;
15101 37 : case ZERO_EXTEND:
15102 37 : case AND:
15103 37 : gcc_assert (GET_MODE (addr) == DImode);
15104 : break;
15105 0 : default:
15106 0 : gcc_unreachable ();
15107 : }
15108 : }
15109 37 : gcc_assert (!code);
15110 : code = 'k';
15111 : }
15112 36205006 : else if (code == 0
15113 33161087 : && TARGET_X32
15114 482 : && disp
15115 410 : && CONST_INT_P (disp)
15116 311 : && INTVAL (disp) < -16*1024*1024)
15117 : {
15118 : /* X32 runs in 64-bit mode, where displacement, DISP, in
15119 : address DISP(%r64), is encoded as 32-bit immediate sign-
15120 : extended from 32-bit to 64-bit. For -0x40000300(%r64),
15121 : address is %r64 + 0xffffffffbffffd00. When %r64 <
15122 : 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15123 : which is invalid for x32. The correct address is %r64
15124 : - 0x40000300 == 0xf7ffdd64. To properly encode
15125 : -0x40000300(%r64) for x32, we zero-extend negative
15126 : displacement by forcing addr32 prefix which truncates
15127 : 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15128 : zero-extend all negative displacements, including -1(%rsp).
15129 : However, for small negative displacements, sign-extension
15130 : won't cause overflow. We only zero-extend negative
15131 : displacements if they < -16*1024*1024, which is also used
15132 : to check legitimate address displacements for PIC. */
15133 38 : code = 'k';
15134 : }
15135 :
15136 : /* Since the upper 32 bits of RSP are always zero for x32,
15137 : we can encode %esp as %rsp to avoid 0x67 prefix if
15138 : there is no index register. */
15139 976 : if (TARGET_X32 && Pmode == SImode
15140 36205447 : && !index && base && REG_P (base) && REGNO (base) == SP_REG)
15141 : code = 'q';
15142 :
/* AT&T syntax: DISP(BASE,INDEX,SCALE).  */
15143 36205043 : if (ASSEMBLER_DIALECT == ASM_ATT)
15144 : {
15145 36204673 : if (disp)
15146 : {
15147 32151519 : if (flag_pic)
15148 2823696 : output_pic_addr_const (file, disp, 0);
15149 29327823 : else if (LABEL_REF_P (disp))
15150 5907 : output_asm_label (disp);
15151 : else
15152 29321916 : output_addr_const (file, disp);
15153 : }
15154 :
15155 36204673 : putc ('(', file);
15156 36204673 : if (base)
15157 35785886 : print_reg (base, code, file);
15158 36204673 : if (index)
15159 : {
/* A VSIB index is printed with print code 0 and always with its
   scale; for other addresses a scale of 1 is omitted.  */
15160 1930733 : putc (',', file);
15161 3859839 : print_reg (index, vsib ? 0 : code, file);
15162 1930733 : if (scale != 1 || vsib)
15163 1029121 : fprintf (file, ",%d", scale);
15164 : }
15165 36204673 : putc (')', file);
15166 : }
15167 : else
15168 : {
/* Intel syntax: any symbolic part of DISP is printed before the
   bracket, the constant part (OFFSET) inside it after the base.  */
15169 370 : rtx offset = NULL_RTX;
15170 :
15171 370 : if (disp)
15172 : {
15173 : /* Pull out the offset of a symbol; print any symbol itself. */
15174 290 : if (GET_CODE (disp) == CONST
15175 18 : && GET_CODE (XEXP (disp, 0)) == PLUS
15176 18 : && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15177 : {
15178 18 : offset = XEXP (XEXP (disp, 0), 1);
15179 18 : disp = gen_rtx_CONST (VOIDmode,
15180 : XEXP (XEXP (disp, 0), 0));
15181 : }
15182 :
15183 290 : if (flag_pic)
15184 0 : output_pic_addr_const (file, disp, 0);
15185 290 : else if (LABEL_REF_P (disp))
15186 0 : output_asm_label (disp);
15187 290 : else if (CONST_INT_P (disp))
15188 : offset = disp;
15189 : else
15190 123 : output_addr_const (file, disp);
15191 : }
15192 :
15193 370 : putc ('[', file);
15194 370 : if (base)
15195 : {
15196 329 : print_reg (base, code, file);
15197 329 : if (offset)
15198 : {
/* Negative offsets carry their own '-' sign from the decimal print.  */
15199 185 : if (INTVAL (offset) >= 0)
15200 20 : putc ('+', file);
15201 185 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15202 : }
15203 : }
15204 41 : else if (offset)
15205 0 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15206 : else
/* Neither base nor constant offset: print a literal 0 in the bracket.  */
15207 41 : putc ('0', file);
15208 :
15209 370 : if (index)
15210 : {
15211 94 : putc ('+', file);
15212 140 : print_reg (index, vsib ? 0 : code, file);
15213 94 : if (scale != 1 || vsib)
15214 92 : fprintf (file, "*%d", scale);
15215 : }
15216 370 : putc (']', file);
15217 : }
15218 : }
15219 36806190 : }
15220 :
/* Print the address ADDR to FILE using the generic address space and
   with the raw flag clear.  The machine_mode argument is unused.
   When this_is_asm_operands is set and ADDR does not satisfy
   address_operand, report "invalid constraints for operand" through
   output_operand_lossage instead of printing anything.  */
15221 : static void
15222 3532061 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
15223 : {
15224 3532061 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
15225 1 : output_operand_lossage ("invalid constraints for operand");
15226 : else
15227 3532060 : ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
15228 3532061 : }
15229 :
15230 : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.

   If X is an UNSPEC whose number is handled below, print its first
   operand to FILE with output_addr_const, followed by the matching
   relocation suffix (@gotoff, @gottpoff, @tpoff, @ntpoff, @dtpoff,
   @gotntpoff, @indntpoff or @secrel32; under TARGET_MACHO, '-' and the
   function base name for UNSPEC_MACHOPIC_OFFSET), and return true.
   Return false, printing nothing, if X is not an UNSPEC or its number
   is not handled here.  */
15231 :
15232 : static bool
15233 15451 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
15234 : {
15235 15451 : rtx op;
15236 :
15237 15451 : if (GET_CODE (x) != UNSPEC)
15238 : return false;
15239 :
15240 15451 : op = XVECEXP (x, 0, 0);
15241 15451 : switch (XINT (x, 1))
15242 : {
15243 1357 : case UNSPEC_GOTOFF:
15244 1357 : output_addr_const (file, op);
15245 1357 : fputs ("@gotoff", file);
15246 1357 : break;
15247 0 : case UNSPEC_GOTTPOFF:
15248 0 : output_addr_const (file, op);
15249 : /* FIXME: This might be @TPOFF in Sun ld. */
15250 0 : fputs ("@gottpoff", file);
15251 0 : break;
15252 0 : case UNSPEC_TPOFF:
15253 0 : output_addr_const (file, op);
15254 0 : fputs ("@tpoff", file);
15255 0 : break;
15256 10998 : case UNSPEC_NTPOFF:
15257 10998 : output_addr_const (file, op);
/* 64-bit targets spell this suffix "@tpoff" rather than "@ntpoff".  */
15258 10998 : if (TARGET_64BIT)
15259 10249 : fputs ("@tpoff", file);
15260 : else
15261 749 : fputs ("@ntpoff", file);
15262 : break;
15263 0 : case UNSPEC_DTPOFF:
15264 0 : output_addr_const (file, op);
15265 0 : fputs ("@dtpoff", file);
15266 0 : break;
15267 3095 : case UNSPEC_GOTNTPOFF:
15268 3095 : output_addr_const (file, op);
/* On 64-bit targets the suffix also names the rip base, written in
   the syntax of the current assembler dialect.  */
15269 3095 : if (TARGET_64BIT)
15270 3095 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15271 : "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15272 : else
15273 0 : fputs ("@gotntpoff", file);
15274 : break;
15275 1 : case UNSPEC_INDNTPOFF:
15276 1 : output_addr_const (file, op);
15277 1 : fputs ("@indntpoff", file);
15278 1 : break;
15279 0 : case UNSPEC_SECREL32:
15280 0 : output_addr_const (file, op);
15281 0 : fputs ("@secrel32", file);
15282 0 : break;
15283 : #if TARGET_MACHO
15284 : case UNSPEC_MACHOPIC_OFFSET:
15285 : output_addr_const (file, op);
15286 : putc ('-', file);
15287 : machopic_output_function_base_name (file);
15288 : break;
15289 : #endif
15290 :
/* Any other unspec number is not handled here.  */
15291 : default:
15292 : return false;
15293 : }
15294 :
15295 : return true;
15296 : }
15297 :
15298 :
15299 : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15300 : MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15301 : is the expression of the binary operation. The output may either be
15302 : emitted here, or returned to the caller, like all output_* functions.
15303 :
15304 : There is no guarantee that the operands are the same mode, as they
15305 : might be within FLOAT or FLOAT_EXTEND expressions.

   The template is assembled in a function-local static buffer and a
   pointer to that buffer is returned, so the result is overwritten by
   the next call.  If any of operands[0..2] is an SSE register, the
   result is a complete "ss"/"sd" template; otherwise it is an "f" or
   "fi" mnemonic followed by an operand template chosen from which
   operands are memory, which are the stack top, and which registers
   have a REG_DEAD note on INSN.  For PLUS and MULT, operands[1] and
   operands[2] may be swapped in the caller's OPERANDS array.  */
15306 :
15307 : #ifndef SYSV386_COMPAT
15308 : /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15309 : wants to fix the assemblers because that causes incompatibility
15310 : with gcc. No-one wants to fix gcc because that causes
15311 : incompatibility with assemblers... You can use the option of
15312 : -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15313 : #define SYSV386_COMPAT 1
15314 : #endif
15315 :
15316 : const char *
15317 606259 : output_387_binary_op (rtx_insn *insn, rtx *operands)
15318 : {
15319 606259 : static char buf[40];
15320 606259 : const char *p;
15321 606259 : bool is_sse
15322 606259 : = (SSE_REG_P (operands[0])
15323 661468 : || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
15324 :
/* Mnemonic prefix: "%v" if any operand is an SSE register, "fi" if
   either source operand has an integer mode class, "f" otherwise.  */
15325 55209 : if (is_sse)
15326 : p = "%v";
15327 55209 : else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15328 55202 : || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15329 : p = "fi";
15330 : else
15331 606259 : p = "f";
15332 :
15333 606259 : strcpy (buf, p);
15334 :
/* Operation name, from the rtx code of operands[3].  */
15335 606259 : switch (GET_CODE (operands[3]))
15336 : {
15337 : case PLUS:
15338 : p = "add"; break;
15339 : case MINUS:
15340 : p = "sub"; break;
15341 94192 : case MULT:
15342 94192 : p = "mul"; break;
15343 27615 : case DIV:
15344 27615 : p = "div"; break;
15345 0 : default:
15346 0 : gcc_unreachable ();
15347 : }
15348 :
15349 606259 : strcat (buf, p);
15350 :
/* SSE form: append "ss" when operands[0] is SFmode and "sd" otherwise,
   then a three-operand template under TARGET_AVX or a two-operand one
   without it, and return.  */
15351 606259 : if (is_sse)
15352 : {
15353 551050 : p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
15354 551050 : strcat (buf, p);
15355 :
15356 551050 : if (TARGET_AVX)
15357 : p = "\t{%2, %1, %0|%0, %1, %2}";
15358 : else
15359 534847 : p = "\t{%2, %0|%0, %2}";
15360 :
15361 551050 : strcat (buf, p);
15362 551050 : return buf;
15363 : }
15364 :
15365 : /* Even if we do not want to check the inputs, this documents input
15366 : constraints. Which helps in understanding the following code. */
15367 55209 : if (flag_checking)
15368 : {
15369 55208 : if (STACK_REG_P (operands[0])
15370 55208 : && ((REG_P (operands[1])
15371 53633 : && REGNO (operands[0]) == REGNO (operands[1])
15372 49645 : && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15373 5563 : || (REG_P (operands[2])
15374 5563 : && REGNO (operands[0]) == REGNO (operands[2])
15375 5563 : && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15376 110416 : && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15377 : ; /* ok */
15378 : else
15379 0 : gcc_unreachable ();
15380 : }
15381 :
15382 55209 : switch (GET_CODE (operands[3]))
15383 : {
15384 40398 : case MULT:
15385 40398 : case PLUS:
/* If operands[2] is the register that matches operands[0], swap the
   sources.  This modifies the caller's OPERANDS array.  */
15386 40398 : if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15387 1984 : std::swap (operands[1], operands[2]);
15388 :
15389 : /* know operands[0] == operands[1]. */
15390 :
15391 40398 : if (MEM_P (operands[2]))
15392 : {
15393 : p = "%Z2\t%2";
15394 : break;
15395 : }
15396 :
15397 36040 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15398 : {
15399 21069 : if (STACK_TOP_P (operands[0]))
15400 : /* How is it that we are storing to a dead operand[2]?
15401 : Well, presumably operands[1] is dead too. We can't
15402 : store the result to st(0) as st(0) gets popped on this
15403 : instruction. Instead store to operands[2] (which I
15404 : think has to be st(1)). st(1) will be popped later.
15405 : gcc <= 2.8.1 didn't have this check and generated
15406 : assembly code that the Unixware assembler rejected. */
15407 : p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15408 : else
15409 : p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15410 : break;
15411 : }
15412 :
15413 14971 : if (STACK_TOP_P (operands[0]))
15414 : p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15415 : else
15416 : p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15417 : break;
15418 :
15419 14811 : case MINUS:
15420 14811 : case DIV:
/* Operand order matters here, so a memory operands[1] selects the
   reversed ("r") form of the instruction.  */
15421 14811 : if (MEM_P (operands[1]))
15422 : {
15423 : p = "r%Z1\t%1";
15424 : break;
15425 : }
15426 :
15427 14375 : if (MEM_P (operands[2]))
15428 : {
15429 : p = "%Z2\t%2";
15430 : break;
15431 : }
15432 :
15433 12765 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15434 : {
15435 : #if SYSV386_COMPAT
15436 : /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15437 : derived assemblers, confusingly reverse the direction of
15438 : the operation for fsub{r} and fdiv{r} when the
15439 : destination register is not st(0). The Intel assembler
15440 : doesn't have this brain damage. Read !SYSV386_COMPAT to
15441 : figure out what the hardware really does. */
15442 6179 : if (STACK_TOP_P (operands[0]))
15443 : p = "{p\t%0, %2|rp\t%2, %0}";
15444 : else
15445 : p = "{rp\t%2, %0|p\t%0, %2}";
15446 : #else
15447 : if (STACK_TOP_P (operands[0]))
15448 : /* As above for fmul/fadd, we can't store to st(0). */
15449 : p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15450 : else
15451 : p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15452 : #endif
15453 : break;
15454 : }
15455 :
15456 6586 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15457 : {
15458 : #if SYSV386_COMPAT
15459 3074 : if (STACK_TOP_P (operands[0]))
15460 : p = "{rp\t%0, %1|p\t%1, %0}";
15461 : else
15462 : p = "{p\t%1, %0|rp\t%0, %1}";
15463 : #else
15464 : if (STACK_TOP_P (operands[0]))
15465 : p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15466 : else
15467 : p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15468 : #endif
15469 : break;
15470 : }
15471 :
/* Neither source register is dead: use the non-popping forms.  */
15472 3512 : if (STACK_TOP_P (operands[0]))
15473 : {
15474 2670 : if (STACK_TOP_P (operands[1]))
15475 : p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15476 : else
15477 : p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15478 : break;
15479 : }
15480 842 : else if (STACK_TOP_P (operands[1]))
15481 : {
15482 : #if SYSV386_COMPAT
15483 : p = "{\t%1, %0|r\t%0, %1}";
15484 : #else
15485 : p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15486 : #endif
15487 : }
15488 : else
15489 : {
15490 : #if SYSV386_COMPAT
15491 : p = "{r\t%2, %0|\t%0, %2}";
15492 : #else
15493 : p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15494 : #endif
15495 : }
15496 : break;
15497 :
15498 0 : default:
15499 0 : gcc_unreachable ();
15500 : }
15501 :
15502 55209 : strcat (buf, p);
15503 55209 : return buf;
15504 : }
15505 :
15506 : /* Return needed mode for entity in optimize_mode_switching pass. */
15507 :
15508 : static int
15509 1656 : ix86_dirflag_mode_needed (rtx_insn *insn)
15510 : {
15511 1656 : if (CALL_P (insn))
15512 : {
15513 339 : if (cfun->machine->func_type == TYPE_NORMAL)
15514 : return X86_DIRFLAG_ANY;
15515 : else
15516 : /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15517 339 : return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15518 : }
15519 :
15520 1317 : if (recog_memoized (insn) < 0)
15521 : return X86_DIRFLAG_ANY;
15522 :
15523 1315 : if (get_attr_type (insn) == TYPE_STR)
15524 : {
15525 : /* Emit cld instruction if stringops are used in the function. */
15526 1 : if (cfun->machine->func_type == TYPE_NORMAL)
15527 0 : return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15528 : else
15529 : return X86_DIRFLAG_RESET;
15530 : }
15531 :
15532 : return X86_DIRFLAG_ANY;
15533 : }
15534 :
15535 : /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15536 :
15537 : static bool
15538 2216644 : ix86_check_avx_upper_register (const_rtx exp)
15539 : {
15540 : /* construct_container may return a parallel with expr_list
15541 : which contains the real reg and mode */
15542 2216644 : subrtx_iterator::array_type array;
15543 8483328 : FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
15544 : {
15545 6427854 : const_rtx x = *iter;
15546 2586431 : if (SSE_REG_P (x)
15547 837024 : && !EXT_REX_SSE_REG_P (x)
15548 8088882 : && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
15549 161170 : return true;
15550 : }
15551 :
15552 2055474 : return false;
15553 2216644 : }
15554 :
15555 : /* Check if a 256bit or 512bit AVX register is referenced in stores. */
15556 :
15557 : static void
15558 51713 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15559 : {
15560 51713 : if (SSE_REG_P (dest)
15561 12859 : && !EXT_REX_SSE_REG_P (dest)
15562 77431 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15563 : {
15564 760 : bool *used = (bool *) data;
15565 760 : *used = true;
15566 : }
15567 51713 : }
15568 :
15569 : /* Return needed mode for entity in optimize_mode_switching pass. */
15570 :
15571 : static int
15572 2070240 : ix86_avx_u128_mode_needed (rtx_insn *insn)
15573 : {
15574 2070240 : if (DEBUG_INSN_P (insn))
15575 : return AVX_U128_ANY;
15576 :
15577 2070240 : if (CALL_P (insn))
15578 : {
15579 49537 : rtx link;
15580 :
15581 : /* Needed mode is set to AVX_U128_CLEAN if there are
15582 : no 256bit or 512bit modes used in function arguments. */
15583 49537 : for (link = CALL_INSN_FUNCTION_USAGE (insn);
15584 134671 : link;
15585 85134 : link = XEXP (link, 1))
15586 : {
15587 86186 : if (GET_CODE (XEXP (link, 0)) == USE)
15588 : {
15589 84776 : rtx arg = XEXP (XEXP (link, 0), 0);
15590 :
15591 84776 : if (ix86_check_avx_upper_register (arg))
15592 : return AVX_U128_DIRTY;
15593 : }
15594 : }
15595 :
15596 : /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
15597 : nor 512bit registers used in the function return register. */
15598 48485 : bool avx_upper_reg_found = false;
15599 48485 : note_stores (insn, ix86_check_avx_upper_stores,
15600 : &avx_upper_reg_found);
15601 48485 : if (avx_upper_reg_found)
15602 : return AVX_U128_DIRTY;
15603 :
15604 : /* If the function is known to preserve some SSE registers,
15605 : RA and previous passes can legitimately rely on that for
15606 : modes wider than 256 bits. It's only safe to issue a
15607 : vzeroupper if all SSE registers are clobbered. */
15608 48301 : const function_abi &abi = insn_callee_abi (insn);
15609 48301 : if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
15610 : /* Should be safe to issue an vzeroupper before sibling_call_p.
15611 : Also there not mode_exit for sibling_call, so there could be
15612 : missing vzeroupper for that. */
15613 48301 : || !(SIBLING_CALL_P (insn)
15614 47015 : || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15615 47015 : abi.mode_clobbers (V4DImode))))
15616 8438 : return AVX_U128_ANY;
15617 :
15618 39863 : return AVX_U128_CLEAN;
15619 : }
15620 :
15621 2020703 : rtx set = single_set (insn);
15622 2020703 : if (set)
15623 : {
15624 1947296 : rtx dest = SET_DEST (set);
15625 1947296 : rtx src = SET_SRC (set);
15626 1466836 : if (SSE_REG_P (dest)
15627 554277 : && !EXT_REX_SSE_REG_P (dest)
15628 3043698 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15629 : {
15630 : /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
15631 : source isn't zero. */
15632 169200 : if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
15633 : return AVX_U128_DIRTY;
15634 : else
15635 : return AVX_U128_ANY;
15636 : }
15637 : else
15638 : {
15639 1778096 : if (ix86_check_avx_upper_register (src))
15640 : return AVX_U128_DIRTY;
15641 : }
15642 :
15643 : /* This isn't YMM/ZMM load/store. */
15644 : return AVX_U128_ANY;
15645 : }
15646 :
15647 : /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
15648 : Hardware changes state only when a 256bit register is written to,
15649 : but we need to prevent the compiler from moving optimal insertion
15650 : point above eventual read from 256bit or 512 bit register. */
15651 73407 : if (ix86_check_avx_upper_register (PATTERN (insn)))
15652 : return AVX_U128_DIRTY;
15653 :
15654 : return AVX_U128_ANY;
15655 : }
15656 :
15657 : /* Return mode that i387 must be switched into
15658 : prior to the execution of insn. */
15659 :
15660 : static int
15661 417088 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
15662 : {
15663 417088 : enum attr_i387_cw mode;
15664 :
15665 : /* The mode UNINITIALIZED is used to store control word after a
15666 : function call or ASM pattern. The mode ANY specify that function
15667 : has no requirements on the control word and make no changes in the
15668 : bits we are interested in. */
15669 :
15670 417088 : if (CALL_P (insn)
15671 417088 : || (NONJUMP_INSN_P (insn)
15672 341280 : && (asm_noperands (PATTERN (insn)) >= 0
15673 341227 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15674 14637 : return I387_CW_UNINITIALIZED;
15675 :
15676 402451 : if (recog_memoized (insn) < 0)
15677 : return I387_CW_ANY;
15678 :
15679 401509 : mode = get_attr_i387_cw (insn);
15680 :
15681 401509 : switch (entity)
15682 : {
15683 0 : case I387_ROUNDEVEN:
15684 0 : if (mode == I387_CW_ROUNDEVEN)
15685 : return mode;
15686 : break;
15687 :
15688 396860 : case I387_TRUNC:
15689 396860 : if (mode == I387_CW_TRUNC)
15690 : return mode;
15691 : break;
15692 :
15693 3618 : case I387_FLOOR:
15694 3618 : if (mode == I387_CW_FLOOR)
15695 : return mode;
15696 : break;
15697 :
15698 1031 : case I387_CEIL:
15699 1031 : if (mode == I387_CW_CEIL)
15700 : return mode;
15701 : break;
15702 :
15703 0 : default:
15704 0 : gcc_unreachable ();
15705 : }
15706 :
15707 : return I387_CW_ANY;
15708 : }
15709 :
15710 : /* Return mode that entity must be switched into
15711 : prior to the execution of insn. */
15712 :
15713 : static int
15714 2488984 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15715 : {
15716 2488984 : switch (entity)
15717 : {
15718 1656 : case X86_DIRFLAG:
15719 1656 : return ix86_dirflag_mode_needed (insn);
15720 2070240 : case AVX_U128:
15721 2070240 : return ix86_avx_u128_mode_needed (insn);
15722 417088 : case I387_ROUNDEVEN:
15723 417088 : case I387_TRUNC:
15724 417088 : case I387_FLOOR:
15725 417088 : case I387_CEIL:
15726 417088 : return ix86_i387_mode_needed (entity, insn);
15727 0 : default:
15728 0 : gcc_unreachable ();
15729 : }
15730 : return 0;
15731 : }
15732 :
15733 : /* Calculate mode of upper 128bit AVX registers after the insn. */
15734 :
15735 : static int
15736 2070240 : ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15737 : {
15738 2070240 : rtx pat = PATTERN (insn);
15739 :
15740 2070240 : if (vzeroupper_pattern (pat, VOIDmode)
15741 2070240 : || vzeroall_pattern (pat, VOIDmode))
15742 243 : return AVX_U128_CLEAN;
15743 :
15744 : /* We know that state is clean after CALL insn if there are no
15745 : 256bit or 512bit registers used in the function return register. */
15746 2069997 : if (CALL_P (insn))
15747 : {
15748 49491 : bool avx_upper_reg_found = false;
15749 49491 : note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15750 :
15751 49491 : if (avx_upper_reg_found)
15752 : return AVX_U128_DIRTY;
15753 :
15754 : /* If the function doesn't clobber any sse registers or only clobber
15755 : 128-bit part, Then vzeroupper isn't issued before the function exit.
15756 : the status not CLEAN but ANY after the function. */
15757 48915 : const function_abi &abi = insn_callee_abi (insn);
15758 48915 : if (!(SIBLING_CALL_P (insn)
15759 47634 : || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15760 47634 : abi.mode_clobbers (V4DImode))))
15761 8734 : return AVX_U128_ANY;
15762 :
15763 40181 : return AVX_U128_CLEAN;
15764 : }
15765 :
15766 : /* Otherwise, return current mode. Remember that if insn
15767 : references AVX 256bit or 512bit registers, the mode was already
15768 : changed to DIRTY from MODE_NEEDED. */
15769 : return mode;
15770 : }
15771 :
15772 : /* Return the mode that an insn results in. */
15773 :
15774 : static int
15775 2488139 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15776 : {
15777 2488139 : switch (entity)
15778 : {
15779 : case X86_DIRFLAG:
15780 : return mode;
15781 2070240 : case AVX_U128:
15782 2070240 : return ix86_avx_u128_mode_after (mode, insn);
15783 : case I387_ROUNDEVEN:
15784 : case I387_TRUNC:
15785 : case I387_FLOOR:
15786 : case I387_CEIL:
15787 : return mode;
15788 0 : default:
15789 0 : gcc_unreachable ();
15790 : }
15791 : }
15792 :
15793 : static int
15794 120 : ix86_dirflag_mode_entry (void)
15795 : {
15796 : /* For TARGET_CLD or in the interrupt handler we can't assume
15797 : direction flag state at function entry. */
15798 120 : if (TARGET_CLD
15799 118 : || cfun->machine->func_type != TYPE_NORMAL)
15800 120 : return X86_DIRFLAG_ANY;
15801 :
15802 : return X86_DIRFLAG_RESET;
15803 : }
15804 :
15805 : static int
15806 123851 : ix86_avx_u128_mode_entry (void)
15807 : {
15808 123851 : tree arg;
15809 :
15810 : /* Entry mode is set to AVX_U128_DIRTY if there are
15811 : 256bit or 512bit modes used in function arguments. */
15812 311790 : for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15813 187939 : arg = TREE_CHAIN (arg))
15814 : {
15815 222016 : rtx incoming = DECL_INCOMING_RTL (arg);
15816 :
15817 222016 : if (incoming && ix86_check_avx_upper_register (incoming))
15818 : return AVX_U128_DIRTY;
15819 : }
15820 :
15821 : return AVX_U128_CLEAN;
15822 : }
15823 :
15824 : /* Return a mode that ENTITY is assumed to be
15825 : switched to at function entry. */
15826 :
15827 : static int
15828 76314 : ix86_mode_entry (int entity)
15829 : {
15830 76314 : switch (entity)
15831 : {
15832 120 : case X86_DIRFLAG:
15833 120 : return ix86_dirflag_mode_entry ();
15834 75059 : case AVX_U128:
15835 75059 : return ix86_avx_u128_mode_entry ();
15836 : case I387_ROUNDEVEN:
15837 : case I387_TRUNC:
15838 : case I387_FLOOR:
15839 : case I387_CEIL:
15840 : return I387_CW_ANY;
15841 0 : default:
15842 0 : gcc_unreachable ();
15843 : }
15844 : }
15845 :
15846 : static int
15847 73810 : ix86_avx_u128_mode_exit (void)
15848 : {
15849 73810 : rtx reg = crtl->return_rtx;
15850 :
15851 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15852 : or 512 bit modes used in the function return register. */
15853 73810 : if (reg && ix86_check_avx_upper_register (reg))
15854 : return AVX_U128_DIRTY;
15855 :
15856 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15857 : modes used in function arguments, otherwise return AVX_U128_CLEAN.
15858 : */
15859 48792 : return ix86_avx_u128_mode_entry ();
15860 : }
15861 :
15862 : /* Return a mode that ENTITY is assumed to be
15863 : switched to at function exit. */
15864 :
15865 : static int
15866 74920 : ix86_mode_exit (int entity)
15867 : {
15868 74920 : switch (entity)
15869 : {
15870 : case X86_DIRFLAG:
15871 : return X86_DIRFLAG_ANY;
15872 73810 : case AVX_U128:
15873 73810 : return ix86_avx_u128_mode_exit ();
15874 1076 : case I387_ROUNDEVEN:
15875 1076 : case I387_TRUNC:
15876 1076 : case I387_FLOOR:
15877 1076 : case I387_CEIL:
15878 1076 : return I387_CW_ANY;
15879 0 : default:
15880 0 : gcc_unreachable ();
15881 : }
15882 : }
15883 :
/* Return the mode with priority N.  The first (unnamed) argument is
   ignored and the mapping is the identity.
   NOTE(review): presumably this is the mode-switching priority hook
   and the first argument is the entity; confirm against where the
   function is registered.  */

static int
ix86_mode_priority (int, int n)
{
  return n;
}
15889 :
15890 : /* Output code to initialize control word copies used by trunc?f?i and
15891 : rounding patterns. CURRENT_MODE is set to current control word,
15892 : while NEW_MODE is set to new control word. */
15893 :
15894 : static void
15895 3296 : emit_i387_cw_initialization (int mode)
15896 : {
15897 3296 : rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15898 3296 : rtx new_mode;
15899 :
15900 3296 : enum ix86_stack_slot slot;
15901 :
15902 3296 : rtx reg = gen_reg_rtx (HImode);
15903 :
15904 3296 : emit_insn (gen_x86_fnstcw_1 (stored_mode));
15905 3296 : emit_move_insn (reg, copy_rtx (stored_mode));
15906 :
15907 3296 : switch (mode)
15908 : {
15909 0 : case I387_CW_ROUNDEVEN:
15910 : /* round to nearest */
15911 0 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15912 0 : slot = SLOT_CW_ROUNDEVEN;
15913 0 : break;
15914 :
15915 3100 : case I387_CW_TRUNC:
15916 : /* round toward zero (truncate) */
15917 3100 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15918 3100 : slot = SLOT_CW_TRUNC;
15919 3100 : break;
15920 :
15921 137 : case I387_CW_FLOOR:
15922 : /* round down toward -oo */
15923 137 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15924 137 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15925 137 : slot = SLOT_CW_FLOOR;
15926 137 : break;
15927 :
15928 59 : case I387_CW_CEIL:
15929 : /* round up toward +oo */
15930 59 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15931 59 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15932 59 : slot = SLOT_CW_CEIL;
15933 59 : break;
15934 :
15935 0 : default:
15936 0 : gcc_unreachable ();
15937 : }
15938 :
15939 3296 : gcc_assert (slot < MAX_386_STACK_LOCALS);
15940 :
15941 3296 : new_mode = assign_386_stack_local (HImode, slot);
15942 3296 : emit_move_insn (new_mode, reg);
15943 3296 : }
15944 :
15945 : /* Generate one or more insns to set ENTITY to MODE. */
15946 :
15947 : static void
15948 51598 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15949 : HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15950 : {
15951 51598 : switch (entity)
15952 : {
15953 265 : case X86_DIRFLAG:
15954 265 : if (mode == X86_DIRFLAG_RESET)
15955 265 : emit_insn (gen_cld ());
15956 : break;
15957 43157 : case AVX_U128:
15958 43157 : if (mode == AVX_U128_CLEAN)
15959 21818 : ix86_expand_avx_vzeroupper ();
15960 : break;
15961 8176 : case I387_ROUNDEVEN:
15962 8176 : case I387_TRUNC:
15963 8176 : case I387_FLOOR:
15964 8176 : case I387_CEIL:
15965 8176 : if (mode != I387_CW_ANY
15966 8176 : && mode != I387_CW_UNINITIALIZED)
15967 3296 : emit_i387_cw_initialization (mode);
15968 : break;
15969 0 : default:
15970 0 : gcc_unreachable ();
15971 : }
15972 51598 : }
15973 :
15974 : /* Output code for INSN to convert a float to a signed int. OPERANDS
15975 : are the insn operands. The output may be [HSD]Imode and the input
15976 : operand may be [SDX]Fmode. */
15977 :
15978 : const char *
15979 7437 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15980 : {
15981 7437 : bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15982 7437 : bool dimode_p = GET_MODE (operands[0]) == DImode;
15983 7437 : int round_mode = get_attr_i387_cw (insn);
15984 :
15985 7437 : static char buf[40];
15986 7437 : const char *p;
15987 :
15988 : /* Jump through a hoop or two for DImode, since the hardware has no
15989 : non-popping instruction. We used to do this a different way, but
15990 : that was somewhat fragile and broke with post-reload splitters. */
15991 7437 : if ((dimode_p || fisttp) && !stack_top_dies)
15992 25 : output_asm_insn ("fld\t%y1", operands);
15993 :
15994 7437 : gcc_assert (STACK_TOP_P (operands[1]));
15995 7437 : gcc_assert (MEM_P (operands[0]));
15996 7437 : gcc_assert (GET_MODE (operands[1]) != TFmode);
15997 :
15998 7437 : if (fisttp)
15999 : return "fisttp%Z0\t%0";
16000 :
16001 7436 : strcpy (buf, "fist");
16002 :
16003 7436 : if (round_mode != I387_CW_ANY)
16004 7392 : output_asm_insn ("fldcw\t%3", operands);
16005 :
16006 7436 : p = "p%Z0\t%0";
16007 7436 : strcat (buf, p + !(stack_top_dies || dimode_p));
16008 :
16009 7436 : output_asm_insn (buf, operands);
16010 :
16011 7436 : if (round_mode != I387_CW_ANY)
16012 7392 : output_asm_insn ("fldcw\t%2", operands);
16013 :
16014 : return "";
16015 : }
16016 :
16017 : /* Output code for x87 ffreep insn. The OPNO argument, which may only
16018 : have the values zero or one, indicates the ffreep insn's operand
16019 : from the OPERANDS array. */
16020 :
16021 : static const char *
16022 275148 : output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16023 : {
16024 0 : if (TARGET_USE_FFREEP)
16025 : #ifdef HAVE_AS_IX86_FFREEP
16026 0 : return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16027 : #else
16028 : {
16029 : static char retval[32];
16030 : int regno = REGNO (operands[opno]);
16031 :
16032 : gcc_assert (STACK_REGNO_P (regno));
16033 :
16034 : regno -= FIRST_STACK_REG;
16035 :
16036 : snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16037 : return retval;
16038 : }
16039 : #endif
16040 :
16041 0 : return opno ? "fstp\t%y1" : "fstp\t%y0";
16042 : }
16043 :
16044 :
16045 : /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16046 : should be used. UNORDERED_P is true when fucom should be used. */
16047 :
16048 : const char *
16049 107693 : output_fp_compare (rtx_insn *insn, rtx *operands,
16050 : bool eflags_p, bool unordered_p)
16051 : {
16052 107693 : rtx *xops = eflags_p ? &operands[0] : &operands[1];
16053 107693 : bool stack_top_dies;
16054 :
16055 107693 : static char buf[40];
16056 107693 : const char *p;
16057 :
16058 107693 : gcc_assert (STACK_TOP_P (xops[0]));
16059 :
16060 107693 : stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
16061 :
16062 107693 : if (eflags_p)
16063 : {
16064 107693 : p = unordered_p ? "fucomi" : "fcomi";
16065 107693 : strcpy (buf, p);
16066 :
16067 107693 : p = "p\t{%y1, %0|%0, %y1}";
16068 107693 : strcat (buf, p + !stack_top_dies);
16069 :
16070 107693 : return buf;
16071 : }
16072 :
16073 0 : if (STACK_REG_P (xops[1])
16074 0 : && stack_top_dies
16075 0 : && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
16076 : {
16077 0 : gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
16078 :
16079 : /* If both the top of the 387 stack die, and the other operand
16080 : is also a stack register that dies, then this must be a
16081 : `fcompp' float compare. */
16082 0 : p = unordered_p ? "fucompp" : "fcompp";
16083 0 : strcpy (buf, p);
16084 : }
16085 0 : else if (const0_operand (xops[1], VOIDmode))
16086 : {
16087 0 : gcc_assert (!unordered_p);
16088 0 : strcpy (buf, "ftst");
16089 : }
16090 : else
16091 : {
16092 0 : if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
16093 : {
16094 0 : gcc_assert (!unordered_p);
16095 : p = "ficom";
16096 : }
16097 : else
16098 0 : p = unordered_p ? "fucom" : "fcom";
16099 :
16100 0 : strcpy (buf, p);
16101 :
16102 0 : p = "p%Z2\t%y2";
16103 0 : strcat (buf, p + !stack_top_dies);
16104 : }
16105 :
16106 0 : output_asm_insn (buf, operands);
16107 0 : return "fnstsw\t%0";
16108 : }
16109 :
16110 : void
16111 130755 : ix86_output_addr_vec_elt (FILE *file, int value)
16112 : {
16113 130755 : const char *directive = ASM_LONG;
16114 :
16115 : #ifdef ASM_QUAD
16116 130755 : if (TARGET_LP64)
16117 119030 : directive = ASM_QUAD;
16118 : #else
16119 : gcc_assert (!TARGET_64BIT);
16120 : #endif
16121 :
16122 130755 : fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16123 130755 : }
16124 :
16125 : void
16126 27384 : ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16127 : {
16128 27384 : const char *directive = ASM_LONG;
16129 :
16130 : #ifdef ASM_QUAD
16131 41076 : if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16132 : directive = ASM_QUAD;
16133 : #else
16134 : gcc_assert (!TARGET_64BIT);
16135 : #endif
16136 : /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16137 27384 : if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
16138 13692 : fprintf (file, "%s%s%d-%s%d\n",
16139 : directive, LPREFIX, value, LPREFIX, rel);
16140 : #if TARGET_MACHO
16141 : else if (TARGET_MACHO)
16142 : {
16143 : fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16144 : machopic_output_function_base_name (file);
16145 : putc ('\n', file);
16146 : }
16147 : #endif
16148 13692 : else if (HAVE_AS_GOTOFF_IN_DATA)
16149 13692 : fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16150 : else
16151 : asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16152 : GOT_SYMBOL_NAME, LPREFIX, value);
16153 27384 : }
16154 :
/* LEA_MAX_STALL is the limit that ix86_lea_outperforms compares the
   definition distance against.  LEA_SEARCH_THRESHOLD is twice that
   value; it bounds the half-cycle distance accumulated by the
   backward and forward searches below.  */
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16157 :
16158 : /* Increase given DISTANCE in half-cycles according to
16159 : dependencies between PREV and NEXT instructions.
16160 : Add 1 half-cycle if there is no dependency and
16161 : go to next cycle if there is some dependency. */
16162 :
16163 : static unsigned int
16164 2077 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
16165 : {
16166 2077 : df_ref def, use;
16167 :
16168 2077 : if (!prev || !next)
16169 727 : return distance + (distance & 1) + 2;
16170 :
16171 1350 : if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16172 226 : return distance + 1;
16173 :
16174 1887 : FOR_EACH_INSN_USE (use, next)
16175 2410 : FOR_EACH_INSN_DEF (def, prev)
16176 1647 : if (!DF_REF_IS_ARTIFICIAL (def)
16177 1647 : && DF_REF_REGNO (use) == DF_REF_REGNO (def))
16178 699 : return distance + (distance & 1) + 2;
16179 :
16180 425 : return distance + 1;
16181 : }
16182 :
16183 : /* Function checks if instruction INSN defines register number
16184 : REGNO1 or REGNO2. */
16185 :
16186 : bool
16187 2029 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
16188 : rtx_insn *insn)
16189 : {
16190 2029 : df_ref def;
16191 :
16192 3661 : FOR_EACH_INSN_DEF (def, insn)
16193 2030 : if (DF_REF_REG_DEF_P (def)
16194 2030 : && !DF_REF_IS_ARTIFICIAL (def)
16195 2030 : && (regno1 == DF_REF_REGNO (def)
16196 1648 : || regno2 == DF_REF_REGNO (def)))
16197 : return true;
16198 :
16199 : return false;
16200 : }
16201 :
16202 : /* Function checks if instruction INSN uses register number
16203 : REGNO as a part of address expression. */
16204 :
16205 : static bool
16206 1162 : insn_uses_reg_mem (unsigned int regno, rtx insn)
16207 : {
16208 1162 : df_ref use;
16209 :
16210 2444 : FOR_EACH_INSN_USE (use, insn)
16211 1365 : if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
16212 : return true;
16213 :
16214 : return false;
16215 : }
16216 :
16217 : /* Search backward for non-agu definition of register number REGNO1
16218 : or register number REGNO2 in basic block starting from instruction
16219 : START up to head of basic block or instruction INSN.
16220 :
16221 : Function puts true value into *FOUND var if definition was found
16222 : and false otherwise.
16223 :
16224 : Distance in half-cycles between START and found instruction or head
16225 : of BB is added to DISTANCE and returned. */
16226 :
16227 : static int
16228 611 : distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16229 : rtx_insn *insn, int distance,
16230 : rtx_insn *start, bool *found)
16231 : {
16232 611 : basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16233 611 : rtx_insn *prev = start;
16234 611 : rtx_insn *next = NULL;
16235 :
16236 611 : *found = false;
16237 :
16238 611 : while (prev
16239 1818 : && prev != insn
16240 1818 : && distance < LEA_SEARCH_THRESHOLD)
16241 : {
16242 1626 : if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16243 : {
16244 915 : distance = increase_distance (prev, next, distance);
16245 915 : if (insn_defines_reg (regno1, regno2, prev))
16246 : {
16247 239 : if (recog_memoized (prev) < 0
16248 239 : || get_attr_type (prev) != TYPE_LEA)
16249 : {
16250 197 : *found = true;
16251 197 : return distance;
16252 : }
16253 : }
16254 :
16255 : next = prev;
16256 : }
16257 1429 : if (prev == BB_HEAD (bb))
16258 : break;
16259 :
16260 1207 : prev = PREV_INSN (prev);
16261 : }
16262 :
16263 : return distance;
16264 : }
16265 :
16266 : /* Search backward for non-agu definition of register number REGNO1
16267 : or register number REGNO2 in INSN's basic block until
16268 : 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16269 : 2. Reach neighbor BBs boundary, or
16270 : 3. Reach agu definition.
16271 : Returns the distance between the non-agu definition point and INSN.
16272 : If no definition point, returns -1. */
16273 :
16274 : static int
16275 417 : distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16276 : rtx_insn *insn)
16277 : {
16278 417 : basic_block bb = BLOCK_FOR_INSN (insn);
16279 417 : int distance = 0;
16280 417 : bool found = false;
16281 :
16282 417 : if (insn != BB_HEAD (bb))
16283 417 : distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16284 : distance, PREV_INSN (insn),
16285 : &found);
16286 :
16287 417 : if (!found && distance < LEA_SEARCH_THRESHOLD)
16288 : {
16289 166 : edge e;
16290 166 : edge_iterator ei;
16291 166 : bool simple_loop = false;
16292 :
16293 334 : FOR_EACH_EDGE (e, ei, bb->preds)
16294 205 : if (e->src == bb)
16295 : {
16296 : simple_loop = true;
16297 : break;
16298 : }
16299 :
16300 166 : if (simple_loop)
16301 37 : distance = distance_non_agu_define_in_bb (regno1, regno2,
16302 : insn, distance,
16303 37 : BB_END (bb), &found);
16304 : else
16305 : {
16306 129 : int shortest_dist = -1;
16307 129 : bool found_in_bb = false;
16308 :
16309 286 : FOR_EACH_EDGE (e, ei, bb->preds)
16310 : {
16311 157 : int bb_dist
16312 314 : = distance_non_agu_define_in_bb (regno1, regno2,
16313 : insn, distance,
16314 157 : BB_END (e->src),
16315 : &found_in_bb);
16316 157 : if (found_in_bb)
16317 : {
16318 24 : if (shortest_dist < 0)
16319 : shortest_dist = bb_dist;
16320 0 : else if (bb_dist > 0)
16321 0 : shortest_dist = MIN (bb_dist, shortest_dist);
16322 :
16323 24 : found = true;
16324 : }
16325 : }
16326 :
16327 129 : distance = shortest_dist;
16328 : }
16329 : }
16330 :
16331 417 : if (!found)
16332 : return -1;
16333 :
16334 197 : return distance >> 1;
16335 : }
16336 :
16337 : /* Return the distance in half-cycles between INSN and the next
16338 : insn that uses register number REGNO in memory address added
16339 : to DISTANCE. Return -1 if REGNO0 is set.
16340 :
16341 : Put true value into *FOUND if register usage was found and
16342 : false otherwise.
16343 : Put true value into *REDEFINED if register redefinition was
16344 : found and false otherwise. */
16345 :
16346 : static int
16347 747 : distance_agu_use_in_bb (unsigned int regno,
16348 : rtx_insn *insn, int distance, rtx_insn *start,
16349 : bool *found, bool *redefined)
16350 : {
16351 747 : basic_block bb = NULL;
16352 747 : rtx_insn *next = start;
16353 747 : rtx_insn *prev = NULL;
16354 :
16355 747 : *found = false;
16356 747 : *redefined = false;
16357 :
16358 747 : if (start != NULL_RTX)
16359 : {
16360 730 : bb = BLOCK_FOR_INSN (start);
16361 730 : if (start != BB_HEAD (bb))
16362 : /* If insn and start belong to the same bb, set prev to insn,
16363 : so the call to increase_distance will increase the distance
16364 : between insns by 1. */
16365 400 : prev = insn;
16366 : }
16367 :
16368 2525 : while (next
16369 2525 : && next != insn
16370 2525 : && distance < LEA_SEARCH_THRESHOLD)
16371 : {
16372 2339 : if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16373 : {
16374 1162 : distance = increase_distance(prev, next, distance);
16375 1162 : if (insn_uses_reg_mem (regno, next))
16376 : {
16377 : /* Return DISTANCE if OP0 is used in memory
16378 : address in NEXT. */
16379 83 : *found = true;
16380 83 : return distance;
16381 : }
16382 :
16383 1079 : if (insn_defines_reg (regno, INVALID_REGNUM, next))
16384 : {
16385 : /* Return -1 if OP0 is set in NEXT. */
16386 154 : *redefined = true;
16387 154 : return -1;
16388 : }
16389 :
16390 : prev = next;
16391 : }
16392 :
16393 2102 : if (next == BB_END (bb))
16394 : break;
16395 :
16396 1778 : next = NEXT_INSN (next);
16397 : }
16398 :
16399 : return distance;
16400 : }
16401 :
16402 : /* Return the distance between INSN and the next insn that uses
16403 : register number REGNO0 in memory address. Return -1 if no such
16404 : a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16405 :
16406 : static int
16407 417 : distance_agu_use (unsigned int regno0, rtx_insn *insn)
16408 : {
16409 417 : basic_block bb = BLOCK_FOR_INSN (insn);
16410 417 : int distance = 0;
16411 417 : bool found = false;
16412 417 : bool redefined = false;
16413 :
16414 417 : if (insn != BB_END (bb))
16415 400 : distance = distance_agu_use_in_bb (regno0, insn, distance,
16416 : NEXT_INSN (insn),
16417 : &found, &redefined);
16418 :
16419 417 : if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16420 : {
16421 242 : edge e;
16422 242 : edge_iterator ei;
16423 242 : bool simple_loop = false;
16424 :
16425 527 : FOR_EACH_EDGE (e, ei, bb->succs)
16426 347 : if (e->dest == bb)
16427 : {
16428 : simple_loop = true;
16429 : break;
16430 : }
16431 :
16432 242 : if (simple_loop)
16433 62 : distance = distance_agu_use_in_bb (regno0, insn,
16434 : distance, BB_HEAD (bb),
16435 : &found, &redefined);
16436 : else
16437 : {
16438 180 : int shortest_dist = -1;
16439 180 : bool found_in_bb = false;
16440 180 : bool redefined_in_bb = false;
16441 :
16442 465 : FOR_EACH_EDGE (e, ei, bb->succs)
16443 : {
16444 285 : int bb_dist
16445 570 : = distance_agu_use_in_bb (regno0, insn,
16446 285 : distance, BB_HEAD (e->dest),
16447 : &found_in_bb, &redefined_in_bb);
16448 285 : if (found_in_bb)
16449 : {
16450 17 : if (shortest_dist < 0)
16451 : shortest_dist = bb_dist;
16452 2 : else if (bb_dist > 0)
16453 2 : shortest_dist = MIN (bb_dist, shortest_dist);
16454 :
16455 17 : found = true;
16456 : }
16457 : }
16458 :
16459 180 : distance = shortest_dist;
16460 : }
16461 : }
16462 :
16463 417 : if (!found || redefined)
16464 : return -1;
16465 :
16466 81 : return distance >> 1;
16467 : }
16468 :
/* Define this macro to tune LEA priority vs ADD.  It takes effect when
   there is a dilemma of choosing LEA or ADD:
   Negative value: ADD is preferred over LEA
   Zero: Neutral
   Positive value: LEA is preferred over ADD.  */
#define IX86_LEA_PRIORITY 0
16475 :
16476 : /* Return true if usage of lea INSN has performance advantage
16477 : over a sequence of instructions. Instructions sequence has
16478 : SPLIT_COST cycles higher latency than lea latency. */
16479 :
16480 : static bool
16481 1617 : ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
16482 : unsigned int regno2, int split_cost, bool has_scale)
16483 : {
16484 1617 : int dist_define, dist_use;
16485 :
16486 : /* For Atom processors newer than Bonnell, if using a 2-source or
16487 : 3-source LEA for non-destructive destination purposes, or due to
16488 : wanting ability to use SCALE, the use of LEA is justified. */
16489 1617 : if (!TARGET_CPU_P (BONNELL))
16490 : {
16491 1200 : if (has_scale)
16492 : return true;
16493 1181 : if (split_cost < 1)
16494 : return false;
16495 406 : if (regno0 == regno1 || regno0 == regno2)
16496 : return false;
16497 : return true;
16498 : }
16499 :
16500 : /* Remember recog_data content. */
16501 417 : struct recog_data_d recog_data_save = recog_data;
16502 :
16503 417 : dist_define = distance_non_agu_define (regno1, regno2, insn);
16504 417 : dist_use = distance_agu_use (regno0, insn);
16505 :
16506 : /* distance_non_agu_define can call get_attr_type which can call
16507 : recog_memoized, restore recog_data back to previous content. */
16508 417 : recog_data = recog_data_save;
16509 :
16510 417 : if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16511 : {
16512 : /* If there is no non AGU operand definition, no AGU
16513 : operand usage and split cost is 0 then both lea
16514 : and non lea variants have same priority. Currently
16515 : we prefer lea for 64 bit code and non lea on 32 bit
16516 : code. */
16517 223 : if (dist_use < 0 && split_cost == 0)
16518 98 : return TARGET_64BIT || IX86_LEA_PRIORITY;
16519 : else
16520 : return true;
16521 : }
16522 :
16523 : /* With longer definitions distance lea is more preferable.
16524 : Here we change it to take into account splitting cost and
16525 : lea priority. */
16526 194 : dist_define += split_cost + IX86_LEA_PRIORITY;
16527 :
16528 : /* If there is no use in memory address then we just check
16529 : that split cost exceeds AGU stall. */
16530 194 : if (dist_use < 0)
16531 190 : return dist_define > LEA_MAX_STALL;
16532 :
16533 : /* If this insn has both backward non-agu dependence and forward
16534 : agu dependence, the one with short distance takes effect. */
16535 4 : return dist_define >= dist_use;
16536 : }
16537 :
16538 : /* Return true if we need to split op0 = op1 + op2 into a sequence of
16539 : move and add to avoid AGU stalls. */
16540 :
16541 : bool
16542 9114182 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16543 : {
16544 9114182 : unsigned int regno0, regno1, regno2;
16545 :
16546 : /* Check if we need to optimize. */
16547 9114182 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16548 9113373 : return false;
16549 :
16550 809 : regno0 = true_regnum (operands[0]);
16551 809 : regno1 = true_regnum (operands[1]);
16552 809 : regno2 = true_regnum (operands[2]);
16553 :
16554 : /* We need to split only adds with non destructive
16555 : destination operand. */
16556 809 : if (regno0 == regno1 || regno0 == regno2)
16557 : return false;
16558 : else
16559 244 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16560 : }
16561 :
16562 : /* Return true if we should emit lea instruction instead of mov
16563 : instruction. */
16564 :
16565 : bool
16566 29504460 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16567 : {
16568 29504460 : unsigned int regno0, regno1;
16569 :
16570 : /* Check if we need to optimize. */
16571 29504460 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16572 29502172 : return false;
16573 :
16574 : /* Use lea for reg to reg moves only. */
16575 2288 : if (!REG_P (operands[0]) || !REG_P (operands[1]))
16576 : return false;
16577 :
16578 463 : regno0 = true_regnum (operands[0]);
16579 463 : regno1 = true_regnum (operands[1]);
16580 :
16581 463 : return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16582 : }
16583 :
16584 : /* Return true if we need to split lea into a sequence of
16585 : instructions to avoid AGU stalls during peephole2. */
16586 :
16587 : bool
16588 11055145 : ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16589 : {
16590 11055145 : unsigned int regno0, regno1, regno2;
16591 11055145 : int split_cost;
16592 11055145 : struct ix86_address parts;
16593 11055145 : int ok;
16594 :
16595 : /* The "at least two components" test below might not catch simple
16596 : move or zero extension insns if parts.base is non-NULL and parts.disp
16597 : is const0_rtx as the only components in the address, e.g. if the
16598 : register is %rbp or %r13. As this test is much cheaper and moves or
16599 : zero extensions are the common case, do this check first. */
16600 11055145 : if (REG_P (operands[1])
16601 11055145 : || (SImode_address_operand (operands[1], VOIDmode)
16602 144387 : && REG_P (XEXP (operands[1], 0))))
16603 4039459 : return false;
16604 :
16605 7015686 : ok = ix86_decompose_address (operands[1], &parts);
16606 7015686 : gcc_assert (ok);
16607 :
16608 : /* There should be at least two components in the address. */
16609 7015686 : if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16610 7015686 : + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16611 : return false;
16612 :
16613 : /* We should not split into add if non legitimate pic
16614 : operand is used as displacement. */
16615 2647412 : if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16616 : return false;
16617 :
16618 2597380 : regno0 = true_regnum (operands[0]) ;
16619 2597380 : regno1 = INVALID_REGNUM;
16620 2597380 : regno2 = INVALID_REGNUM;
16621 :
16622 2597380 : if (parts.base)
16623 2522032 : regno1 = true_regnum (parts.base);
16624 2597380 : if (parts.index)
16625 483542 : regno2 = true_regnum (parts.index);
16626 :
16627 : /* Use add for a = a + b and a = b + a since it is faster and shorter
16628 : than lea for most processors. For the processors like BONNELL, if
16629 : the destination register of LEA holds an actual address which will
16630 : be used soon, LEA is better and otherwise ADD is better. */
16631 2597380 : if (!TARGET_CPU_P (BONNELL)
16632 2597256 : && parts.scale == 1
16633 2353397 : && (!parts.disp || parts.disp == const0_rtx)
16634 177187 : && (regno0 == regno1 || regno0 == regno2))
16635 : return true;
16636 :
16637 : /* Split with -Oz if the encoding requires fewer bytes. */
16638 2591316 : if (optimize_size > 1
16639 27 : && parts.scale > 1
16640 4 : && !parts.base
16641 4 : && (!parts.disp || parts.disp == const0_rtx))
16642 : return true;
16643 :
16644 : /* Check we need to optimize. */
16645 2591312 : if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16646 2590976 : return false;
16647 :
16648 336 : split_cost = 0;
16649 :
16650 : /* Compute how many cycles we will add to execution time
16651 : if split lea into a sequence of instructions. */
16652 336 : if (parts.base || parts.index)
16653 : {
16654 : /* Have to use mov instruction if non destructive
16655 : destination form is used. */
16656 336 : if (regno1 != regno0 && regno2 != regno0)
16657 265 : split_cost += 1;
16658 :
16659 : /* Have to add index to base if both exist. */
16660 336 : if (parts.base && parts.index)
16661 53 : split_cost += 1;
16662 :
16663 : /* Have to use shift and adds if scale is 2 or greater. */
16664 336 : if (parts.scale > 1)
16665 : {
16666 29 : if (regno0 != regno1)
16667 23 : split_cost += 1;
16668 6 : else if (regno2 == regno0)
16669 0 : split_cost += 4;
16670 : else
16671 6 : split_cost += parts.scale;
16672 : }
16673 :
16674 : /* Have to use add instruction with immediate if
16675 : disp is non zero. */
16676 336 : if (parts.disp && parts.disp != const0_rtx)
16677 276 : split_cost += 1;
16678 :
16679 : /* Subtract the price of lea. */
16680 336 : split_cost -= 1;
16681 : }
16682 :
16683 336 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16684 336 : parts.scale > 1);
16685 : }
16686 :
16687 : /* Return true if it is ok to optimize an ADD operation to LEA
16688 : operation to avoid flag register consumation. For most processors,
16689 : ADD is faster than LEA. For the processors like BONNELL, if the
16690 : destination register of LEA holds an actual address which will be
16691 : used soon, LEA is better and otherwise ADD is better. */
16692 :
16693 : bool
16694 9172507 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16695 : {
16696 9172507 : unsigned int regno0 = true_regnum (operands[0]);
16697 9172507 : unsigned int regno1 = true_regnum (operands[1]);
16698 9172507 : unsigned int regno2 = true_regnum (operands[2]);
16699 :
16700 : /* If a = b + c, (a!=b && a!=c), must use lea form. */
16701 9172507 : if (regno0 != regno1 && regno0 != regno2)
16702 : return true;
16703 :
16704 7149274 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16705 7148700 : return false;
16706 :
16707 574 : return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16708 : }
16709 :
16710 : /* Return true if destination reg of SET_BODY is shift count of
16711 : USE_BODY. */
16712 :
16713 : static bool
16714 89 : ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16715 : {
16716 89 : rtx set_dest;
16717 89 : rtx shift_rtx;
16718 89 : int i;
16719 :
16720 : /* Retrieve destination of SET_BODY. */
16721 89 : switch (GET_CODE (set_body))
16722 : {
16723 73 : case SET:
16724 73 : set_dest = SET_DEST (set_body);
16725 73 : if (!set_dest || !REG_P (set_dest))
16726 : return false;
16727 72 : break;
16728 8 : case PARALLEL:
16729 24 : for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16730 16 : if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16731 : use_body))
16732 : return true;
16733 : /* FALLTHROUGH */
16734 : default:
16735 : return false;
16736 : }
16737 :
16738 : /* Retrieve shift count of USE_BODY. */
16739 72 : switch (GET_CODE (use_body))
16740 : {
16741 24 : case SET:
16742 24 : shift_rtx = XEXP (use_body, 1);
16743 24 : break;
16744 24 : case PARALLEL:
16745 72 : for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16746 48 : if (ix86_dep_by_shift_count_body (set_body,
16747 48 : XVECEXP (use_body, 0, i)))
16748 : return true;
16749 : /* FALLTHROUGH */
16750 : default:
16751 : return false;
16752 : }
16753 :
16754 24 : if (shift_rtx
16755 24 : && (GET_CODE (shift_rtx) == ASHIFT
16756 21 : || GET_CODE (shift_rtx) == LSHIFTRT
16757 5 : || GET_CODE (shift_rtx) == ASHIFTRT
16758 0 : || GET_CODE (shift_rtx) == ROTATE
16759 0 : || GET_CODE (shift_rtx) == ROTATERT))
16760 : {
16761 24 : rtx shift_count = XEXP (shift_rtx, 1);
16762 :
16763 : /* Return true if shift count is dest of SET_BODY. */
16764 24 : if (REG_P (shift_count))
16765 : {
16766 : /* Add check since it can be invoked before register
16767 : allocation in pre-reload schedule. */
16768 0 : if (reload_completed
16769 0 : && true_regnum (set_dest) == true_regnum (shift_count))
16770 : return true;
16771 0 : else if (REGNO(set_dest) == REGNO(shift_count))
16772 : return true;
16773 : }
16774 : }
16775 :
16776 : return false;
16777 : }
16778 :
16779 : /* Return true if destination reg of SET_INSN is shift count of
16780 : USE_INSN. */
16781 :
16782 : bool
16783 25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16784 : {
16785 25 : return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16786 25 : PATTERN (use_insn));
16787 : }
16788 :
16789 : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16790 : are ok, keeping in mind the possible movddup alternative. */
16791 :
16792 : bool
16793 89884 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16794 : {
16795 89884 : if (MEM_P (operands[0]))
16796 2025 : return rtx_equal_p (operands[0], operands[1 + high]);
16797 87859 : if (MEM_P (operands[1]) && MEM_P (operands[2]))
16798 1009 : return false;
16799 : return true;
16800 : }
16801 :
16802 : /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16803 : then replicate the value for all elements of the vector
16804 : register. */
16805 :
16806 : rtx
16807 74842 : ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16808 : {
16809 74842 : int i, n_elt;
16810 74842 : rtvec v;
16811 74842 : machine_mode scalar_mode;
16812 :
16813 74842 : switch (mode)
16814 : {
16815 1292 : case E_V64QImode:
16816 1292 : case E_V32QImode:
16817 1292 : case E_V16QImode:
16818 1292 : case E_V32HImode:
16819 1292 : case E_V16HImode:
16820 1292 : case E_V8HImode:
16821 1292 : case E_V16SImode:
16822 1292 : case E_V8SImode:
16823 1292 : case E_V4SImode:
16824 1292 : case E_V2SImode:
16825 1292 : case E_V8DImode:
16826 1292 : case E_V4DImode:
16827 1292 : case E_V2DImode:
16828 1292 : gcc_assert (vect);
16829 : /* FALLTHRU */
16830 74842 : case E_V2HFmode:
16831 74842 : case E_V4HFmode:
16832 74842 : case E_V8HFmode:
16833 74842 : case E_V16HFmode:
16834 74842 : case E_V32HFmode:
16835 74842 : case E_V16SFmode:
16836 74842 : case E_V8SFmode:
16837 74842 : case E_V4SFmode:
16838 74842 : case E_V2SFmode:
16839 74842 : case E_V8DFmode:
16840 74842 : case E_V4DFmode:
16841 74842 : case E_V2DFmode:
16842 74842 : case E_V32BFmode:
16843 74842 : case E_V16BFmode:
16844 74842 : case E_V8BFmode:
16845 74842 : case E_V4BFmode:
16846 74842 : case E_V2BFmode:
16847 74842 : n_elt = GET_MODE_NUNITS (mode);
16848 74842 : v = rtvec_alloc (n_elt);
16849 74842 : scalar_mode = GET_MODE_INNER (mode);
16850 :
16851 74842 : RTVEC_ELT (v, 0) = value;
16852 :
16853 232146 : for (i = 1; i < n_elt; ++i)
16854 157304 : RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16855 :
16856 74842 : return gen_rtx_CONST_VECTOR (mode, v);
16857 :
16858 0 : default:
16859 0 : gcc_unreachable ();
16860 : }
16861 : }
16862 :
16863 : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16864 : and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16865 : for an SSE register. If VECT is true, then replicate the mask for
16866 : all elements of the vector register. If INVERT is true, then create
16867 : a mask excluding the sign bit. */
16868 :
16869 : rtx
16870 76227 : ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16871 : {
16872 76227 : machine_mode vec_mode, imode;
16873 76227 : wide_int w;
16874 76227 : rtx mask, v;
16875 :
16876 76227 : switch (mode)
16877 : {
16878 : case E_V2HFmode:
16879 : case E_V4HFmode:
16880 : case E_V8HFmode:
16881 : case E_V16HFmode:
16882 : case E_V32HFmode:
16883 : case E_V32BFmode:
16884 : case E_V16BFmode:
16885 : case E_V8BFmode:
16886 : case E_V4BFmode:
16887 : case E_V2BFmode:
16888 : vec_mode = mode;
16889 : imode = HImode;
16890 : break;
16891 :
16892 34172 : case E_V16SImode:
16893 34172 : case E_V16SFmode:
16894 34172 : case E_V8SImode:
16895 34172 : case E_V4SImode:
16896 34172 : case E_V8SFmode:
16897 34172 : case E_V4SFmode:
16898 34172 : case E_V2SFmode:
16899 34172 : case E_V2SImode:
16900 34172 : vec_mode = mode;
16901 34172 : imode = SImode;
16902 34172 : break;
16903 :
16904 39192 : case E_V8DImode:
16905 39192 : case E_V4DImode:
16906 39192 : case E_V2DImode:
16907 39192 : case E_V8DFmode:
16908 39192 : case E_V4DFmode:
16909 39192 : case E_V2DFmode:
16910 39192 : vec_mode = mode;
16911 39192 : imode = DImode;
16912 39192 : break;
16913 :
16914 2364 : case E_TImode:
16915 2364 : case E_TFmode:
16916 2364 : vec_mode = VOIDmode;
16917 2364 : imode = TImode;
16918 2364 : break;
16919 :
16920 0 : default:
16921 0 : gcc_unreachable ();
16922 : }
16923 :
16924 76227 : machine_mode inner_mode = GET_MODE_INNER (mode);
16925 152454 : w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16926 152454 : GET_MODE_BITSIZE (inner_mode));
16927 76227 : if (invert)
16928 39888 : w = wi::bit_not (w);
16929 :
16930 : /* Force this value into the low part of a fp vector constant. */
16931 76227 : mask = immed_wide_int_const (w, imode);
16932 76227 : mask = gen_lowpart (inner_mode, mask);
16933 :
16934 76227 : if (vec_mode == VOIDmode)
16935 2364 : return force_reg (inner_mode, mask);
16936 :
16937 73863 : v = ix86_build_const_vector (vec_mode, vect, mask);
16938 73863 : return force_reg (vec_mode, v);
16939 76227 : }
16940 :
16941 : /* Return HOST_WIDE_INT for const vector OP in MODE. */
16942 :
16943 : HOST_WIDE_INT
16944 134425 : ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16945 : {
16946 285073 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16947 0 : gcc_unreachable ();
16948 :
16949 134425 : int nunits = GET_MODE_NUNITS (mode);
16950 268850 : wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16951 134425 : machine_mode innermode = GET_MODE_INNER (mode);
16952 134425 : unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16953 :
16954 134425 : switch (mode)
16955 : {
16956 : case E_V2QImode:
16957 : case E_V4QImode:
16958 : case E_V2HImode:
16959 : case E_V8QImode:
16960 : case E_V4HImode:
16961 : case E_V2SImode:
16962 461903 : for (int i = 0; i < nunits; ++i)
16963 : {
16964 332172 : int v = INTVAL (XVECEXP (op, 0, i));
16965 332172 : wide_int wv = wi::shwi (v, innermode_bits);
16966 332172 : val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16967 332172 : }
16968 : break;
16969 88 : case E_V1SImode:
16970 88 : case E_V1DImode:
16971 88 : op = CONST_VECTOR_ELT (op, 0);
16972 88 : return INTVAL (op);
16973 : case E_V2HFmode:
16974 : case E_V2BFmode:
16975 : case E_V4HFmode:
16976 : case E_V4BFmode:
16977 : case E_V2SFmode:
16978 13840 : for (int i = 0; i < nunits; ++i)
16979 : {
16980 9234 : rtx x = XVECEXP (op, 0, i);
16981 9234 : int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16982 9234 : REAL_MODE_FORMAT (innermode));
16983 9234 : wide_int wv = wi::shwi (v, innermode_bits);
16984 9234 : val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16985 9234 : }
16986 : break;
16987 0 : default:
16988 0 : gcc_unreachable ();
16989 : }
16990 :
16991 134337 : return val.to_shwi ();
16992 134425 : }
16993 :
16994 32 : int ix86_get_flags_cc (rtx_code code)
16995 : {
16996 32 : switch (code)
16997 : {
16998 : case NE: return X86_CCNE;
16999 : case EQ: return X86_CCE;
17000 : case GE: return X86_CCNL;
17001 : case GT: return X86_CCNLE;
17002 : case LE: return X86_CCLE;
17003 : case LT: return X86_CCL;
17004 : case GEU: return X86_CCNB;
17005 : case GTU: return X86_CCNBE;
17006 : case LEU: return X86_CCBE;
17007 : case LTU: return X86_CCB;
17008 : default: return -1;
17009 : }
17010 : }
17011 :
17012 : /* Return TRUE or FALSE depending on whether the first SET in INSN
17013 : has source and destination with matching CC modes, and that the
17014 : CC mode is at least as constrained as REQ_MODE. */
17015 :
17016 : bool
17017 53324329 : ix86_match_ccmode (rtx insn, machine_mode req_mode)
17018 : {
17019 53324329 : rtx set;
17020 53324329 : machine_mode set_mode;
17021 :
17022 53324329 : set = PATTERN (insn);
17023 53324329 : if (GET_CODE (set) == PARALLEL)
17024 495976 : set = XVECEXP (set, 0, 0);
17025 53324329 : gcc_assert (GET_CODE (set) == SET);
17026 53324329 : gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17027 :
17028 53324329 : set_mode = GET_MODE (SET_DEST (set));
17029 53324329 : switch (set_mode)
17030 : {
17031 1416404 : case E_CCNOmode:
17032 1416404 : if (req_mode != CCNOmode
17033 97262 : && (req_mode != CCmode
17034 0 : || XEXP (SET_SRC (set), 1) != const0_rtx))
17035 : return false;
17036 : break;
17037 5472747 : case E_CCmode:
17038 5472747 : if (req_mode == CCGCmode)
17039 : return false;
17040 : /* FALLTHRU */
17041 9075499 : case E_CCGCmode:
17042 9075499 : if (req_mode == CCGOCmode || req_mode == CCNOmode)
17043 : return false;
17044 : /* FALLTHRU */
17045 10131699 : case E_CCGOCmode:
17046 10131699 : if (req_mode == CCZmode)
17047 : return false;
17048 : /* FALLTHRU */
17049 : case E_CCZmode:
17050 : break;
17051 :
17052 0 : case E_CCGZmode:
17053 :
17054 0 : case E_CCAmode:
17055 0 : case E_CCCmode:
17056 0 : case E_CCOmode:
17057 0 : case E_CCPmode:
17058 0 : case E_CCSmode:
17059 0 : if (set_mode != req_mode)
17060 : return false;
17061 : break;
17062 :
17063 0 : default:
17064 0 : gcc_unreachable ();
17065 : }
17066 :
17067 53218385 : return GET_MODE (SET_SRC (set)) == set_mode;
17068 : }
17069 :
17070 : machine_mode
17071 13531609 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17072 : {
17073 13531609 : machine_mode mode = GET_MODE (op0);
17074 :
17075 13531609 : if (SCALAR_FLOAT_MODE_P (mode))
17076 : {
17077 143707 : gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17078 : return CCFPmode;
17079 : }
17080 :
17081 13387902 : switch (code)
17082 : {
17083 : /* Only zero flag is needed. */
17084 : case EQ: /* ZF=0 */
17085 : case NE: /* ZF!=0 */
17086 : return CCZmode;
17087 : /* Codes needing carry flag. */
17088 977403 : case GEU: /* CF=0 */
17089 977403 : case LTU: /* CF=1 */
17090 977403 : rtx geu;
17091 : /* Detect overflow checks. They need just the carry flag. */
17092 977403 : if (GET_CODE (op0) == PLUS
17093 977403 : && (rtx_equal_p (op1, XEXP (op0, 0))
17094 125414 : || rtx_equal_p (op1, XEXP (op0, 1))))
17095 17411 : return CCCmode;
17096 : /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
17097 : Match LTU of op0
17098 : (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
17099 : and op1
17100 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
17101 : where CC_CCC is either CC or CCC. */
17102 959992 : else if (code == LTU
17103 366638 : && GET_CODE (op0) == NEG
17104 18 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
17105 0 : && REG_P (XEXP (geu, 0))
17106 0 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
17107 0 : || GET_MODE (XEXP (geu, 0)) == CCmode)
17108 0 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
17109 0 : && XEXP (geu, 1) == const0_rtx
17110 0 : && GET_CODE (op1) == LTU
17111 0 : && REG_P (XEXP (op1, 0))
17112 0 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
17113 0 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
17114 959992 : && XEXP (op1, 1) == const0_rtx)
17115 : return CCCmode;
17116 : /* Similarly for *x86_cmc pattern.
17117 : Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
17118 : and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
17119 : It is sufficient to test that the operand modes are CCCmode. */
17120 959992 : else if (code == LTU
17121 366638 : && GET_CODE (op0) == NEG
17122 18 : && GET_CODE (XEXP (op0, 0)) == LTU
17123 0 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
17124 0 : && GET_CODE (op1) == GEU
17125 0 : && GET_MODE (XEXP (op1, 0)) == CCCmode)
17126 : return CCCmode;
17127 : /* Similarly for the comparison of addcarry/subborrow pattern. */
17128 366638 : else if (code == LTU
17129 366638 : && GET_CODE (op0) == ZERO_EXTEND
17130 15266 : && GET_CODE (op1) == PLUS
17131 11016 : && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
17132 11016 : && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
17133 : return CCCmode;
17134 : else
17135 948976 : return CCmode;
17136 : case GTU: /* CF=0 & ZF=0 */
17137 : case LEU: /* CF=1 | ZF=1 */
17138 : return CCmode;
17139 : /* Codes possibly doable only with sign flag when
17140 : comparing against zero. */
17141 788742 : case GE: /* SF=OF or SF=0 */
17142 788742 : case LT: /* SF<>OF or SF=1 */
17143 788742 : if (op1 == const0_rtx)
17144 : return CCGOCmode;
17145 : else
17146 : /* For other cases Carry flag is not required. */
17147 447606 : return CCGCmode;
17148 : /* Codes doable only with sign flag when comparing
17149 : against zero, but we miss jump instruction for it
17150 : so we need to use relational tests against overflow
17151 : that thus needs to be zero. */
17152 902213 : case GT: /* ZF=0 & SF=OF */
17153 902213 : case LE: /* ZF=1 | SF<>OF */
17154 902213 : if (op1 == const0_rtx)
17155 : return CCNOmode;
17156 : else
17157 599000 : return CCGCmode;
17158 : default:
17159 : /* CCmode should be used in all other cases. */
17160 : return CCmode;
17161 : }
17162 : }
17163 :
17164 : /* Return TRUE or FALSE depending on whether the ptest instruction
17165 : INSN has source and destination with suitable matching CC modes. */
17166 :
17167 : bool
17168 91731 : ix86_match_ptest_ccmode (rtx insn)
17169 : {
17170 91731 : rtx set, src;
17171 91731 : machine_mode set_mode;
17172 :
17173 91731 : set = PATTERN (insn);
17174 91731 : gcc_assert (GET_CODE (set) == SET);
17175 91731 : src = SET_SRC (set);
17176 91731 : gcc_assert (GET_CODE (src) == UNSPEC
17177 : && XINT (src, 1) == UNSPEC_PTEST);
17178 :
17179 91731 : set_mode = GET_MODE (src);
17180 91731 : if (set_mode != CCZmode
17181 : && set_mode != CCCmode
17182 : && set_mode != CCmode)
17183 : return false;
17184 91731 : return GET_MODE (SET_DEST (set)) == set_mode;
17185 : }
17186 :
17187 : /* Return the fixed registers used for condition codes. */
17188 :
17189 : static bool
17190 18543986 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17191 : {
17192 18543986 : *p1 = FLAGS_REG;
17193 18543986 : *p2 = INVALID_REGNUM;
17194 18543986 : return true;
17195 : }
17196 :
17197 : /* If two condition code modes are compatible, return a condition code
17198 : mode which is compatible with both. Otherwise, return
17199 : VOIDmode. */
17200 :
17201 : static machine_mode
17202 30486 : ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
17203 : {
17204 30486 : if (m1 == m2)
17205 : return m1;
17206 :
17207 29741 : if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17208 : return VOIDmode;
17209 :
17210 29741 : if ((m1 == CCGCmode && m2 == CCGOCmode)
17211 29741 : || (m1 == CCGOCmode && m2 == CCGCmode))
17212 : return CCGCmode;
17213 :
17214 29741 : if ((m1 == CCNOmode && m2 == CCGOCmode)
17215 29560 : || (m1 == CCGOCmode && m2 == CCNOmode))
17216 : return CCNOmode;
17217 :
17218 29448 : if (m1 == CCZmode
17219 15606 : && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
17220 : return m2;
17221 16972 : else if (m2 == CCZmode
17222 13586 : && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
17223 : return m1;
17224 :
17225 6589 : switch (m1)
17226 : {
17227 0 : default:
17228 0 : gcc_unreachable ();
17229 :
17230 6589 : case E_CCmode:
17231 6589 : case E_CCGCmode:
17232 6589 : case E_CCGOCmode:
17233 6589 : case E_CCNOmode:
17234 6589 : case E_CCAmode:
17235 6589 : case E_CCCmode:
17236 6589 : case E_CCOmode:
17237 6589 : case E_CCPmode:
17238 6589 : case E_CCSmode:
17239 6589 : case E_CCZmode:
17240 6589 : switch (m2)
17241 : {
17242 : default:
17243 : return VOIDmode;
17244 :
17245 : case E_CCmode:
17246 : case E_CCGCmode:
17247 : case E_CCGOCmode:
17248 : case E_CCNOmode:
17249 : case E_CCAmode:
17250 : case E_CCCmode:
17251 : case E_CCOmode:
17252 : case E_CCPmode:
17253 : case E_CCSmode:
17254 : case E_CCZmode:
17255 : return CCmode;
17256 : }
17257 :
17258 : case E_CCFPmode:
17259 : /* These are only compatible with themselves, which we already
17260 : checked above. */
17261 : return VOIDmode;
17262 : }
17263 : }
17264 :
17265 : /* Return strategy to use for floating-point. We assume that fcomi is always
17266 : preferable where available, since that is also true when looking at size
17267 : (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17268 :
17269 : enum ix86_fpcmp_strategy
17270 5547592 : ix86_fp_comparison_strategy (enum rtx_code)
17271 : {
17272 : /* Do fcomi/sahf based test when profitable. */
17273 :
17274 5547592 : if (TARGET_CMOVE)
17275 : return IX86_FPCMP_COMI;
17276 :
17277 0 : if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
17278 0 : return IX86_FPCMP_SAHF;
17279 :
17280 : return IX86_FPCMP_ARITH;
17281 : }
17282 :
17283 : /* Convert comparison codes we use to represent FP comparison to integer
17284 : code that will result in proper branch. Return UNKNOWN if no such code
17285 : is available. */
17286 :
17287 : enum rtx_code
17288 584771 : ix86_fp_compare_code_to_integer (enum rtx_code code)
17289 : {
17290 584771 : switch (code)
17291 : {
17292 : case GT:
17293 : return GTU;
17294 17891 : case GE:
17295 17891 : return GEU;
17296 : case ORDERED:
17297 : case UNORDERED:
17298 : return code;
17299 118982 : case UNEQ:
17300 118982 : return EQ;
17301 17396 : case UNLT:
17302 17396 : return LTU;
17303 31481 : case UNLE:
17304 31481 : return LEU;
17305 113696 : case LTGT:
17306 113696 : return NE;
17307 683 : case EQ:
17308 683 : case NE:
17309 683 : if (TARGET_AVX10_2)
17310 : return code;
17311 : /* FALLTHRU. */
17312 225 : default:
17313 225 : return UNKNOWN;
17314 : }
17315 : }
17316 :
17317 : /* Zero extend possibly SImode EXP to Pmode register. */
17318 : rtx
17319 44362 : ix86_zero_extend_to_Pmode (rtx exp)
17320 : {
17321 56194 : return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
17322 : }
17323 :
17324 : /* Return true if the function is called via PLT. */
17325 :
17326 : bool
17327 982043 : ix86_call_use_plt_p (rtx call_op)
17328 : {
17329 982043 : if (SYMBOL_REF_LOCAL_P (call_op))
17330 : {
17331 195837 : if (SYMBOL_REF_DECL (call_op)
17332 195837 : && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
17333 : {
17334 : /* NB: All ifunc functions must be called via PLT. */
17335 112609 : cgraph_node *node
17336 112609 : = cgraph_node::get (SYMBOL_REF_DECL (call_op));
17337 112609 : if (node && node->ifunc_resolver)
17338 : return true;
17339 : }
17340 195817 : return false;
17341 : }
17342 : return true;
17343 : }
17344 :
17345 : /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
17346 : the PLT entry will be used as the function address for local IFUNC
17347 : functions. When the PIC register is needed for PLT call, indirect
17348 : call via the PLT entry will fail since the PIC register may not be
17349 : set up properly for indirect call. In this case, we should return
17350 : false. */
17351 :
17352 : static bool
17353 765881824 : ix86_ifunc_ref_local_ok (void)
17354 : {
17355 765881824 : return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
17356 : }
17357 :
17358 : /* Return true if the function being called was marked with attribute
17359 : "noplt" or using -fno-plt and we are compiling for non-PIC. We need
17360 : to handle the non-PIC case in the backend because there is no easy
17361 : interface for the front-end to force non-PLT calls to use the GOT.
17362 : This is currently used only with 64-bit or 32-bit GOT32X ELF targets
17363 : to call the function marked "noplt" indirectly. */
17364 :
17365 : bool
17366 5897138 : ix86_nopic_noplt_attribute_p (rtx call_op)
17367 : {
17368 5411816 : if (flag_pic || ix86_cmodel == CM_LARGE
17369 : || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17370 : || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17371 11308954 : || SYMBOL_REF_LOCAL_P (call_op))
17372 : return false;
17373 :
17374 3785147 : tree symbol_decl = SYMBOL_REF_DECL (call_op);
17375 :
17376 3785147 : if (!flag_plt
17377 3785147 : || (symbol_decl != NULL_TREE
17378 3785115 : && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
17379 34 : return true;
17380 :
17381 : return false;
17382 : }
17383 :
17384 : /* Helper to output the jmp/call. */
17385 : static void
17386 33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
17387 : {
17388 33 : if (thunk_name != NULL)
17389 : {
17390 22 : if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17391 1 : && ix86_indirect_branch_cs_prefix)
17392 1 : fprintf (asm_out_file, "\tcs\n");
17393 22 : fprintf (asm_out_file, "\tjmp\t");
17394 22 : assemble_name (asm_out_file, thunk_name);
17395 22 : putc ('\n', asm_out_file);
17396 22 : if ((ix86_harden_sls & harden_sls_indirect_jmp))
17397 2 : fputs ("\tint3\n", asm_out_file);
17398 : }
17399 : else
17400 11 : output_indirect_thunk (regno);
17401 33 : }
17402 :
17403 : /* Output indirect branch via a call and return thunk. CALL_OP is a
17404 : register which contains the branch target. XASM is the assembly
17405 : template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17406 : A normal call is converted to:
17407 :
17408 : call __x86_indirect_thunk_reg
17409 :
17410 : and a tail call is converted to:
17411 :
17412 : jmp __x86_indirect_thunk_reg
17413 : */
17414 :
17415 : static void
17416 50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17417 : {
17418 50 : char thunk_name_buf[32];
17419 50 : char *thunk_name;
17420 50 : enum indirect_thunk_prefix need_prefix
17421 50 : = indirect_thunk_need_prefix (current_output_insn);
17422 50 : int regno = REGNO (call_op);
17423 :
17424 50 : if (cfun->machine->indirect_branch_type
17425 50 : != indirect_branch_thunk_inline)
17426 : {
17427 39 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17428 16 : SET_HARD_REG_BIT (indirect_thunks_used, regno);
17429 :
17430 39 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17431 39 : thunk_name = thunk_name_buf;
17432 : }
17433 : else
17434 : thunk_name = NULL;
17435 :
17436 50 : if (sibcall_p)
17437 27 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17438 : else
17439 : {
17440 23 : if (thunk_name != NULL)
17441 : {
17442 17 : if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
17443 1 : && ix86_indirect_branch_cs_prefix)
17444 1 : fprintf (asm_out_file, "\tcs\n");
17445 17 : fprintf (asm_out_file, "\tcall\t");
17446 17 : assemble_name (asm_out_file, thunk_name);
17447 17 : putc ('\n', asm_out_file);
17448 17 : return;
17449 : }
17450 :
17451 6 : char indirectlabel1[32];
17452 6 : char indirectlabel2[32];
17453 :
17454 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17455 : INDIRECT_LABEL,
17456 : indirectlabelno++);
17457 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17458 : INDIRECT_LABEL,
17459 : indirectlabelno++);
17460 :
17461 : /* Jump. */
17462 6 : fputs ("\tjmp\t", asm_out_file);
17463 6 : assemble_name_raw (asm_out_file, indirectlabel2);
17464 6 : fputc ('\n', asm_out_file);
17465 :
17466 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17467 :
17468 6 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17469 :
17470 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17471 :
17472 : /* Call. */
17473 6 : fputs ("\tcall\t", asm_out_file);
17474 6 : assemble_name_raw (asm_out_file, indirectlabel1);
17475 6 : fputc ('\n', asm_out_file);
17476 : }
17477 : }
17478 :
17479 : /* Output indirect branch via a call and return thunk. CALL_OP is
17480 : the branch target. XASM is the assembly template for CALL_OP.
17481 : Branch is a tail call if SIBCALL_P is true. A normal call is
17482 : converted to:
17483 :
17484 : jmp L2
17485 : L1:
17486 : push CALL_OP
17487 : jmp __x86_indirect_thunk
17488 : L2:
17489 : call L1
17490 :
17491 : and a tail call is converted to:
17492 :
17493 : push CALL_OP
17494 : jmp __x86_indirect_thunk
17495 : */
17496 :
17497 : static void
17498 0 : ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
17499 : bool sibcall_p)
17500 : {
17501 0 : char thunk_name_buf[32];
17502 0 : char *thunk_name;
17503 0 : char push_buf[64];
17504 0 : enum indirect_thunk_prefix need_prefix
17505 0 : = indirect_thunk_need_prefix (current_output_insn);
17506 0 : int regno = -1;
17507 :
17508 0 : if (cfun->machine->indirect_branch_type
17509 0 : != indirect_branch_thunk_inline)
17510 : {
17511 0 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17512 0 : indirect_thunk_needed = true;
17513 0 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17514 0 : thunk_name = thunk_name_buf;
17515 : }
17516 : else
17517 : thunk_name = NULL;
17518 :
17519 0 : snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
17520 0 : TARGET_64BIT ? 'q' : 'l', xasm);
17521 :
17522 0 : if (sibcall_p)
17523 : {
17524 0 : output_asm_insn (push_buf, &call_op);
17525 0 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17526 : }
17527 : else
17528 : {
17529 0 : char indirectlabel1[32];
17530 0 : char indirectlabel2[32];
17531 :
17532 0 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17533 : INDIRECT_LABEL,
17534 : indirectlabelno++);
17535 0 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17536 : INDIRECT_LABEL,
17537 : indirectlabelno++);
17538 :
17539 : /* Jump. */
17540 0 : fputs ("\tjmp\t", asm_out_file);
17541 0 : assemble_name_raw (asm_out_file, indirectlabel2);
17542 0 : fputc ('\n', asm_out_file);
17543 :
17544 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17545 :
17546 : /* An external function may be called via GOT, instead of PLT. */
17547 0 : if (MEM_P (call_op))
17548 : {
17549 0 : struct ix86_address parts;
17550 0 : rtx addr = XEXP (call_op, 0);
17551 0 : if (ix86_decompose_address (addr, &parts)
17552 0 : && parts.base == stack_pointer_rtx)
17553 : {
17554 : /* Since call will adjust stack by -UNITS_PER_WORD,
17555 : we must convert "disp(stack, index, scale)" to
17556 : "disp+UNITS_PER_WORD(stack, index, scale)". */
17557 0 : if (parts.index)
17558 : {
17559 0 : addr = gen_rtx_MULT (Pmode, parts.index,
17560 : GEN_INT (parts.scale));
17561 0 : addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17562 : addr);
17563 : }
17564 : else
17565 : addr = stack_pointer_rtx;
17566 :
17567 0 : rtx disp;
17568 0 : if (parts.disp != NULL_RTX)
17569 0 : disp = plus_constant (Pmode, parts.disp,
17570 0 : UNITS_PER_WORD);
17571 : else
17572 0 : disp = GEN_INT (UNITS_PER_WORD);
17573 :
17574 0 : addr = gen_rtx_PLUS (Pmode, addr, disp);
17575 0 : call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
17576 : }
17577 : }
17578 :
17579 0 : output_asm_insn (push_buf, &call_op);
17580 :
17581 0 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17582 :
17583 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17584 :
17585 : /* Call. */
17586 0 : fputs ("\tcall\t", asm_out_file);
17587 0 : assemble_name_raw (asm_out_file, indirectlabel1);
17588 0 : fputc ('\n', asm_out_file);
17589 : }
17590 0 : }
17591 :
17592 : /* Output indirect branch via a call and return thunk. CALL_OP is
17593 : the branch target. XASM is the assembly template for CALL_OP.
17594 : Branch is a tail call if SIBCALL_P is true. */
17595 :
17596 : static void
17597 50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
17598 : bool sibcall_p)
17599 : {
17600 50 : if (REG_P (call_op))
17601 50 : ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17602 : else
17603 0 : ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17604 50 : }
17605 :
17606 : /* Output indirect jump. CALL_OP is the jump target. */
17607 :
17608 : const char *
17609 8466 : ix86_output_indirect_jmp (rtx call_op)
17610 : {
17611 8466 : if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17612 : {
17613 : /* We can't have red-zone since "call" in the indirect thunk
17614 : pushes the return address onto stack, destroying red-zone. */
17615 4 : if (ix86_red_zone_used)
17616 0 : gcc_unreachable ();
17617 :
17618 4 : ix86_output_indirect_branch (call_op, "%0", true);
17619 : }
17620 : else
17621 8462 : output_asm_insn ("%!jmp\t%A0", &call_op);
17622 8466 : return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17623 : }
17624 :
17625 : /* Output return instrumentation for current function if needed. */
17626 :
17627 : static void
17628 1715110 : output_return_instrumentation (void)
17629 : {
17630 1715110 : if (ix86_instrument_return != instrument_return_none
17631 6 : && flag_fentry
17632 1715116 : && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17633 : {
17634 5 : if (ix86_flag_record_return)
17635 5 : fprintf (asm_out_file, "1:\n");
17636 5 : switch (ix86_instrument_return)
17637 : {
17638 2 : case instrument_return_call:
17639 2 : fprintf (asm_out_file, "\tcall\t__return__\n");
17640 2 : break;
17641 3 : case instrument_return_nop5:
17642 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17643 3 : fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17644 3 : break;
17645 : case instrument_return_none:
17646 : break;
17647 : }
17648 :
17649 5 : if (ix86_flag_record_return)
17650 : {
17651 5 : fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17652 5 : fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17653 5 : fprintf (asm_out_file, "\t.previous\n");
17654 : }
17655 : }
17656 1715110 : }
17657 :
17658 : /* Output function return. CALL_OP is the jump target. Add a REP
17659 : prefix to RET if LONG_P is true and function return is kept. */
17660 :
17661 : const char *
17662 1588444 : ix86_output_function_return (bool long_p)
17663 : {
17664 1588444 : output_return_instrumentation ();
17665 :
17666 1588444 : if (cfun->machine->function_return_type != indirect_branch_keep)
17667 : {
17668 17 : char thunk_name[32];
17669 17 : enum indirect_thunk_prefix need_prefix
17670 17 : = indirect_thunk_need_prefix (current_output_insn);
17671 :
17672 17 : if (cfun->machine->function_return_type
17673 17 : != indirect_branch_thunk_inline)
17674 : {
17675 12 : bool need_thunk = (cfun->machine->function_return_type
17676 : == indirect_branch_thunk);
17677 12 : indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17678 : true);
17679 12 : indirect_return_needed |= need_thunk;
17680 12 : fprintf (asm_out_file, "\tjmp\t");
17681 12 : assemble_name (asm_out_file, thunk_name);
17682 12 : putc ('\n', asm_out_file);
17683 : }
17684 : else
17685 5 : output_indirect_thunk (INVALID_REGNUM);
17686 :
17687 17 : return "";
17688 : }
17689 :
17690 3176365 : output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17691 1588427 : return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17692 : }
17693 :
17694 : /* Output indirect function return. RET_OP is the function return
17695 : target. */
17696 :
17697 : const char *
17698 17 : ix86_output_indirect_function_return (rtx ret_op)
17699 : {
17700 17 : if (cfun->machine->function_return_type != indirect_branch_keep)
17701 : {
17702 0 : char thunk_name[32];
17703 0 : enum indirect_thunk_prefix need_prefix
17704 0 : = indirect_thunk_need_prefix (current_output_insn);
17705 0 : unsigned int regno = REGNO (ret_op);
17706 0 : gcc_assert (regno == CX_REG);
17707 :
17708 0 : if (cfun->machine->function_return_type
17709 0 : != indirect_branch_thunk_inline)
17710 : {
17711 0 : bool need_thunk = (cfun->machine->function_return_type
17712 : == indirect_branch_thunk);
17713 0 : indirect_thunk_name (thunk_name, regno, need_prefix, true);
17714 :
17715 0 : if (need_thunk)
17716 : {
17717 0 : indirect_return_via_cx = true;
17718 0 : SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17719 : }
17720 0 : fprintf (asm_out_file, "\tjmp\t");
17721 0 : assemble_name (asm_out_file, thunk_name);
17722 0 : putc ('\n', asm_out_file);
17723 : }
17724 : else
17725 0 : output_indirect_thunk (regno);
17726 : }
17727 : else
17728 : {
17729 17 : output_asm_insn ("%!jmp\t%A0", &ret_op);
17730 17 : if (ix86_harden_sls & harden_sls_indirect_jmp)
17731 1 : fputs ("\tint3\n", asm_out_file);
17732 : }
17733 17 : return "";
17734 : }
17735 :
17736 : /* Output the assembly for a call instruction. */
17737 :
17738 : const char *
17739 6077204 : ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17740 : {
17741 6077204 : bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17742 6077204 : bool output_indirect_p
17743 : = (!TARGET_SEH
17744 6077204 : && cfun->machine->indirect_branch_type != indirect_branch_keep);
17745 6077204 : bool seh_nop_p = false;
17746 6077204 : const char *xasm;
17747 :
17748 6077204 : if (SIBLING_CALL_P (insn))
17749 : {
17750 126666 : output_return_instrumentation ();
17751 126666 : if (direct_p)
17752 : {
17753 117060 : if (ix86_nopic_noplt_attribute_p (call_op))
17754 : {
17755 4 : direct_p = false;
17756 4 : if (TARGET_64BIT)
17757 : {
17758 4 : if (output_indirect_p)
17759 : xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17760 : else
17761 4 : xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17762 : }
17763 : else
17764 : {
17765 0 : if (output_indirect_p)
17766 : xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17767 : else
17768 0 : xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17769 : }
17770 : }
17771 : else
17772 : xasm = "%!jmp\t%P0";
17773 : }
17774 : /* SEH epilogue detection requires the indirect branch case
17775 : to include REX.W. */
17776 9606 : else if (TARGET_SEH)
17777 : xasm = "%!rex.W jmp\t%A0";
17778 : else
17779 : {
17780 9606 : if (output_indirect_p)
17781 : xasm = "%0";
17782 : else
17783 9583 : xasm = "%!jmp\t%A0";
17784 : }
17785 :
17786 126666 : if (output_indirect_p && !direct_p)
17787 23 : ix86_output_indirect_branch (call_op, xasm, true);
17788 : else
17789 : {
17790 126643 : output_asm_insn (xasm, &call_op);
17791 126643 : if (!direct_p
17792 9587 : && (ix86_harden_sls & harden_sls_indirect_jmp))
17793 : return "int3";
17794 : }
17795 126665 : return "";
17796 : }
17797 :
17798 : /* SEH unwinding can require an extra nop to be emitted in several
17799 : circumstances. Determine if we have one of those. */
17800 5950538 : if (TARGET_SEH)
17801 : {
17802 : rtx_insn *i;
17803 :
17804 : for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17805 : {
17806 : /* Prevent a catch region from being adjacent to a jump that would
17807 : be interpreted as an epilogue sequence by the unwinder. */
17808 : if (JUMP_P(i) && CROSSING_JUMP_P (i))
17809 : {
17810 : seh_nop_p = true;
17811 : break;
17812 : }
17813 :
17814 : /* If we get to another real insn, we don't need the nop. */
17815 : if (INSN_P (i))
17816 : break;
17817 :
17818 : /* If we get to the epilogue note, prevent a catch region from
17819 : being adjacent to the standard epilogue sequence. Note that,
17820 : if non-call exceptions are enabled, we already did it during
17821 : epilogue expansion, or else, if the insn can throw internally,
17822 : we already did it during the reorg pass. */
17823 : if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17824 : && !flag_non_call_exceptions
17825 : && !can_throw_internal (insn))
17826 : {
17827 : seh_nop_p = true;
17828 : break;
17829 : }
17830 : }
17831 :
17832 : /* If we didn't find a real insn following the call, prevent the
17833 : unwinder from looking into the next function. */
17834 : if (i == NULL)
17835 : seh_nop_p = true;
17836 : }
17837 :
17838 5950538 : if (direct_p)
17839 : {
17840 5779056 : if (ix86_nopic_noplt_attribute_p (call_op))
17841 : {
17842 6 : direct_p = false;
17843 6 : if (TARGET_64BIT)
17844 : {
17845 6 : if (output_indirect_p)
17846 : xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17847 : else
17848 6 : xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17849 : }
17850 : else
17851 : {
17852 0 : if (output_indirect_p)
17853 : xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17854 : else
17855 0 : xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17856 : }
17857 : }
17858 : else
17859 : xasm = "%!call\t%P0";
17860 : }
17861 : else
17862 : {
17863 171482 : if (output_indirect_p)
17864 : xasm = "%0";
17865 : else
17866 171459 : xasm = "%!call\t%A0";
17867 : }
17868 :
17869 5950538 : if (output_indirect_p && !direct_p)
17870 23 : ix86_output_indirect_branch (call_op, xasm, false);
17871 : else
17872 5950515 : output_asm_insn (xasm, &call_op);
17873 :
17874 : if (seh_nop_p)
17875 : return "nop";
17876 :
17877 : return "";
17878 : }
17879 :
17880 : /* Return a MEM corresponding to a stack slot with mode MODE.
17881 : Allocate a new slot if necessary.
17882 :
17883 : The RTL for a function can have several slots available: N is
17884 : which slot to use. */
17885 :
17886 : rtx
17887 22370 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17888 : {
17889 22370 : struct stack_local_entry *s;
17890 :
17891 22370 : gcc_assert (n < MAX_386_STACK_LOCALS);
17892 :
17893 33733 : for (s = ix86_stack_locals; s; s = s->next)
17894 31150 : if (s->mode == mode && s->n == n)
17895 19787 : return validize_mem (copy_rtx (s->rtl));
17896 :
17897 2583 : int align = 0;
17898 : /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17899 : alignment with -m32 -mpreferred-stack-boundary=2. */
17900 2583 : if (mode == DImode
17901 329 : && !TARGET_64BIT
17902 329 : && n == SLOT_FLOATxFDI_387
17903 2912 : && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17904 : align = 32;
17905 2583 : s = ggc_alloc<stack_local_entry> ();
17906 2583 : s->n = n;
17907 2583 : s->mode = mode;
17908 5166 : s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17909 :
17910 2583 : s->next = ix86_stack_locals;
17911 2583 : ix86_stack_locals = s;
17912 2583 : return validize_mem (copy_rtx (s->rtl));
17913 : }
17914 :
17915 : static void
17916 1488365 : ix86_instantiate_decls (void)
17917 : {
17918 1488365 : struct stack_local_entry *s;
17919 :
17920 1488365 : for (s = ix86_stack_locals; s; s = s->next)
17921 0 : if (s->rtl != NULL_RTX)
17922 0 : instantiate_decl_rtl (s->rtl);
17923 1488365 : }
17924 :
17925 : /* Check whether x86 address PARTS is a pc-relative address. */
17926 :
17927 : bool
17928 26363425 : ix86_rip_relative_addr_p (struct ix86_address *parts)
17929 : {
17930 26363425 : rtx base, index, disp;
17931 :
17932 26363425 : base = parts->base;
17933 26363425 : index = parts->index;
17934 26363425 : disp = parts->disp;
17935 :
17936 26363425 : if (disp && !base && !index)
17937 : {
17938 24606788 : if (TARGET_64BIT)
17939 : {
17940 22941988 : rtx symbol = disp;
17941 :
17942 22941988 : if (GET_CODE (disp) == CONST)
17943 6880660 : symbol = XEXP (disp, 0);
17944 22941988 : if (GET_CODE (symbol) == PLUS
17945 6366369 : && CONST_INT_P (XEXP (symbol, 1)))
17946 6366369 : symbol = XEXP (symbol, 0);
17947 :
17948 22941988 : if (LABEL_REF_P (symbol)
17949 22934350 : || (SYMBOL_REF_P (symbol)
17950 21669546 : && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17951 24206792 : || (GET_CODE (symbol) == UNSPEC
17952 533179 : && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17953 : || XINT (symbol, 1) == UNSPEC_PCREL
17954 : || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17955 22182814 : return true;
17956 : }
17957 : }
17958 : return false;
17959 : }
17960 :
17961 : /* Calculate the length of the memory address in the instruction encoding.
17962 : Includes addr32 prefix, does not include the one-byte modrm, opcode,
17963 : or other prefixes. We never generate addr32 prefix for LEA insn. */
17964 :
17965 : int
17966 270573577 : memory_address_length (rtx addr, bool lea)
17967 : {
17968 270573577 : struct ix86_address parts;
17969 270573577 : rtx base, index, disp;
17970 270573577 : int len;
17971 270573577 : int ok;
17972 :
17973 270573577 : if (GET_CODE (addr) == PRE_DEC
17974 262010413 : || GET_CODE (addr) == POST_INC
17975 257510338 : || GET_CODE (addr) == PRE_MODIFY
17976 257510338 : || GET_CODE (addr) == POST_MODIFY)
17977 : return 0;
17978 :
17979 257510338 : ok = ix86_decompose_address (addr, &parts);
17980 257510338 : gcc_assert (ok);
17981 :
17982 257510338 : len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17983 :
17984 : /* If this is not LEA instruction, add the length of addr32 prefix. */
17985 218909428 : if (TARGET_64BIT && !lea
17986 451501233 : && (SImode_address_operand (addr, VOIDmode)
17987 193990742 : || (parts.base && GET_MODE (parts.base) == SImode)
17988 193980512 : || (parts.index && GET_MODE (parts.index) == SImode)))
17989 10383 : len++;
17990 :
17991 257510338 : base = parts.base;
17992 257510338 : index = parts.index;
17993 257510338 : disp = parts.disp;
17994 :
17995 257510338 : if (base && SUBREG_P (base))
17996 2 : base = SUBREG_REG (base);
17997 257510338 : if (index && SUBREG_P (index))
17998 0 : index = SUBREG_REG (index);
17999 :
18000 257510338 : gcc_assert (base == NULL_RTX || REG_P (base));
18001 257510338 : gcc_assert (index == NULL_RTX || REG_P (index));
18002 :
18003 : /* Rule of thumb:
18004 : - esp as the base always wants an index,
18005 : - ebp as the base always wants a displacement,
18006 : - r12 as the base always wants an index,
18007 : - r13 as the base always wants a displacement. */
18008 :
18009 : /* Register Indirect. */
18010 257510338 : if (base && !index && !disp)
18011 : {
18012 : /* esp (for its index) and ebp (for its displacement) need
18013 : the two-byte modrm form. Similarly for r12 and r13 in 64-bit
18014 : code. */
18015 16713556 : if (base == arg_pointer_rtx
18016 16713556 : || base == frame_pointer_rtx
18017 16713556 : || REGNO (base) == SP_REG
18018 9988927 : || REGNO (base) == BP_REG
18019 9988927 : || REGNO (base) == R12_REG
18020 26255190 : || REGNO (base) == R13_REG)
18021 7171922 : len++;
18022 : }
18023 :
18024 : /* Direct Addressing. In 64-bit mode mod 00 r/m 5
18025 : is not disp32, but disp32(%rip), so for disp32
18026 : SIB byte is needed, unless print_operand_address
18027 : optimizes it into disp32(%rip) or (%rip) is implied
18028 : by UNSPEC. */
18029 240796782 : else if (disp && !base && !index)
18030 : {
18031 23631298 : len += 4;
18032 23631298 : if (!ix86_rip_relative_addr_p (&parts))
18033 1851259 : len++;
18034 : }
18035 : else
18036 : {
18037 : /* Find the length of the displacement constant. */
18038 217165484 : if (disp)
18039 : {
18040 213086325 : if (base && satisfies_constraint_K (disp))
18041 123270110 : len += 1;
18042 : else
18043 89816215 : len += 4;
18044 : }
18045 : /* ebp always wants a displacement. Similarly r13. */
18046 4079159 : else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
18047 9299 : len++;
18048 :
18049 : /* An index requires the two-byte modrm form.... */
18050 217165484 : if (index
18051 : /* ...like esp (or r12), which always wants an index. */
18052 206436840 : || base == arg_pointer_rtx
18053 206436840 : || base == frame_pointer_rtx
18054 423602324 : || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
18055 155379764 : len++;
18056 : }
18057 :
18058 : return len;
18059 : }
18060 :
18061 : /* Compute default value for "length_immediate" attribute. When SHORTFORM
18062 : is set, expect that insn have 8bit immediate alternative. */
18063 : int
18064 315708902 : ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
18065 : {
18066 315708902 : int len = 0;
18067 315708902 : int i;
18068 315708902 : extract_insn_cached (insn);
18069 985380920 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18070 669672018 : if (CONSTANT_P (recog_data.operand[i]))
18071 : {
18072 138576588 : enum attr_mode mode = get_attr_mode (insn);
18073 :
18074 138576588 : gcc_assert (!len);
18075 138576588 : if (shortform && CONST_INT_P (recog_data.operand[i]))
18076 : {
18077 37440632 : HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
18078 37440632 : switch (mode)
18079 : {
18080 1281157 : case MODE_QI:
18081 1281157 : len = 1;
18082 1281157 : continue;
18083 436587 : case MODE_HI:
18084 436587 : ival = trunc_int_for_mode (ival, HImode);
18085 436587 : break;
18086 15895465 : case MODE_SI:
18087 15895465 : ival = trunc_int_for_mode (ival, SImode);
18088 15895465 : break;
18089 : default:
18090 : break;
18091 : }
18092 36159475 : if (IN_RANGE (ival, -128, 127))
18093 : {
18094 32052059 : len = 1;
18095 32052059 : continue;
18096 : }
18097 : }
18098 105243372 : switch (mode)
18099 : {
18100 : case MODE_QI:
18101 : len = 1;
18102 : break;
18103 : case MODE_HI:
18104 669672018 : len = 2;
18105 : break;
18106 : case MODE_SI:
18107 99864582 : len = 4;
18108 : break;
18109 : /* Immediates for DImode instructions are encoded
18110 : as 32bit sign extended values. */
18111 : case MODE_DI:
18112 99864582 : len = 4;
18113 : break;
18114 0 : default:
18115 0 : fatal_insn ("unknown insn mode", insn);
18116 : }
18117 : }
18118 315708902 : return len;
18119 : }
18120 :
18121 : /* Compute default value for "length_address" attribute. */
18122 : int
18123 443218305 : ix86_attr_length_address_default (rtx_insn *insn)
18124 : {
18125 443218305 : int i;
18126 :
18127 443218305 : if (get_attr_type (insn) == TYPE_LEA)
18128 : {
18129 27626149 : rtx set = PATTERN (insn), addr;
18130 :
18131 27626149 : if (GET_CODE (set) == PARALLEL)
18132 86956 : set = XVECEXP (set, 0, 0);
18133 :
18134 27626149 : gcc_assert (GET_CODE (set) == SET);
18135 :
18136 27626149 : addr = SET_SRC (set);
18137 :
18138 27626149 : return memory_address_length (addr, true);
18139 : }
18140 :
18141 415592156 : extract_insn_cached (insn);
18142 954527493 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18143 : {
18144 781597049 : rtx op = recog_data.operand[i];
18145 781597049 : if (MEM_P (op))
18146 : {
18147 242940755 : constrain_operands_cached (insn, reload_completed);
18148 242940755 : if (which_alternative != -1)
18149 : {
18150 242940755 : const char *constraints = recog_data.constraints[i];
18151 242940755 : int alt = which_alternative;
18152 :
18153 385076158 : while (*constraints == '=' || *constraints == '+')
18154 142135403 : constraints++;
18155 1102394327 : while (alt-- > 0)
18156 2108641635 : while (*constraints++ != ',')
18157 : ;
18158 : /* Skip ignored operands. */
18159 242940755 : if (*constraints == 'X')
18160 279043 : continue;
18161 : }
18162 :
18163 242661712 : int len = memory_address_length (XEXP (op, 0), false);
18164 :
18165 : /* Account for segment prefix for non-default addr spaces. */
18166 255846796 : if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
18167 784055 : len++;
18168 :
18169 242661712 : return len;
18170 : }
18171 : }
18172 : return 0;
18173 : }
18174 :
18175 : /* Compute default value for "length_vex" attribute. It includes
18176 : 2 or 3 byte VEX prefix and 1 opcode byte. */
18177 :
18178 : int
18179 5079429 : ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
18180 : bool has_vex_w)
18181 : {
18182 5079429 : int i, reg_only = 2 + 1;
18183 5079429 : bool has_mem = false;
18184 :
18185 : /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18186 : byte VEX prefix. */
18187 5079429 : if (!has_0f_opcode || has_vex_w)
18188 : return 3 + 1;
18189 :
18190 : /* We can always use 2 byte VEX prefix in 32bit. */
18191 4641315 : if (!TARGET_64BIT)
18192 : return 2 + 1;
18193 :
18194 3526158 : extract_insn_cached (insn);
18195 :
18196 11051946 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18197 7828093 : if (REG_P (recog_data.operand[i]))
18198 : {
18199 : /* REX.W bit uses 3 byte VEX prefix.
18200 : REX2 with vex use extended EVEX prefix length is 4-byte. */
18201 5158101 : if (GET_MODE (recog_data.operand[i]) == DImode
18202 5158101 : && GENERAL_REG_P (recog_data.operand[i]))
18203 : return 3 + 1;
18204 :
18205 : /* REX.B bit requires 3-byte VEX. Right here we don't know which
18206 : operand will be encoded using VEX.B, so be conservative.
18207 : REX2 with vex use extended EVEX prefix length is 4-byte. */
18208 5145004 : if (REX_INT_REGNO_P (recog_data.operand[i])
18209 5145004 : || REX2_INT_REGNO_P (recog_data.operand[i])
18210 5145004 : || REX_SSE_REGNO_P (recog_data.operand[i]))
18211 0 : reg_only = 3 + 1;
18212 : }
18213 2669992 : else if (MEM_P (recog_data.operand[i]))
18214 : {
18215 : /* REX2.X or REX2.B bits use 3 byte VEX prefix. */
18216 2043400 : if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
18217 : return 4;
18218 :
18219 : /* REX.X or REX.B bits use 3 byte VEX prefix. */
18220 2043159 : if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
18221 : return 3 + 1;
18222 :
18223 : has_mem = true;
18224 : }
18225 :
18226 3223853 : return has_mem ? 2 + 1 : reg_only;
18227 : }
18228 :
18229 :
18230 : static bool
18231 : ix86_class_likely_spilled_p (reg_class_t);
18232 :
18233 : /* Returns true if lhs of insn is HW function argument register and set up
18234 : is_spilled to true if it is likely spilled HW register. */
18235 : static bool
18236 1149 : insn_is_function_arg (rtx insn, bool* is_spilled)
18237 : {
18238 1149 : rtx dst;
18239 :
18240 1149 : if (!NONDEBUG_INSN_P (insn))
18241 : return false;
18242 : /* Call instructions are not movable, ignore it. */
18243 1149 : if (CALL_P (insn))
18244 : return false;
18245 1075 : insn = PATTERN (insn);
18246 1075 : if (GET_CODE (insn) == PARALLEL)
18247 73 : insn = XVECEXP (insn, 0, 0);
18248 1075 : if (GET_CODE (insn) != SET)
18249 : return false;
18250 1075 : dst = SET_DEST (insn);
18251 979 : if (REG_P (dst) && HARD_REGISTER_P (dst)
18252 1948 : && ix86_function_arg_regno_p (REGNO (dst)))
18253 : {
18254 : /* Is it likely spilled HW register? */
18255 873 : if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
18256 873 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
18257 829 : *is_spilled = true;
18258 873 : return true;
18259 : }
18260 : return false;
18261 : }
18262 :
18263 : /* Add output dependencies for chain of function adjacent arguments if only
18264 : there is a move to likely spilled HW register. Return first argument
18265 : if at least one dependence was added or NULL otherwise. */
18266 : static rtx_insn *
18267 415 : add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
18268 : {
18269 415 : rtx_insn *insn;
18270 415 : rtx_insn *last = call;
18271 415 : rtx_insn *first_arg = NULL;
18272 415 : bool is_spilled = false;
18273 :
18274 415 : head = PREV_INSN (head);
18275 :
18276 : /* Find nearest to call argument passing instruction. */
18277 415 : while (true)
18278 : {
18279 415 : last = PREV_INSN (last);
18280 415 : if (last == head)
18281 : return NULL;
18282 415 : if (!NONDEBUG_INSN_P (last))
18283 0 : continue;
18284 415 : if (insn_is_function_arg (last, &is_spilled))
18285 : break;
18286 : return NULL;
18287 : }
18288 :
18289 : first_arg = last;
18290 1054 : while (true)
18291 : {
18292 1054 : insn = PREV_INSN (last);
18293 1054 : if (!INSN_P (insn))
18294 : break;
18295 956 : if (insn == head)
18296 : break;
18297 915 : if (!NONDEBUG_INSN_P (insn))
18298 : {
18299 181 : last = insn;
18300 181 : continue;
18301 : }
18302 734 : if (insn_is_function_arg (insn, &is_spilled))
18303 : {
18304 : /* Add output dependence between two function arguments if chain
18305 : of output arguments contains likely spilled HW registers. */
18306 466 : if (is_spilled)
18307 466 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18308 : first_arg = last = insn;
18309 : }
18310 : else
18311 : break;
18312 : }
18313 407 : if (!is_spilled)
18314 : return NULL;
18315 : return first_arg;
18316 : }
18317 :
18318 : /* Add output or anti dependency from insn to first_arg to restrict its code
18319 : motion. */
18320 : static void
18321 2333 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
18322 : {
18323 2333 : rtx set;
18324 2333 : rtx tmp;
18325 :
18326 2333 : set = single_set (insn);
18327 2333 : if (!set)
18328 : return;
18329 1453 : tmp = SET_DEST (set);
18330 1453 : if (REG_P (tmp))
18331 : {
18332 : /* Add output dependency to the first function argument. */
18333 1258 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18334 1258 : return;
18335 : }
18336 : /* Add anti dependency. */
18337 195 : add_dependence (first_arg, insn, REG_DEP_ANTI);
18338 : }
18339 :
18340 : /* Avoid cross block motion of function argument through adding dependency
18341 : from the first non-jump instruction in bb. */
18342 : static void
18343 68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
18344 : {
18345 68 : rtx_insn *insn = BB_END (bb);
18346 :
18347 134 : while (insn)
18348 : {
18349 134 : if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
18350 : {
18351 67 : rtx set = single_set (insn);
18352 67 : if (set)
18353 : {
18354 67 : avoid_func_arg_motion (arg, insn);
18355 67 : return;
18356 : }
18357 : }
18358 67 : if (insn == BB_HEAD (bb))
18359 : return;
18360 66 : insn = PREV_INSN (insn);
18361 : }
18362 : }
18363 :
18364 : /* Hook for pre-reload schedule - avoid motion of function arguments
18365 : passed in likely spilled HW registers. */
18366 : static void
18367 10162244 : ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
18368 : {
18369 10162244 : rtx_insn *insn;
18370 10162244 : rtx_insn *first_arg = NULL;
18371 10162244 : if (reload_completed)
18372 : return;
18373 2268 : while (head != tail && DEBUG_INSN_P (head))
18374 766 : head = NEXT_INSN (head);
18375 10882 : for (insn = tail; insn != head; insn = PREV_INSN (insn))
18376 9516 : if (INSN_P (insn) && CALL_P (insn))
18377 : {
18378 415 : first_arg = add_parameter_dependencies (insn, head);
18379 415 : if (first_arg)
18380 : {
18381 : /* Add dependee for first argument to predecessors if only
18382 : region contains more than one block. */
18383 407 : basic_block bb = BLOCK_FOR_INSN (insn);
18384 407 : int rgn = CONTAINING_RGN (bb->index);
18385 407 : int nr_blks = RGN_NR_BLOCKS (rgn);
18386 : /* Skip trivial regions and region head blocks that can have
18387 : predecessors outside of region. */
18388 407 : if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
18389 : {
18390 67 : edge e;
18391 67 : edge_iterator ei;
18392 :
18393 : /* Regions are SCCs with the exception of selective
18394 : scheduling with pipelining of outer blocks enabled.
18395 : So also check that immediate predecessors of a non-head
18396 : block are in the same region. */
18397 137 : FOR_EACH_EDGE (e, ei, bb->preds)
18398 : {
18399 : /* Avoid creating of loop-carried dependencies through
18400 : using topological ordering in the region. */
18401 70 : if (rgn == CONTAINING_RGN (e->src->index)
18402 69 : && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
18403 68 : add_dependee_for_func_arg (first_arg, e->src);
18404 : }
18405 : }
18406 407 : insn = first_arg;
18407 407 : if (insn == head)
18408 : break;
18409 : }
18410 : }
18411 9101 : else if (first_arg)
18412 2266 : avoid_func_arg_motion (first_arg, insn);
18413 : }
18414 :
18415 : /* Hook for pre-reload schedule - set priority of moves from likely spilled
18416 : HW registers to maximum, to schedule them at soon as possible. These are
18417 : moves from function argument registers at the top of the function entry
18418 : and moves from function return value registers after call. */
18419 : static int
18420 106666117 : ix86_adjust_priority (rtx_insn *insn, int priority)
18421 : {
18422 106666117 : rtx set;
18423 :
18424 106666117 : if (reload_completed)
18425 : return priority;
18426 :
18427 14746 : if (!NONDEBUG_INSN_P (insn))
18428 : return priority;
18429 :
18430 12688 : set = single_set (insn);
18431 12688 : if (set)
18432 : {
18433 12080 : rtx tmp = SET_SRC (set);
18434 12080 : if (REG_P (tmp)
18435 2532 : && HARD_REGISTER_P (tmp)
18436 500 : && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18437 12080 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18438 449 : return current_sched_info->sched_max_insns_priority;
18439 : }
18440 :
18441 : return priority;
18442 : }
18443 :
18444 : /* Prepare for scheduling pass. */
18445 : static void
18446 961800 : ix86_sched_init_global (FILE *, int, int)
18447 : {
18448 : /* Install scheduling hooks for current CPU. Some of these hooks are used
18449 : in time-critical parts of the scheduler, so we only set them up when
18450 : they are actually used. */
18451 961800 : switch (ix86_tune)
18452 : {
18453 915249 : case PROCESSOR_CORE2:
18454 915249 : case PROCESSOR_NEHALEM:
18455 915249 : case PROCESSOR_SANDYBRIDGE:
18456 915249 : case PROCESSOR_HASWELL:
18457 915249 : case PROCESSOR_TREMONT:
18458 915249 : case PROCESSOR_ALDERLAKE:
18459 915249 : case PROCESSOR_GENERIC:
18460 : /* Do not perform multipass scheduling for pre-reload schedule
18461 : to save compile time. */
18462 915249 : if (reload_completed)
18463 : {
18464 914762 : ix86_core2i7_init_hooks ();
18465 914762 : break;
18466 : }
18467 : /* Fall through. */
18468 47038 : default:
18469 47038 : targetm.sched.dfa_post_advance_cycle = NULL;
18470 47038 : targetm.sched.first_cycle_multipass_init = NULL;
18471 47038 : targetm.sched.first_cycle_multipass_begin = NULL;
18472 47038 : targetm.sched.first_cycle_multipass_issue = NULL;
18473 47038 : targetm.sched.first_cycle_multipass_backtrack = NULL;
18474 47038 : targetm.sched.first_cycle_multipass_end = NULL;
18475 47038 : targetm.sched.first_cycle_multipass_fini = NULL;
18476 47038 : break;
18477 : }
18478 961800 : }
18479 :
18480 :
18481 : /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18482 :
18483 : static HOST_WIDE_INT
18484 725219 : ix86_static_rtx_alignment (machine_mode mode)
18485 : {
18486 725219 : if (mode == DFmode)
18487 : return 64;
18488 : if (ALIGN_MODE_128 (mode))
18489 155571 : return MAX (128, GET_MODE_ALIGNMENT (mode));
18490 484289 : return GET_MODE_ALIGNMENT (mode);
18491 : }
18492 :
18493 : /* Implement TARGET_CONSTANT_ALIGNMENT. */
18494 :
18495 : static HOST_WIDE_INT
18496 6760707 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18497 : {
18498 6760707 : if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18499 : || TREE_CODE (exp) == INTEGER_CST)
18500 : {
18501 370552 : machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18502 370552 : HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18503 370552 : return MAX (mode_align, align);
18504 : }
18505 6247724 : else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18506 9366687 : && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18507 : return BITS_PER_WORD;
18508 :
18509 : return align;
18510 : }
18511 :
18512 : /* Implement TARGET_EMPTY_RECORD_P. */
18513 :
18514 : static bool
18515 1455060077 : ix86_is_empty_record (const_tree type)
18516 : {
18517 1455060077 : if (!TARGET_64BIT)
18518 : return false;
18519 1424107672 : return default_is_empty_record (type);
18520 : }
18521 :
18522 : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
18523 :
18524 : static void
18525 15174088 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
18526 : {
18527 15174088 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
18528 :
18529 15174088 : if (!cum->warn_empty)
18530 : return;
18531 :
18532 13095379 : if (!TYPE_EMPTY_P (type))
18533 : return;
18534 :
18535 : /* Don't warn if the function isn't visible outside of the TU. */
18536 15038 : if (cum->decl && !TREE_PUBLIC (cum->decl))
18537 : return;
18538 :
18539 13554 : tree decl = cum->decl;
18540 13554 : if (!decl)
18541 : /* If we don't know the target, look at the current TU. */
18542 39 : decl = current_function_decl;
18543 :
18544 13554 : const_tree ctx = get_ultimate_context (decl);
18545 13554 : if (ctx == NULL_TREE
18546 27074 : || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
18547 : return;
18548 :
18549 : /* If the actual size of the type is zero, then there is no change
18550 : in how objects of this size are passed. */
18551 72 : if (int_size_in_bytes (type) == 0)
18552 : return;
18553 :
18554 66 : warning (OPT_Wabi, "empty class %qT parameter passing ABI "
18555 : "changes in %<-fabi-version=12%> (GCC 8)", type);
18556 :
18557 : /* Only warn once. */
18558 66 : cum->warn_empty = false;
18559 : }
18560 :
18561 : /* This hook returns name of multilib ABI. */
18562 :
18563 : static const char *
18564 3429975 : ix86_get_multilib_abi_name (void)
18565 : {
18566 3429975 : if (!(TARGET_64BIT_P (ix86_isa_flags)))
18567 : return "i386";
18568 3386019 : else if (TARGET_X32_P (ix86_isa_flags))
18569 : return "x32";
18570 : else
18571 3386019 : return "x86_64";
18572 : }
18573 :
18574 : /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18575 : the data type, and ALIGN is the alignment that the object would
18576 : ordinarily have. */
18577 :
18578 : static int
18579 0 : iamcu_alignment (tree type, int align)
18580 : {
18581 0 : machine_mode mode;
18582 :
18583 0 : if (align < 32 || TYPE_USER_ALIGN (type))
18584 : return align;
18585 :
18586 : /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18587 : bytes. */
18588 0 : type = strip_array_types (type);
18589 0 : if (TYPE_ATOMIC (type))
18590 : return align;
18591 :
18592 0 : mode = TYPE_MODE (type);
18593 0 : switch (GET_MODE_CLASS (mode))
18594 : {
18595 : case MODE_INT:
18596 : case MODE_COMPLEX_INT:
18597 : case MODE_COMPLEX_FLOAT:
18598 : case MODE_FLOAT:
18599 : case MODE_DECIMAL_FLOAT:
18600 : return 32;
18601 : default:
18602 : return align;
18603 : }
18604 : }
18605 :
18606 : /* Compute the alignment for a static variable.
18607 : TYPE is the data type, and ALIGN is the alignment that
18608 : the object would ordinarily have. The value of this function is used
18609 : instead of that alignment to align the object. */
18610 :
18611 : int
18612 12060313 : ix86_data_alignment (tree type, unsigned int align, bool opt)
18613 : {
18614 : /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18615 : for symbols from other compilation units or symbols that don't need
18616 : to bind locally. In order to preserve some ABI compatibility with
18617 : those compilers, ensure we don't decrease alignment from what we
18618 : used to assume. */
18619 :
18620 12060313 : unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18621 :
18622 : /* A data structure, equal or greater than the size of a cache line
18623 : (64 bytes in the Pentium 4 and other recent Intel processors, including
18624 : processors based on Intel Core microarchitecture) should be aligned
18625 : so that its base address is a multiple of a cache line size. */
18626 :
18627 24120626 : unsigned int max_align
18628 12060313 : = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
18629 :
18630 14650129 : if (max_align < BITS_PER_WORD)
18631 0 : max_align = BITS_PER_WORD;
18632 :
18633 12060313 : switch (ix86_align_data_type)
18634 : {
18635 12060313 : case ix86_align_data_type_abi: opt = false; break;
18636 12060293 : case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18637 : case ix86_align_data_type_cacheline: break;
18638 : }
18639 :
18640 12060313 : if (TARGET_IAMCU)
18641 0 : align = iamcu_alignment (type, align);
18642 :
18643 12060313 : if (opt
18644 5805152 : && AGGREGATE_TYPE_P (type)
18645 3721870 : && TYPE_SIZE (type)
18646 15782131 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18647 : {
18648 6742085 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18649 3721818 : && align < max_align_compat)
18650 701551 : align = max_align_compat;
18651 7380499 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18652 3721818 : && align < max_align)
18653 63137 : align = max_align;
18654 : }
18655 :
18656 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18657 : to 16byte boundary. */
18658 12060313 : if (TARGET_64BIT)
18659 : {
18660 4959493 : if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18661 3273501 : && TYPE_SIZE (type)
18662 3273439 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18663 10908123 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18664 11524296 : && align < 128)
18665 616173 : return 128;
18666 : }
18667 :
18668 11444140 : if (!opt)
18669 6059608 : return align;
18670 :
18671 5384532 : if (TREE_CODE (type) == ARRAY_TYPE)
18672 : {
18673 1105579 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18674 : return 64;
18675 1105579 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18676 : return 128;
18677 : }
18678 4278953 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18679 : {
18680 :
18681 12972 : if (TYPE_MODE (type) == DCmode && align < 64)
18682 : return 64;
18683 12972 : if ((TYPE_MODE (type) == XCmode
18684 12972 : || TYPE_MODE (type) == TCmode) && align < 128)
18685 : return 128;
18686 : }
18687 4265981 : else if (RECORD_OR_UNION_TYPE_P (type)
18688 4265981 : && TYPE_FIELDS (type))
18689 : {
18690 2193924 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18691 : return 64;
18692 2193924 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18693 : return 128;
18694 : }
18695 2072057 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18696 : || TREE_CODE (type) == INTEGER_TYPE)
18697 : {
18698 1921513 : if (TYPE_MODE (type) == DFmode && align < 64)
18699 : return 64;
18700 1921513 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18701 : return 128;
18702 : }
18703 :
18704 5384419 : return align;
18705 : }
18706 :
18707 : /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18708 : static void
18709 31537804 : ix86_lower_local_decl_alignment (tree decl)
18710 : {
18711 31537804 : unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18712 31537804 : DECL_ALIGN (decl), true);
18713 31537804 : if (new_align < DECL_ALIGN (decl))
18714 0 : SET_DECL_ALIGN (decl, new_align);
18715 31537804 : }
18716 :
18717 : /* Compute the alignment for a local variable or a stack slot. EXP is
18718 : the data type or decl itself, MODE is the widest mode available and
18719 : ALIGN is the alignment that the object would ordinarily have. The
18720 : value of this macro is used instead of that alignment to align the
18721 : object. */
18722 :
18723 : unsigned int
18724 48915663 : ix86_local_alignment (tree exp, machine_mode mode,
18725 : unsigned int align, bool may_lower)
18726 : {
18727 48915663 : tree type, decl;
18728 :
18729 48915663 : if (exp && DECL_P (exp))
18730 : {
18731 46747659 : type = TREE_TYPE (exp);
18732 46747659 : decl = exp;
18733 : }
18734 : else
18735 : {
18736 : type = exp;
18737 : decl = NULL;
18738 : }
18739 :
18740 : /* Don't do dynamic stack realignment for long long objects with
18741 : -mpreferred-stack-boundary=2. */
18742 48915663 : if (may_lower
18743 31537804 : && !TARGET_64BIT
18744 249417 : && align == 64
18745 38964 : && ix86_preferred_stack_boundary < 64
18746 0 : && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18747 0 : && (!type || (!TYPE_USER_ALIGN (type)
18748 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18749 48915663 : && (!decl || !DECL_USER_ALIGN (decl)))
18750 : align = 32;
18751 :
18752 : /* If TYPE is NULL, we are allocating a stack slot for caller-save
18753 : register in MODE. We will return the largest alignment of XF
18754 : and DF. */
18755 48915663 : if (!type)
18756 : {
18757 1408554 : if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18758 1517 : align = GET_MODE_ALIGNMENT (DFmode);
18759 1408554 : return align;
18760 : }
18761 :
18762 : /* Don't increase alignment for Intel MCU psABI. */
18763 47507109 : if (TARGET_IAMCU)
18764 : return align;
18765 :
18766 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18767 : to 16byte boundary. Exact wording is:
18768 :
18769 : An array uses the same alignment as its elements, except that a local or
18770 : global array variable of length at least 16 bytes or
18771 : a C99 variable-length array variable always has alignment of at least 16 bytes.
18772 :
18773 : This was added to allow use of aligned SSE instructions at arrays. This
18774 : rule is meant for static storage (where compiler cannot do the analysis
18775 : by itself). We follow it for automatic variables only when convenient.
18776 : We fully control everything in the function compiled and functions from
18777 : other unit cannot rely on the alignment.
18778 :
18779 : Exclude va_list type. It is the common case of local array where
18780 : we cannot benefit from the alignment.
18781 :
18782 : TODO: Probably one should optimize for size only when var is not escaping. */
18783 44672681 : if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18784 91823379 : && TARGET_SSE)
18785 : {
18786 44276599 : if (AGGREGATE_TYPE_P (type)
18787 9322160 : && (va_list_type_node == NULL_TREE
18788 9322160 : || (TYPE_MAIN_VARIANT (type)
18789 9322160 : != TYPE_MAIN_VARIANT (va_list_type_node)))
18790 9223320 : && TYPE_SIZE (type)
18791 9223320 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18792 45341826 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18793 51105538 : && align < 128)
18794 5763712 : return 128;
18795 : }
18796 41743397 : if (TREE_CODE (type) == ARRAY_TYPE)
18797 : {
18798 794057 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18799 : return 64;
18800 794057 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18801 : return 128;
18802 : }
18803 40949340 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18804 : {
18805 154329 : if (TYPE_MODE (type) == DCmode && align < 64)
18806 : return 64;
18807 154329 : if ((TYPE_MODE (type) == XCmode
18808 154329 : || TYPE_MODE (type) == TCmode) && align < 128)
18809 : return 128;
18810 : }
18811 40795011 : else if (RECORD_OR_UNION_TYPE_P (type)
18812 40795011 : && TYPE_FIELDS (type))
18813 : {
18814 4739374 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18815 : return 64;
18816 4736269 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18817 : return 128;
18818 : }
18819 36055637 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18820 : || TREE_CODE (type) == INTEGER_TYPE)
18821 : {
18822 :
18823 29732676 : if (TYPE_MODE (type) == DFmode && align < 64)
18824 : return 64;
18825 29732676 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18826 : return 128;
18827 : }
18828 : return align;
18829 : }
18830 :
18831 : /* Compute the minimum required alignment for dynamic stack realignment
18832 : purposes for a local variable, parameter or a stack slot. EXP is
18833 : the data type or decl itself, MODE is its mode and ALIGN is the
18834 : alignment that the object would ordinarily have. */
18835 :
18836 : unsigned int
18837 47724870 : ix86_minimum_alignment (tree exp, machine_mode mode,
18838 : unsigned int align)
18839 : {
18840 47724870 : tree type, decl;
18841 :
18842 47724870 : if (exp && DECL_P (exp))
18843 : {
18844 15065590 : type = TREE_TYPE (exp);
18845 15065590 : decl = exp;
18846 : }
18847 : else
18848 : {
18849 : type = exp;
18850 : decl = NULL;
18851 : }
18852 :
18853 47724870 : if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18854 : return align;
18855 :
18856 : /* Don't do dynamic stack realignment for long long objects with
18857 : -mpreferred-stack-boundary=2. */
18858 0 : if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18859 0 : && (!type || (!TYPE_USER_ALIGN (type)
18860 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18861 0 : && (!decl || !DECL_USER_ALIGN (decl)))
18862 : {
18863 0 : gcc_checking_assert (!TARGET_STV);
18864 : return 32;
18865 : }
18866 :
18867 : return align;
18868 : }
18869 :
18870 : /* Find a location for the static chain incoming to a nested function.
18871 : This is a register, unless all free registers are used by arguments. */
18872 :
18873 : static rtx
18874 270266 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18875 : {
18876 270266 : unsigned regno;
18877 :
18878 270266 : if (TARGET_64BIT)
18879 : {
18880 : /* We always use R10 in 64-bit mode. */
18881 : regno = R10_REG;
18882 : }
18883 : else
18884 : {
18885 88535 : const_tree fntype, fndecl;
18886 88535 : unsigned int ccvt;
18887 :
18888 : /* By default in 32-bit mode we use ECX to pass the static chain. */
18889 88535 : regno = CX_REG;
18890 :
18891 88535 : if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18892 : {
18893 78559 : fntype = TREE_TYPE (fndecl_or_type);
18894 78559 : fndecl = fndecl_or_type;
18895 : }
18896 : else
18897 : {
18898 : fntype = fndecl_or_type;
18899 : fndecl = NULL;
18900 : }
18901 :
18902 88535 : ccvt = ix86_get_callcvt (fntype);
18903 88535 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18904 : {
18905 : /* Fastcall functions use ecx/edx for arguments, which leaves
18906 : us with EAX for the static chain.
18907 : Thiscall functions use ecx for arguments, which also
18908 : leaves us with EAX for the static chain. */
18909 : regno = AX_REG;
18910 : }
18911 88535 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18912 : {
18913 : /* Thiscall functions use ecx for arguments, which leaves
18914 : us with EAX and EDX for the static chain.
18915 : We are using for abi-compatibility EAX. */
18916 : regno = AX_REG;
18917 : }
18918 88535 : else if (ix86_function_regparm (fntype, fndecl) == 3)
18919 : {
18920 : /* For regparm 3, we have no free call-clobbered registers in
18921 : which to store the static chain. In order to implement this,
18922 : we have the trampoline push the static chain to the stack.
18923 : However, we can't push a value below the return address when
18924 : we call the nested function directly, so we have to use an
18925 : alternate entry point. For this we use ESI, and have the
18926 : alternate entry point push ESI, so that things appear the
18927 : same once we're executing the nested function. */
18928 0 : if (incoming_p)
18929 : {
18930 0 : if (fndecl == current_function_decl
18931 0 : && !ix86_static_chain_on_stack)
18932 : {
18933 0 : gcc_assert (!reload_completed);
18934 0 : ix86_static_chain_on_stack = true;
18935 : }
18936 0 : return gen_frame_mem (SImode,
18937 0 : plus_constant (Pmode,
18938 : arg_pointer_rtx, -8));
18939 : }
18940 : regno = SI_REG;
18941 : }
18942 : }
18943 :
18944 358814 : return gen_rtx_REG (Pmode, regno);
18945 : }
18946 :
18947 : /* Emit RTL insns to initialize the variable parts of a trampoline.
18948 : FNDECL is the decl of the target address; M_TRAMP is a MEM for
18949 : the trampoline, and CHAIN_VALUE is an RTX for the static chain
18950 : to be passed to the target function. */
18951 :
18952 : static void
18953 305 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18954 : {
18955 305 : rtx mem, fnaddr;
18956 305 : int opcode;
18957 305 : int offset = 0;
18958 305 : bool need_endbr = (flag_cf_protection & CF_BRANCH);
18959 :
18960 305 : fnaddr = XEXP (DECL_RTL (fndecl), 0);
18961 :
18962 305 : if (TARGET_64BIT)
18963 : {
18964 305 : int size;
18965 :
18966 305 : if (need_endbr)
18967 : {
18968 : /* Insert ENDBR64. */
18969 1 : mem = adjust_address (m_tramp, SImode, offset);
18970 1 : emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18971 1 : offset += 4;
18972 : }
18973 :
18974 : /* Load the function address to r11. Try to load address using
18975 : the shorter movl instead of movabs. We may want to support
18976 : movq for kernel mode, but kernel does not use trampolines at
18977 : the moment. FNADDR is a 32bit address and may not be in
18978 : DImode when ptr_mode == SImode. Always use movl in this
18979 : case. */
18980 305 : if (ptr_mode == SImode
18981 305 : || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18982 : {
18983 273 : fnaddr = copy_addr_to_reg (fnaddr);
18984 :
18985 273 : mem = adjust_address (m_tramp, HImode, offset);
18986 273 : emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18987 :
18988 273 : mem = adjust_address (m_tramp, SImode, offset + 2);
18989 273 : emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18990 273 : offset += 6;
18991 : }
18992 : else
18993 : {
18994 32 : mem = adjust_address (m_tramp, HImode, offset);
18995 32 : emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18996 :
18997 32 : mem = adjust_address (m_tramp, DImode, offset + 2);
18998 32 : emit_move_insn (mem, fnaddr);
18999 32 : offset += 10;
19000 : }
19001 :
19002 : /* Load static chain using movabs to r10. Use the shorter movl
19003 : instead of movabs when ptr_mode == SImode. */
19004 305 : if (ptr_mode == SImode)
19005 : {
19006 : opcode = 0xba41;
19007 : size = 6;
19008 : }
19009 : else
19010 : {
19011 305 : opcode = 0xba49;
19012 305 : size = 10;
19013 : }
19014 :
19015 305 : mem = adjust_address (m_tramp, HImode, offset);
19016 305 : emit_move_insn (mem, gen_int_mode (opcode, HImode));
19017 :
19018 305 : mem = adjust_address (m_tramp, ptr_mode, offset + 2);
19019 305 : emit_move_insn (mem, chain_value);
19020 305 : offset += size;
19021 :
19022 : /* Jump to r11; the last (unused) byte is a nop, only there to
19023 : pad the write out to a single 32-bit store. */
19024 305 : mem = adjust_address (m_tramp, SImode, offset);
19025 305 : emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
19026 305 : offset += 4;
19027 : }
19028 : else
19029 : {
19030 0 : rtx disp, chain;
19031 :
19032 : /* Depending on the static chain location, either load a register
19033 : with a constant, or push the constant to the stack. All of the
19034 : instructions are the same size. */
19035 0 : chain = ix86_static_chain (fndecl, true);
19036 0 : if (REG_P (chain))
19037 : {
19038 0 : switch (REGNO (chain))
19039 : {
19040 : case AX_REG:
19041 : opcode = 0xb8; break;
19042 0 : case CX_REG:
19043 0 : opcode = 0xb9; break;
19044 0 : default:
19045 0 : gcc_unreachable ();
19046 : }
19047 : }
19048 : else
19049 : opcode = 0x68;
19050 :
19051 0 : if (need_endbr)
19052 : {
19053 : /* Insert ENDBR32. */
19054 0 : mem = adjust_address (m_tramp, SImode, offset);
19055 0 : emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
19056 0 : offset += 4;
19057 : }
19058 :
19059 0 : mem = adjust_address (m_tramp, QImode, offset);
19060 0 : emit_move_insn (mem, gen_int_mode (opcode, QImode));
19061 :
19062 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
19063 0 : emit_move_insn (mem, chain_value);
19064 0 : offset += 5;
19065 :
19066 0 : mem = adjust_address (m_tramp, QImode, offset);
19067 0 : emit_move_insn (mem, gen_int_mode (0xe9, QImode));
19068 :
19069 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
19070 :
19071 : /* Compute offset from the end of the jmp to the target function.
19072 : In the case in which the trampoline stores the static chain on
19073 : the stack, we need to skip the first insn which pushes the
19074 : (call-saved) register static chain; this push is 1 byte. */
19075 0 : offset += 5;
19076 0 : int skip = MEM_P (chain) ? 1 : 0;
19077 : /* Skip ENDBR32 at the entry of the target function. */
19078 0 : if (need_endbr
19079 0 : && !cgraph_node::get (fndecl)->only_called_directly_p ())
19080 0 : skip += 4;
19081 0 : disp = expand_binop (SImode, sub_optab, fnaddr,
19082 0 : plus_constant (Pmode, XEXP (m_tramp, 0),
19083 0 : offset - skip),
19084 : NULL_RTX, 1, OPTAB_DIRECT);
19085 0 : emit_move_insn (mem, disp);
19086 : }
19087 :
19088 305 : gcc_assert (offset <= TRAMPOLINE_SIZE);
19089 :
19090 : #ifdef HAVE_ENABLE_EXECUTE_STACK
19091 : #ifdef CHECK_EXECUTE_STACK_ENABLED
19092 : if (CHECK_EXECUTE_STACK_ENABLED)
19093 : #endif
19094 : emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19095 : LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
19096 : #endif
19097 305 : }
19098 :
19099 : static bool
19100 53922445 : ix86_allocate_stack_slots_for_args (void)
19101 : {
19102 : /* Naked functions should not allocate stack slots for arguments. */
19103 53922445 : return !ix86_function_naked (current_function_decl);
19104 : }
19105 :
19106 : static bool
19107 37939568 : ix86_warn_func_return (tree decl)
19108 : {
19109 : /* Naked functions are implemented entirely in assembly, including the
19110 : return sequence, so suppress warnings about this. */
19111 37939568 : return !ix86_function_naked (decl);
19112 : }
19113 :
19114 : /* Return the shift count of a vector by scalar shift builtin second argument
19115 : ARG1. */
19116 : static tree
19117 14142 : ix86_vector_shift_count (tree arg1)
19118 : {
19119 14142 : if (tree_fits_uhwi_p (arg1))
19120 : return arg1;
19121 8316 : else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
19122 : {
19123 : /* The count argument is weird, passed in as various 128-bit
19124 : (or 64-bit) vectors, the low 64 bits from it are the count. */
19125 162 : unsigned char buf[16];
19126 162 : int len = native_encode_expr (arg1, buf, 16);
19127 162 : if (len == 0)
19128 162 : return NULL_TREE;
19129 162 : tree t = native_interpret_expr (uint64_type_node, buf, len);
19130 162 : if (t && tree_fits_uhwi_p (t))
19131 : return t;
19132 : }
19133 : return NULL_TREE;
19134 : }
19135 :
19136 : /* Return true if arg_mask is all ones, ELEMS is elements number of
19137 : corresponding vector. */
19138 : static bool
19139 25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
19140 : {
19141 25042 : if (TREE_CODE (arg_mask) != INTEGER_CST)
19142 : return false;
19143 :
19144 7462 : unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
19145 7462 : if (elems == HOST_BITS_PER_WIDE_INT)
19146 33 : return mask == HOST_WIDE_INT_M1U;
19147 7429 : if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
19148 2681 : return false;
19149 :
19150 : return true;
19151 : }
19152 :
19153 : static tree
19154 68154418 : ix86_fold_builtin (tree fndecl, int n_args,
19155 : tree *args, bool ignore ATTRIBUTE_UNUSED)
19156 : {
19157 68154418 : if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
19158 : {
19159 68154418 : enum ix86_builtins fn_code
19160 68154418 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19161 68154418 : enum rtx_code rcode;
19162 68154418 : bool is_vshift;
19163 68154418 : enum tree_code tcode;
19164 68154418 : bool is_scalar;
19165 68154418 : unsigned HOST_WIDE_INT mask;
19166 :
19167 68154418 : switch (fn_code)
19168 : {
19169 8883 : case IX86_BUILTIN_CPU_IS:
19170 8883 : case IX86_BUILTIN_CPU_SUPPORTS:
19171 8883 : gcc_assert (n_args == 1);
19172 8883 : return fold_builtin_cpu (fndecl, args);
19173 :
19174 25133 : case IX86_BUILTIN_NANQ:
19175 25133 : case IX86_BUILTIN_NANSQ:
19176 25133 : {
19177 25133 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19178 25133 : const char *str = c_getstr (*args);
19179 25133 : int quiet = fn_code == IX86_BUILTIN_NANQ;
19180 25133 : REAL_VALUE_TYPE real;
19181 :
19182 25133 : if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
19183 25133 : return build_real (type, real);
19184 0 : return NULL_TREE;
19185 : }
19186 :
19187 108 : case IX86_BUILTIN_INFQ:
19188 108 : case IX86_BUILTIN_HUGE_VALQ:
19189 108 : {
19190 108 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19191 108 : REAL_VALUE_TYPE inf;
19192 108 : real_inf (&inf);
19193 108 : return build_real (type, inf);
19194 : }
19195 :
19196 62447 : case IX86_BUILTIN_TZCNT16:
19197 62447 : case IX86_BUILTIN_CTZS:
19198 62447 : case IX86_BUILTIN_TZCNT32:
19199 62447 : case IX86_BUILTIN_TZCNT64:
19200 62447 : gcc_assert (n_args == 1);
19201 62447 : if (TREE_CODE (args[0]) == INTEGER_CST)
19202 : {
19203 45 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19204 45 : tree arg = args[0];
19205 45 : if (fn_code == IX86_BUILTIN_TZCNT16
19206 45 : || fn_code == IX86_BUILTIN_CTZS)
19207 3 : arg = fold_convert (short_unsigned_type_node, arg);
19208 45 : if (integer_zerop (arg))
19209 6 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19210 : else
19211 39 : return fold_const_call (CFN_CTZ, type, arg);
19212 : }
19213 : break;
19214 :
19215 52002 : case IX86_BUILTIN_LZCNT16:
19216 52002 : case IX86_BUILTIN_CLZS:
19217 52002 : case IX86_BUILTIN_LZCNT32:
19218 52002 : case IX86_BUILTIN_LZCNT64:
19219 52002 : gcc_assert (n_args == 1);
19220 52002 : if (TREE_CODE (args[0]) == INTEGER_CST)
19221 : {
19222 54 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19223 54 : tree arg = args[0];
19224 54 : if (fn_code == IX86_BUILTIN_LZCNT16
19225 54 : || fn_code == IX86_BUILTIN_CLZS)
19226 18 : arg = fold_convert (short_unsigned_type_node, arg);
19227 54 : if (integer_zerop (arg))
19228 3 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19229 : else
19230 51 : return fold_const_call (CFN_CLZ, type, arg);
19231 : }
19232 : break;
19233 :
19234 61227 : case IX86_BUILTIN_BEXTR32:
19235 61227 : case IX86_BUILTIN_BEXTR64:
19236 61227 : case IX86_BUILTIN_BEXTRI32:
19237 61227 : case IX86_BUILTIN_BEXTRI64:
19238 61227 : gcc_assert (n_args == 2);
19239 61227 : if (tree_fits_uhwi_p (args[1]))
19240 : {
19241 152 : unsigned HOST_WIDE_INT res = 0;
19242 152 : unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
19243 152 : unsigned int start = tree_to_uhwi (args[1]);
19244 152 : unsigned int len = (start & 0xff00) >> 8;
19245 152 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19246 152 : start &= 0xff;
19247 152 : if (start >= prec || len == 0)
19248 111 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19249 : args[0]);
19250 41 : else if (!tree_fits_uhwi_p (args[0]))
19251 : break;
19252 : else
19253 24 : res = tree_to_uhwi (args[0]) >> start;
19254 24 : if (len > prec)
19255 : len = prec;
19256 24 : if (len < HOST_BITS_PER_WIDE_INT)
19257 15 : res &= (HOST_WIDE_INT_1U << len) - 1;
19258 24 : return build_int_cstu (lhs_type, res);
19259 : }
19260 : break;
19261 :
19262 21034 : case IX86_BUILTIN_BZHI32:
19263 21034 : case IX86_BUILTIN_BZHI64:
19264 21034 : gcc_assert (n_args == 2);
19265 21034 : if (tree_fits_uhwi_p (args[1]))
19266 : {
19267 190 : unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
19268 190 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19269 190 : if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
19270 : return args[0];
19271 190 : if (idx == 0)
19272 52 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19273 : args[0]);
19274 138 : if (!tree_fits_uhwi_p (args[0]))
19275 : break;
19276 12 : unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
19277 12 : res &= ~(HOST_WIDE_INT_M1U << idx);
19278 12 : return build_int_cstu (lhs_type, res);
19279 : }
19280 : break;
19281 :
19282 20792 : case IX86_BUILTIN_PDEP32:
19283 20792 : case IX86_BUILTIN_PDEP64:
19284 20792 : gcc_assert (n_args == 2);
19285 20792 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19286 : {
19287 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19288 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19289 46 : unsigned HOST_WIDE_INT res = 0;
19290 46 : unsigned HOST_WIDE_INT m, k = 1;
19291 2990 : for (m = 1; m; m <<= 1)
19292 2944 : if ((mask & m) != 0)
19293 : {
19294 1440 : if ((src & k) != 0)
19295 789 : res |= m;
19296 1440 : k <<= 1;
19297 : }
19298 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19299 : }
19300 : break;
19301 :
19302 20794 : case IX86_BUILTIN_PEXT32:
19303 20794 : case IX86_BUILTIN_PEXT64:
19304 20794 : gcc_assert (n_args == 2);
19305 20794 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19306 : {
19307 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19308 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19309 46 : unsigned HOST_WIDE_INT res = 0;
19310 46 : unsigned HOST_WIDE_INT m, k = 1;
19311 2990 : for (m = 1; m; m <<= 1)
19312 2944 : if ((mask & m) != 0)
19313 : {
19314 2016 : if ((src & m) != 0)
19315 1063 : res |= k;
19316 2016 : k <<= 1;
19317 : }
19318 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19319 : }
19320 : break;
19321 :
19322 100831 : case IX86_BUILTIN_MOVMSKPS:
19323 100831 : case IX86_BUILTIN_PMOVMSKB:
19324 100831 : case IX86_BUILTIN_MOVMSKPD:
19325 100831 : case IX86_BUILTIN_PMOVMSKB128:
19326 100831 : case IX86_BUILTIN_MOVMSKPD256:
19327 100831 : case IX86_BUILTIN_MOVMSKPS256:
19328 100831 : case IX86_BUILTIN_PMOVMSKB256:
19329 100831 : gcc_assert (n_args == 1);
19330 100831 : if (TREE_CODE (args[0]) == VECTOR_CST)
19331 : {
19332 : HOST_WIDE_INT res = 0;
19333 1492 : for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
19334 : {
19335 1242 : tree e = VECTOR_CST_ELT (args[0], i);
19336 1242 : if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
19337 : {
19338 624 : if (wi::neg_p (wi::to_wide (e)))
19339 575 : res |= HOST_WIDE_INT_1 << i;
19340 : }
19341 618 : else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
19342 : {
19343 618 : if (TREE_REAL_CST (e).sign)
19344 517 : res |= HOST_WIDE_INT_1 << i;
19345 : }
19346 : else
19347 : return NULL_TREE;
19348 : }
19349 250 : return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
19350 : }
19351 : break;
19352 :
19353 659648 : case IX86_BUILTIN_PSLLD:
19354 659648 : case IX86_BUILTIN_PSLLD128:
19355 659648 : case IX86_BUILTIN_PSLLD128_MASK:
19356 659648 : case IX86_BUILTIN_PSLLD256:
19357 659648 : case IX86_BUILTIN_PSLLD256_MASK:
19358 659648 : case IX86_BUILTIN_PSLLD512:
19359 659648 : case IX86_BUILTIN_PSLLDI:
19360 659648 : case IX86_BUILTIN_PSLLDI128:
19361 659648 : case IX86_BUILTIN_PSLLDI128_MASK:
19362 659648 : case IX86_BUILTIN_PSLLDI256:
19363 659648 : case IX86_BUILTIN_PSLLDI256_MASK:
19364 659648 : case IX86_BUILTIN_PSLLDI512:
19365 659648 : case IX86_BUILTIN_PSLLQ:
19366 659648 : case IX86_BUILTIN_PSLLQ128:
19367 659648 : case IX86_BUILTIN_PSLLQ128_MASK:
19368 659648 : case IX86_BUILTIN_PSLLQ256:
19369 659648 : case IX86_BUILTIN_PSLLQ256_MASK:
19370 659648 : case IX86_BUILTIN_PSLLQ512:
19371 659648 : case IX86_BUILTIN_PSLLQI:
19372 659648 : case IX86_BUILTIN_PSLLQI128:
19373 659648 : case IX86_BUILTIN_PSLLQI128_MASK:
19374 659648 : case IX86_BUILTIN_PSLLQI256:
19375 659648 : case IX86_BUILTIN_PSLLQI256_MASK:
19376 659648 : case IX86_BUILTIN_PSLLQI512:
19377 659648 : case IX86_BUILTIN_PSLLW:
19378 659648 : case IX86_BUILTIN_PSLLW128:
19379 659648 : case IX86_BUILTIN_PSLLW128_MASK:
19380 659648 : case IX86_BUILTIN_PSLLW256:
19381 659648 : case IX86_BUILTIN_PSLLW256_MASK:
19382 659648 : case IX86_BUILTIN_PSLLW512_MASK:
19383 659648 : case IX86_BUILTIN_PSLLWI:
19384 659648 : case IX86_BUILTIN_PSLLWI128:
19385 659648 : case IX86_BUILTIN_PSLLWI128_MASK:
19386 659648 : case IX86_BUILTIN_PSLLWI256:
19387 659648 : case IX86_BUILTIN_PSLLWI256_MASK:
19388 659648 : case IX86_BUILTIN_PSLLWI512_MASK:
19389 659648 : rcode = ASHIFT;
19390 659648 : is_vshift = false;
19391 659648 : goto do_shift;
19392 601251 : case IX86_BUILTIN_PSRAD:
19393 601251 : case IX86_BUILTIN_PSRAD128:
19394 601251 : case IX86_BUILTIN_PSRAD128_MASK:
19395 601251 : case IX86_BUILTIN_PSRAD256:
19396 601251 : case IX86_BUILTIN_PSRAD256_MASK:
19397 601251 : case IX86_BUILTIN_PSRAD512:
19398 601251 : case IX86_BUILTIN_PSRADI:
19399 601251 : case IX86_BUILTIN_PSRADI128:
19400 601251 : case IX86_BUILTIN_PSRADI128_MASK:
19401 601251 : case IX86_BUILTIN_PSRADI256:
19402 601251 : case IX86_BUILTIN_PSRADI256_MASK:
19403 601251 : case IX86_BUILTIN_PSRADI512:
19404 601251 : case IX86_BUILTIN_PSRAQ128_MASK:
19405 601251 : case IX86_BUILTIN_PSRAQ256_MASK:
19406 601251 : case IX86_BUILTIN_PSRAQ512:
19407 601251 : case IX86_BUILTIN_PSRAQI128_MASK:
19408 601251 : case IX86_BUILTIN_PSRAQI256_MASK:
19409 601251 : case IX86_BUILTIN_PSRAQI512:
19410 601251 : case IX86_BUILTIN_PSRAW:
19411 601251 : case IX86_BUILTIN_PSRAW128:
19412 601251 : case IX86_BUILTIN_PSRAW128_MASK:
19413 601251 : case IX86_BUILTIN_PSRAW256:
19414 601251 : case IX86_BUILTIN_PSRAW256_MASK:
19415 601251 : case IX86_BUILTIN_PSRAW512:
19416 601251 : case IX86_BUILTIN_PSRAWI:
19417 601251 : case IX86_BUILTIN_PSRAWI128:
19418 601251 : case IX86_BUILTIN_PSRAWI128_MASK:
19419 601251 : case IX86_BUILTIN_PSRAWI256:
19420 601251 : case IX86_BUILTIN_PSRAWI256_MASK:
19421 601251 : case IX86_BUILTIN_PSRAWI512:
19422 601251 : rcode = ASHIFTRT;
19423 601251 : is_vshift = false;
19424 601251 : goto do_shift;
19425 633525 : case IX86_BUILTIN_PSRLD:
19426 633525 : case IX86_BUILTIN_PSRLD128:
19427 633525 : case IX86_BUILTIN_PSRLD128_MASK:
19428 633525 : case IX86_BUILTIN_PSRLD256:
19429 633525 : case IX86_BUILTIN_PSRLD256_MASK:
19430 633525 : case IX86_BUILTIN_PSRLD512:
19431 633525 : case IX86_BUILTIN_PSRLDI:
19432 633525 : case IX86_BUILTIN_PSRLDI128:
19433 633525 : case IX86_BUILTIN_PSRLDI128_MASK:
19434 633525 : case IX86_BUILTIN_PSRLDI256:
19435 633525 : case IX86_BUILTIN_PSRLDI256_MASK:
19436 633525 : case IX86_BUILTIN_PSRLDI512:
19437 633525 : case IX86_BUILTIN_PSRLQ:
19438 633525 : case IX86_BUILTIN_PSRLQ128:
19439 633525 : case IX86_BUILTIN_PSRLQ128_MASK:
19440 633525 : case IX86_BUILTIN_PSRLQ256:
19441 633525 : case IX86_BUILTIN_PSRLQ256_MASK:
19442 633525 : case IX86_BUILTIN_PSRLQ512:
19443 633525 : case IX86_BUILTIN_PSRLQI:
19444 633525 : case IX86_BUILTIN_PSRLQI128:
19445 633525 : case IX86_BUILTIN_PSRLQI128_MASK:
19446 633525 : case IX86_BUILTIN_PSRLQI256:
19447 633525 : case IX86_BUILTIN_PSRLQI256_MASK:
19448 633525 : case IX86_BUILTIN_PSRLQI512:
19449 633525 : case IX86_BUILTIN_PSRLW:
19450 633525 : case IX86_BUILTIN_PSRLW128:
19451 633525 : case IX86_BUILTIN_PSRLW128_MASK:
19452 633525 : case IX86_BUILTIN_PSRLW256:
19453 633525 : case IX86_BUILTIN_PSRLW256_MASK:
19454 633525 : case IX86_BUILTIN_PSRLW512:
19455 633525 : case IX86_BUILTIN_PSRLWI:
19456 633525 : case IX86_BUILTIN_PSRLWI128:
19457 633525 : case IX86_BUILTIN_PSRLWI128_MASK:
19458 633525 : case IX86_BUILTIN_PSRLWI256:
19459 633525 : case IX86_BUILTIN_PSRLWI256_MASK:
19460 633525 : case IX86_BUILTIN_PSRLWI512:
19461 633525 : rcode = LSHIFTRT;
19462 633525 : is_vshift = false;
19463 633525 : goto do_shift;
19464 276009 : case IX86_BUILTIN_PSLLVV16HI:
19465 276009 : case IX86_BUILTIN_PSLLVV16SI:
19466 276009 : case IX86_BUILTIN_PSLLVV2DI:
19467 276009 : case IX86_BUILTIN_PSLLVV2DI_MASK:
19468 276009 : case IX86_BUILTIN_PSLLVV32HI:
19469 276009 : case IX86_BUILTIN_PSLLVV4DI:
19470 276009 : case IX86_BUILTIN_PSLLVV4DI_MASK:
19471 276009 : case IX86_BUILTIN_PSLLVV4SI:
19472 276009 : case IX86_BUILTIN_PSLLVV4SI_MASK:
19473 276009 : case IX86_BUILTIN_PSLLVV8DI:
19474 276009 : case IX86_BUILTIN_PSLLVV8HI:
19475 276009 : case IX86_BUILTIN_PSLLVV8SI:
19476 276009 : case IX86_BUILTIN_PSLLVV8SI_MASK:
19477 276009 : rcode = ASHIFT;
19478 276009 : is_vshift = true;
19479 276009 : goto do_shift;
19480 275588 : case IX86_BUILTIN_PSRAVQ128:
19481 275588 : case IX86_BUILTIN_PSRAVQ256:
19482 275588 : case IX86_BUILTIN_PSRAVV16HI:
19483 275588 : case IX86_BUILTIN_PSRAVV16SI:
19484 275588 : case IX86_BUILTIN_PSRAVV32HI:
19485 275588 : case IX86_BUILTIN_PSRAVV4SI:
19486 275588 : case IX86_BUILTIN_PSRAVV4SI_MASK:
19487 275588 : case IX86_BUILTIN_PSRAVV8DI:
19488 275588 : case IX86_BUILTIN_PSRAVV8HI:
19489 275588 : case IX86_BUILTIN_PSRAVV8SI:
19490 275588 : case IX86_BUILTIN_PSRAVV8SI_MASK:
19491 275588 : rcode = ASHIFTRT;
19492 275588 : is_vshift = true;
19493 275588 : goto do_shift;
19494 276000 : case IX86_BUILTIN_PSRLVV16HI:
19495 276000 : case IX86_BUILTIN_PSRLVV16SI:
19496 276000 : case IX86_BUILTIN_PSRLVV2DI:
19497 276000 : case IX86_BUILTIN_PSRLVV2DI_MASK:
19498 276000 : case IX86_BUILTIN_PSRLVV32HI:
19499 276000 : case IX86_BUILTIN_PSRLVV4DI:
19500 276000 : case IX86_BUILTIN_PSRLVV4DI_MASK:
19501 276000 : case IX86_BUILTIN_PSRLVV4SI:
19502 276000 : case IX86_BUILTIN_PSRLVV4SI_MASK:
19503 276000 : case IX86_BUILTIN_PSRLVV8DI:
19504 276000 : case IX86_BUILTIN_PSRLVV8HI:
19505 276000 : case IX86_BUILTIN_PSRLVV8SI:
19506 276000 : case IX86_BUILTIN_PSRLVV8SI_MASK:
19507 276000 : rcode = LSHIFTRT;
19508 276000 : is_vshift = true;
19509 276000 : goto do_shift;
19510 :
19511 2722021 : do_shift:
19512 2722021 : gcc_assert (n_args >= 2);
19513 2722021 : if (TREE_CODE (args[0]) != VECTOR_CST)
19514 : break;
19515 927 : mask = HOST_WIDE_INT_M1U;
19516 927 : if (n_args > 2)
19517 : {
19518 : /* This is masked shift. */
19519 678 : if (!tree_fits_uhwi_p (args[n_args - 1])
19520 678 : || TREE_SIDE_EFFECTS (args[n_args - 2]))
19521 : break;
19522 678 : mask = tree_to_uhwi (args[n_args - 1]);
19523 678 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19524 678 : mask |= HOST_WIDE_INT_M1U << elems;
19525 678 : if (mask != HOST_WIDE_INT_M1U
19526 567 : && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
19527 : break;
19528 633 : if (mask == (HOST_WIDE_INT_M1U << elems))
19529 : return args[n_args - 2];
19530 : }
19531 879 : if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
19532 : break;
19533 879 : if (tree tem = (is_vshift ? integer_one_node
19534 879 : : ix86_vector_shift_count (args[1])))
19535 : {
19536 558 : unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
19537 558 : unsigned HOST_WIDE_INT prec
19538 558 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
19539 558 : if (count == 0 && mask == HOST_WIDE_INT_M1U)
19540 : return args[0];
19541 558 : if (count >= prec)
19542 : {
19543 72 : if (rcode == ASHIFTRT)
19544 27 : count = prec - 1;
19545 45 : else if (mask == HOST_WIDE_INT_M1U)
19546 3 : return build_zero_cst (TREE_TYPE (args[0]));
19547 : }
19548 555 : tree countt = NULL_TREE;
19549 555 : if (!is_vshift)
19550 : {
19551 377 : if (count >= prec)
19552 42 : countt = integer_zero_node;
19553 : else
19554 335 : countt = build_int_cst (integer_type_node, count);
19555 : }
19556 555 : tree_vector_builder builder;
19557 555 : if (mask != HOST_WIDE_INT_M1U || is_vshift)
19558 392 : builder.new_vector (TREE_TYPE (args[0]),
19559 784 : TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
19560 : 1);
19561 : else
19562 163 : builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
19563 : false);
19564 555 : unsigned int cnt = builder.encoded_nelts ();
19565 5967 : for (unsigned int i = 0; i < cnt; ++i)
19566 : {
19567 5412 : tree elt = VECTOR_CST_ELT (args[0], i);
19568 5412 : if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19569 0 : return NULL_TREE;
19570 5412 : tree type = TREE_TYPE (elt);
19571 5412 : if (rcode == LSHIFTRT)
19572 2040 : elt = fold_convert (unsigned_type_for (type), elt);
19573 5412 : if (is_vshift)
19574 : {
19575 1846 : countt = VECTOR_CST_ELT (args[1], i);
19576 1846 : if (TREE_CODE (countt) != INTEGER_CST
19577 1846 : || TREE_OVERFLOW (countt))
19578 : return NULL_TREE;
19579 1846 : if (wi::neg_p (wi::to_wide (countt))
19580 3610 : || wi::to_widest (countt) >= prec)
19581 : {
19582 325 : if (rcode == ASHIFTRT)
19583 108 : countt = build_int_cst (TREE_TYPE (countt),
19584 108 : prec - 1);
19585 : else
19586 : {
19587 217 : elt = build_zero_cst (TREE_TYPE (elt));
19588 217 : countt = build_zero_cst (TREE_TYPE (countt));
19589 : }
19590 : }
19591 : }
19592 3566 : else if (count >= prec)
19593 504 : elt = build_zero_cst (TREE_TYPE (elt));
19594 8950 : elt = const_binop (rcode == ASHIFT
19595 : ? LSHIFT_EXPR : RSHIFT_EXPR,
19596 5412 : TREE_TYPE (elt), elt, countt);
19597 5412 : if (!elt || TREE_CODE (elt) != INTEGER_CST)
19598 : return NULL_TREE;
19599 5412 : if (rcode == LSHIFTRT)
19600 2040 : elt = fold_convert (type, elt);
19601 5412 : if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19602 : {
19603 1566 : elt = VECTOR_CST_ELT (args[n_args - 2], i);
19604 1566 : if (TREE_CODE (elt) != INTEGER_CST
19605 1566 : || TREE_OVERFLOW (elt))
19606 : return NULL_TREE;
19607 : }
19608 5412 : builder.quick_push (elt);
19609 : }
19610 555 : return builder.build ();
19611 555 : }
19612 : break;
19613 :
19614 32718 : case IX86_BUILTIN_MINSS:
19615 32718 : case IX86_BUILTIN_MINSH_MASK:
19616 32718 : tcode = LT_EXPR;
19617 32718 : is_scalar = true;
19618 32718 : goto do_minmax;
19619 :
19620 32718 : case IX86_BUILTIN_MAXSS:
19621 32718 : case IX86_BUILTIN_MAXSH_MASK:
19622 32718 : tcode = GT_EXPR;
19623 32718 : is_scalar = true;
19624 32718 : goto do_minmax;
19625 :
19626 350576 : case IX86_BUILTIN_MINPS:
19627 350576 : case IX86_BUILTIN_MINPD:
19628 350576 : case IX86_BUILTIN_MINPS256:
19629 350576 : case IX86_BUILTIN_MINPD256:
19630 350576 : case IX86_BUILTIN_MINPS512:
19631 350576 : case IX86_BUILTIN_MINPD512:
19632 350576 : case IX86_BUILTIN_MINPS128_MASK:
19633 350576 : case IX86_BUILTIN_MINPD128_MASK:
19634 350576 : case IX86_BUILTIN_MINPS256_MASK:
19635 350576 : case IX86_BUILTIN_MINPD256_MASK:
19636 350576 : case IX86_BUILTIN_MINPH128_MASK:
19637 350576 : case IX86_BUILTIN_MINPH256_MASK:
19638 350576 : case IX86_BUILTIN_MINPH512_MASK:
19639 350576 : tcode = LT_EXPR;
19640 350576 : is_scalar = false;
19641 350576 : goto do_minmax;
19642 :
19643 : case IX86_BUILTIN_MAXPS:
19644 : case IX86_BUILTIN_MAXPD:
19645 : case IX86_BUILTIN_MAXPS256:
19646 : case IX86_BUILTIN_MAXPD256:
19647 : case IX86_BUILTIN_MAXPS512:
19648 : case IX86_BUILTIN_MAXPD512:
19649 : case IX86_BUILTIN_MAXPS128_MASK:
19650 : case IX86_BUILTIN_MAXPD128_MASK:
19651 : case IX86_BUILTIN_MAXPS256_MASK:
19652 : case IX86_BUILTIN_MAXPD256_MASK:
19653 : case IX86_BUILTIN_MAXPH128_MASK:
19654 : case IX86_BUILTIN_MAXPH256_MASK:
19655 : case IX86_BUILTIN_MAXPH512_MASK:
19656 : tcode = GT_EXPR;
19657 : is_scalar = false;
19658 766608 : do_minmax:
19659 766608 : gcc_assert (n_args >= 2);
19660 766608 : if (TREE_CODE (args[0]) != VECTOR_CST
19661 76 : || TREE_CODE (args[1]) != VECTOR_CST)
19662 : break;
19663 76 : mask = HOST_WIDE_INT_M1U;
19664 76 : if (n_args > 2)
19665 : {
19666 36 : gcc_assert (n_args >= 4);
19667 : /* This is masked minmax. */
19668 36 : if (TREE_CODE (args[3]) != INTEGER_CST
19669 36 : || TREE_SIDE_EFFECTS (args[2]))
19670 : break;
19671 36 : mask = TREE_INT_CST_LOW (args[3]);
19672 36 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19673 36 : mask |= HOST_WIDE_INT_M1U << elems;
19674 36 : if (mask != HOST_WIDE_INT_M1U
19675 32 : && TREE_CODE (args[2]) != VECTOR_CST)
19676 : break;
19677 36 : if (n_args >= 5)
19678 : {
19679 20 : if (!tree_fits_uhwi_p (args[4]))
19680 : break;
19681 20 : if (tree_to_uhwi (args[4]) != 4
19682 0 : && tree_to_uhwi (args[4]) != 8)
19683 : break;
19684 : }
19685 36 : if (mask == (HOST_WIDE_INT_M1U << elems))
19686 : return args[2];
19687 : }
19688 : /* Punt on NaNs, unless exceptions are disabled. */
19689 76 : if (HONOR_NANS (args[0])
19690 76 : && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19691 184 : for (int i = 0; i < 2; ++i)
19692 : {
19693 134 : unsigned count = vector_cst_encoded_nelts (args[i]);
19694 957 : for (unsigned j = 0; j < count; ++j)
19695 849 : if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19696 : return NULL_TREE;
19697 : }
19698 50 : {
19699 50 : tree res = const_binop (tcode,
19700 50 : truth_type_for (TREE_TYPE (args[0])),
19701 : args[0], args[1]);
19702 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19703 : break;
19704 50 : res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19705 : args[0], args[1]);
19706 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19707 : break;
19708 50 : if (mask != HOST_WIDE_INT_M1U)
19709 : {
19710 32 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19711 32 : vec_perm_builder sel (nelts, nelts, 1);
19712 328 : for (unsigned int i = 0; i < nelts; i++)
19713 296 : if (mask & (HOST_WIDE_INT_1U << i))
19714 160 : sel.quick_push (i);
19715 : else
19716 136 : sel.quick_push (nelts + i);
19717 32 : vec_perm_indices indices (sel, 2, nelts);
19718 32 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19719 : indices);
19720 32 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19721 : break;
19722 32 : }
19723 50 : if (is_scalar)
19724 : {
19725 10 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19726 10 : vec_perm_builder sel (nelts, nelts, 1);
19727 10 : sel.quick_push (0);
19728 40 : for (unsigned int i = 1; i < nelts; i++)
19729 30 : sel.quick_push (nelts + i);
19730 10 : vec_perm_indices indices (sel, 2, nelts);
19731 10 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19732 : indices);
19733 10 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19734 : break;
19735 10 : }
19736 50 : return res;
19737 : }
19738 :
19739 : default:
19740 : break;
19741 : }
19742 : }
19743 :
19744 : #ifdef SUBTARGET_FOLD_BUILTIN
19745 : return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19746 : #endif
19747 :
19748 : return NULL_TREE;
19749 : }
19750 :
19751 : /* Fold an MD builtin (use ix86_fold_builtin for folding into
19752 : a constant) in GIMPLE. */
19753 :
19754 : bool
19755 1131057 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19756 : {
19757 1131057 : gimple *stmt = gsi_stmt (*gsi), *g;
19758 1131057 : gimple_seq stmts = NULL;
19759 1131057 : tree fndecl = gimple_call_fndecl (stmt);
19760 1131057 : gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19761 1131057 : int n_args = gimple_call_num_args (stmt);
19762 1131057 : enum ix86_builtins fn_code
19763 1131057 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19764 1131057 : tree decl = NULL_TREE;
19765 1131057 : tree arg0, arg1, arg2;
19766 1131057 : enum rtx_code rcode;
19767 1131057 : enum tree_code tcode;
19768 1131057 : unsigned HOST_WIDE_INT count;
19769 1131057 : bool is_vshift;
19770 1131057 : unsigned HOST_WIDE_INT elems;
19771 1131057 : location_t loc;
19772 :
19773 : /* Don't fold when there's isa mismatch. */
19774 1131057 : if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19775 : return false;
19776 :
19777 1130930 : switch (fn_code)
19778 : {
19779 288 : case IX86_BUILTIN_TZCNT32:
19780 288 : decl = builtin_decl_implicit (BUILT_IN_CTZ);
19781 288 : goto fold_tzcnt_lzcnt;
19782 :
19783 237 : case IX86_BUILTIN_TZCNT64:
19784 237 : decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19785 237 : goto fold_tzcnt_lzcnt;
19786 :
19787 215 : case IX86_BUILTIN_LZCNT32:
19788 215 : decl = builtin_decl_implicit (BUILT_IN_CLZ);
19789 215 : goto fold_tzcnt_lzcnt;
19790 :
19791 224 : case IX86_BUILTIN_LZCNT64:
19792 224 : decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19793 224 : goto fold_tzcnt_lzcnt;
19794 :
19795 964 : fold_tzcnt_lzcnt:
19796 964 : gcc_assert (n_args == 1);
19797 964 : arg0 = gimple_call_arg (stmt, 0);
19798 964 : if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19799 : {
19800 799 : int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19801 : /* If arg0 is provably non-zero, optimize into generic
19802 : __builtin_c[tl]z{,ll} function the middle-end handles
19803 : better. */
19804 799 : if (!expr_not_equal_to (arg0, wi::zero (prec)))
19805 : return false;
19806 :
19807 9 : loc = gimple_location (stmt);
19808 9 : g = gimple_build_call (decl, 1, arg0);
19809 9 : gimple_set_location (g, loc);
19810 9 : tree lhs = make_ssa_name (integer_type_node);
19811 9 : gimple_call_set_lhs (g, lhs);
19812 9 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
19813 9 : g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19814 9 : gimple_set_location (g, loc);
19815 9 : gsi_replace (gsi, g, false);
19816 9 : return true;
19817 : }
19818 : break;
19819 :
19820 491 : case IX86_BUILTIN_BZHI32:
19821 491 : case IX86_BUILTIN_BZHI64:
19822 491 : gcc_assert (n_args == 2);
19823 491 : arg1 = gimple_call_arg (stmt, 1);
19824 491 : if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19825 : {
19826 195 : unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19827 195 : arg0 = gimple_call_arg (stmt, 0);
19828 195 : if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19829 : break;
19830 31 : loc = gimple_location (stmt);
19831 31 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19832 31 : gimple_set_location (g, loc);
19833 31 : gsi_replace (gsi, g, false);
19834 31 : return true;
19835 : }
19836 : break;
19837 :
19838 502 : case IX86_BUILTIN_PDEP32:
19839 502 : case IX86_BUILTIN_PDEP64:
19840 502 : case IX86_BUILTIN_PEXT32:
19841 502 : case IX86_BUILTIN_PEXT64:
19842 502 : gcc_assert (n_args == 2);
19843 502 : arg1 = gimple_call_arg (stmt, 1);
19844 502 : if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19845 : {
19846 4 : loc = gimple_location (stmt);
19847 4 : arg0 = gimple_call_arg (stmt, 0);
19848 4 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19849 4 : gimple_set_location (g, loc);
19850 4 : gsi_replace (gsi, g, false);
19851 4 : return true;
19852 : }
19853 : break;
19854 :
19855 145 : case IX86_BUILTIN_PBLENDVB256:
19856 145 : case IX86_BUILTIN_BLENDVPS256:
19857 145 : case IX86_BUILTIN_BLENDVPD256:
19858 : /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
19859 : to scalar operations and not combined back. */
19860 145 : if (!TARGET_AVX2)
19861 : break;
19862 :
19863 : /* FALLTHRU. */
19864 112 : case IX86_BUILTIN_BLENDVPD:
19865 : /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19866 : w/o sse4.2, it's veclowered to scalar operations and
19867 : not combined back. */
19868 112 : if (!TARGET_SSE4_2)
19869 : break;
19870 : /* FALLTHRU. */
19871 166 : case IX86_BUILTIN_PBLENDVB128:
19872 166 : case IX86_BUILTIN_BLENDVPS:
19873 166 : gcc_assert (n_args == 3);
19874 166 : arg0 = gimple_call_arg (stmt, 0);
19875 166 : arg1 = gimple_call_arg (stmt, 1);
19876 166 : arg2 = gimple_call_arg (stmt, 2);
19877 166 : if (gimple_call_lhs (stmt))
19878 : {
19879 166 : loc = gimple_location (stmt);
19880 166 : tree type = TREE_TYPE (arg2);
19881 166 : if (VECTOR_FLOAT_TYPE_P (type))
19882 : {
19883 73 : tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19884 73 : ? intSI_type_node : intDI_type_node;
19885 73 : type = get_same_sized_vectype (itype, type);
19886 : }
19887 : else
19888 93 : type = signed_type_for (type);
19889 166 : arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19890 166 : tree zero_vec = build_zero_cst (type);
19891 166 : tree cmp_type = truth_type_for (type);
19892 166 : tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19893 166 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19894 166 : g = gimple_build_assign (gimple_call_lhs (stmt),
19895 : VEC_COND_EXPR, cmp,
19896 : arg1, arg0);
19897 166 : gimple_set_location (g, loc);
19898 166 : gsi_replace (gsi, g, false);
19899 : }
19900 : else
19901 0 : gsi_replace (gsi, gimple_build_nop (), false);
19902 : return true;
19903 :
19904 :
19905 16 : case IX86_BUILTIN_PCMPEQB128:
19906 16 : case IX86_BUILTIN_PCMPEQW128:
19907 16 : case IX86_BUILTIN_PCMPEQD128:
19908 16 : case IX86_BUILTIN_PCMPEQQ:
19909 16 : case IX86_BUILTIN_PCMPEQB256:
19910 16 : case IX86_BUILTIN_PCMPEQW256:
19911 16 : case IX86_BUILTIN_PCMPEQD256:
19912 16 : case IX86_BUILTIN_PCMPEQQ256:
19913 16 : tcode = EQ_EXPR;
19914 16 : goto do_cmp;
19915 :
19916 : case IX86_BUILTIN_PCMPGTB128:
19917 : case IX86_BUILTIN_PCMPGTW128:
19918 : case IX86_BUILTIN_PCMPGTD128:
19919 : case IX86_BUILTIN_PCMPGTQ:
19920 : case IX86_BUILTIN_PCMPGTB256:
19921 : case IX86_BUILTIN_PCMPGTW256:
19922 : case IX86_BUILTIN_PCMPGTD256:
19923 : case IX86_BUILTIN_PCMPGTQ256:
19924 : tcode = GT_EXPR;
19925 :
19926 33 : do_cmp:
19927 33 : gcc_assert (n_args == 2);
19928 33 : arg0 = gimple_call_arg (stmt, 0);
19929 33 : arg1 = gimple_call_arg (stmt, 1);
19930 33 : if (gimple_call_lhs (stmt))
19931 : {
19932 32 : loc = gimple_location (stmt);
19933 32 : tree type = TREE_TYPE (arg0);
19934 32 : tree zero_vec = build_zero_cst (type);
19935 32 : tree minus_one_vec = build_minus_one_cst (type);
19936 32 : tree cmp_type = truth_type_for (type);
19937 32 : tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19938 32 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19939 32 : g = gimple_build_assign (gimple_call_lhs (stmt),
19940 : VEC_COND_EXPR, cmp,
19941 : minus_one_vec, zero_vec);
19942 32 : gimple_set_location (g, loc);
19943 32 : gsi_replace (gsi, g, false);
19944 : }
19945 : else
19946 1 : gsi_replace (gsi, gimple_build_nop (), false);
19947 : return true;
19948 :
19949 9297 : case IX86_BUILTIN_PSLLD:
19950 9297 : case IX86_BUILTIN_PSLLD128:
19951 9297 : case IX86_BUILTIN_PSLLD128_MASK:
19952 9297 : case IX86_BUILTIN_PSLLD256:
19953 9297 : case IX86_BUILTIN_PSLLD256_MASK:
19954 9297 : case IX86_BUILTIN_PSLLD512:
19955 9297 : case IX86_BUILTIN_PSLLDI:
19956 9297 : case IX86_BUILTIN_PSLLDI128:
19957 9297 : case IX86_BUILTIN_PSLLDI128_MASK:
19958 9297 : case IX86_BUILTIN_PSLLDI256:
19959 9297 : case IX86_BUILTIN_PSLLDI256_MASK:
19960 9297 : case IX86_BUILTIN_PSLLDI512:
19961 9297 : case IX86_BUILTIN_PSLLQ:
19962 9297 : case IX86_BUILTIN_PSLLQ128:
19963 9297 : case IX86_BUILTIN_PSLLQ128_MASK:
19964 9297 : case IX86_BUILTIN_PSLLQ256:
19965 9297 : case IX86_BUILTIN_PSLLQ256_MASK:
19966 9297 : case IX86_BUILTIN_PSLLQ512:
19967 9297 : case IX86_BUILTIN_PSLLQI:
19968 9297 : case IX86_BUILTIN_PSLLQI128:
19969 9297 : case IX86_BUILTIN_PSLLQI128_MASK:
19970 9297 : case IX86_BUILTIN_PSLLQI256:
19971 9297 : case IX86_BUILTIN_PSLLQI256_MASK:
19972 9297 : case IX86_BUILTIN_PSLLQI512:
19973 9297 : case IX86_BUILTIN_PSLLW:
19974 9297 : case IX86_BUILTIN_PSLLW128:
19975 9297 : case IX86_BUILTIN_PSLLW128_MASK:
19976 9297 : case IX86_BUILTIN_PSLLW256:
19977 9297 : case IX86_BUILTIN_PSLLW256_MASK:
19978 9297 : case IX86_BUILTIN_PSLLW512_MASK:
19979 9297 : case IX86_BUILTIN_PSLLWI:
19980 9297 : case IX86_BUILTIN_PSLLWI128:
19981 9297 : case IX86_BUILTIN_PSLLWI128_MASK:
19982 9297 : case IX86_BUILTIN_PSLLWI256:
19983 9297 : case IX86_BUILTIN_PSLLWI256_MASK:
19984 9297 : case IX86_BUILTIN_PSLLWI512_MASK:
19985 9297 : rcode = ASHIFT;
19986 9297 : is_vshift = false;
19987 9297 : goto do_shift;
19988 6495 : case IX86_BUILTIN_PSRAD:
19989 6495 : case IX86_BUILTIN_PSRAD128:
19990 6495 : case IX86_BUILTIN_PSRAD128_MASK:
19991 6495 : case IX86_BUILTIN_PSRAD256:
19992 6495 : case IX86_BUILTIN_PSRAD256_MASK:
19993 6495 : case IX86_BUILTIN_PSRAD512:
19994 6495 : case IX86_BUILTIN_PSRADI:
19995 6495 : case IX86_BUILTIN_PSRADI128:
19996 6495 : case IX86_BUILTIN_PSRADI128_MASK:
19997 6495 : case IX86_BUILTIN_PSRADI256:
19998 6495 : case IX86_BUILTIN_PSRADI256_MASK:
19999 6495 : case IX86_BUILTIN_PSRADI512:
20000 6495 : case IX86_BUILTIN_PSRAQ128_MASK:
20001 6495 : case IX86_BUILTIN_PSRAQ256_MASK:
20002 6495 : case IX86_BUILTIN_PSRAQ512:
20003 6495 : case IX86_BUILTIN_PSRAQI128_MASK:
20004 6495 : case IX86_BUILTIN_PSRAQI256_MASK:
20005 6495 : case IX86_BUILTIN_PSRAQI512:
20006 6495 : case IX86_BUILTIN_PSRAW:
20007 6495 : case IX86_BUILTIN_PSRAW128:
20008 6495 : case IX86_BUILTIN_PSRAW128_MASK:
20009 6495 : case IX86_BUILTIN_PSRAW256:
20010 6495 : case IX86_BUILTIN_PSRAW256_MASK:
20011 6495 : case IX86_BUILTIN_PSRAW512:
20012 6495 : case IX86_BUILTIN_PSRAWI:
20013 6495 : case IX86_BUILTIN_PSRAWI128:
20014 6495 : case IX86_BUILTIN_PSRAWI128_MASK:
20015 6495 : case IX86_BUILTIN_PSRAWI256:
20016 6495 : case IX86_BUILTIN_PSRAWI256_MASK:
20017 6495 : case IX86_BUILTIN_PSRAWI512:
20018 6495 : rcode = ASHIFTRT;
20019 6495 : is_vshift = false;
20020 6495 : goto do_shift;
20021 7960 : case IX86_BUILTIN_PSRLD:
20022 7960 : case IX86_BUILTIN_PSRLD128:
20023 7960 : case IX86_BUILTIN_PSRLD128_MASK:
20024 7960 : case IX86_BUILTIN_PSRLD256:
20025 7960 : case IX86_BUILTIN_PSRLD256_MASK:
20026 7960 : case IX86_BUILTIN_PSRLD512:
20027 7960 : case IX86_BUILTIN_PSRLDI:
20028 7960 : case IX86_BUILTIN_PSRLDI128:
20029 7960 : case IX86_BUILTIN_PSRLDI128_MASK:
20030 7960 : case IX86_BUILTIN_PSRLDI256:
20031 7960 : case IX86_BUILTIN_PSRLDI256_MASK:
20032 7960 : case IX86_BUILTIN_PSRLDI512:
20033 7960 : case IX86_BUILTIN_PSRLQ:
20034 7960 : case IX86_BUILTIN_PSRLQ128:
20035 7960 : case IX86_BUILTIN_PSRLQ128_MASK:
20036 7960 : case IX86_BUILTIN_PSRLQ256:
20037 7960 : case IX86_BUILTIN_PSRLQ256_MASK:
20038 7960 : case IX86_BUILTIN_PSRLQ512:
20039 7960 : case IX86_BUILTIN_PSRLQI:
20040 7960 : case IX86_BUILTIN_PSRLQI128:
20041 7960 : case IX86_BUILTIN_PSRLQI128_MASK:
20042 7960 : case IX86_BUILTIN_PSRLQI256:
20043 7960 : case IX86_BUILTIN_PSRLQI256_MASK:
20044 7960 : case IX86_BUILTIN_PSRLQI512:
20045 7960 : case IX86_BUILTIN_PSRLW:
20046 7960 : case IX86_BUILTIN_PSRLW128:
20047 7960 : case IX86_BUILTIN_PSRLW128_MASK:
20048 7960 : case IX86_BUILTIN_PSRLW256:
20049 7960 : case IX86_BUILTIN_PSRLW256_MASK:
20050 7960 : case IX86_BUILTIN_PSRLW512:
20051 7960 : case IX86_BUILTIN_PSRLWI:
20052 7960 : case IX86_BUILTIN_PSRLWI128:
20053 7960 : case IX86_BUILTIN_PSRLWI128_MASK:
20054 7960 : case IX86_BUILTIN_PSRLWI256:
20055 7960 : case IX86_BUILTIN_PSRLWI256_MASK:
20056 7960 : case IX86_BUILTIN_PSRLWI512:
20057 7960 : rcode = LSHIFTRT;
20058 7960 : is_vshift = false;
20059 7960 : goto do_shift;
20060 2384 : case IX86_BUILTIN_PSLLVV16HI:
20061 2384 : case IX86_BUILTIN_PSLLVV16SI:
20062 2384 : case IX86_BUILTIN_PSLLVV2DI:
20063 2384 : case IX86_BUILTIN_PSLLVV2DI_MASK:
20064 2384 : case IX86_BUILTIN_PSLLVV32HI:
20065 2384 : case IX86_BUILTIN_PSLLVV4DI:
20066 2384 : case IX86_BUILTIN_PSLLVV4DI_MASK:
20067 2384 : case IX86_BUILTIN_PSLLVV4SI:
20068 2384 : case IX86_BUILTIN_PSLLVV4SI_MASK:
20069 2384 : case IX86_BUILTIN_PSLLVV8DI:
20070 2384 : case IX86_BUILTIN_PSLLVV8HI:
20071 2384 : case IX86_BUILTIN_PSLLVV8SI:
20072 2384 : case IX86_BUILTIN_PSLLVV8SI_MASK:
20073 2384 : rcode = ASHIFT;
20074 2384 : is_vshift = true;
20075 2384 : goto do_shift;
20076 2341 : case IX86_BUILTIN_PSRAVQ128:
20077 2341 : case IX86_BUILTIN_PSRAVQ256:
20078 2341 : case IX86_BUILTIN_PSRAVV16HI:
20079 2341 : case IX86_BUILTIN_PSRAVV16SI:
20080 2341 : case IX86_BUILTIN_PSRAVV32HI:
20081 2341 : case IX86_BUILTIN_PSRAVV4SI:
20082 2341 : case IX86_BUILTIN_PSRAVV4SI_MASK:
20083 2341 : case IX86_BUILTIN_PSRAVV8DI:
20084 2341 : case IX86_BUILTIN_PSRAVV8HI:
20085 2341 : case IX86_BUILTIN_PSRAVV8SI:
20086 2341 : case IX86_BUILTIN_PSRAVV8SI_MASK:
20087 2341 : rcode = ASHIFTRT;
20088 2341 : is_vshift = true;
20089 2341 : goto do_shift;
20090 2380 : case IX86_BUILTIN_PSRLVV16HI:
20091 2380 : case IX86_BUILTIN_PSRLVV16SI:
20092 2380 : case IX86_BUILTIN_PSRLVV2DI:
20093 2380 : case IX86_BUILTIN_PSRLVV2DI_MASK:
20094 2380 : case IX86_BUILTIN_PSRLVV32HI:
20095 2380 : case IX86_BUILTIN_PSRLVV4DI:
20096 2380 : case IX86_BUILTIN_PSRLVV4DI_MASK:
20097 2380 : case IX86_BUILTIN_PSRLVV4SI:
20098 2380 : case IX86_BUILTIN_PSRLVV4SI_MASK:
20099 2380 : case IX86_BUILTIN_PSRLVV8DI:
20100 2380 : case IX86_BUILTIN_PSRLVV8HI:
20101 2380 : case IX86_BUILTIN_PSRLVV8SI:
20102 2380 : case IX86_BUILTIN_PSRLVV8SI_MASK:
20103 2380 : rcode = LSHIFTRT;
20104 2380 : is_vshift = true;
20105 2380 : goto do_shift;
20106 :
20107 30857 : do_shift:
20108 30857 : gcc_assert (n_args >= 2);
20109 30857 : if (!gimple_call_lhs (stmt))
20110 : {
20111 1 : gsi_replace (gsi, gimple_build_nop (), false);
20112 1 : return true;
20113 : }
20114 30856 : arg0 = gimple_call_arg (stmt, 0);
20115 30856 : arg1 = gimple_call_arg (stmt, 1);
20116 30856 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20117 : /* For masked shift, only optimize if the mask is all ones. */
20118 30856 : if (n_args > 2
20119 30856 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20120 : break;
20121 16081 : if (is_vshift)
20122 : {
20123 2640 : if (TREE_CODE (arg1) != VECTOR_CST)
20124 : break;
20125 69 : count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
20126 69 : if (integer_zerop (arg1))
20127 27 : count = 0;
20128 42 : else if (rcode == ASHIFTRT)
20129 : break;
20130 : else
20131 230 : for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
20132 : {
20133 212 : tree elt = VECTOR_CST_ELT (arg1, i);
20134 212 : if (!wi::neg_p (wi::to_wide (elt))
20135 375 : && wi::to_widest (elt) < count)
20136 16 : return false;
20137 : }
20138 : }
20139 : else
20140 : {
20141 13441 : arg1 = ix86_vector_shift_count (arg1);
20142 13441 : if (!arg1)
20143 : break;
20144 5608 : count = tree_to_uhwi (arg1);
20145 : }
20146 5653 : if (count == 0)
20147 : {
20148 : /* Just return the first argument for shift by 0. */
20149 93 : loc = gimple_location (stmt);
20150 93 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
20151 93 : gimple_set_location (g, loc);
20152 93 : gsi_replace (gsi, g, false);
20153 93 : return true;
20154 : }
20155 5560 : if (rcode != ASHIFTRT
20156 5560 : && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
20157 : {
20158 : /* For shift counts equal or greater than precision, except for
20159 : arithmetic right shift the result is zero. */
20160 78 : loc = gimple_location (stmt);
20161 78 : g = gimple_build_assign (gimple_call_lhs (stmt),
20162 78 : build_zero_cst (TREE_TYPE (arg0)));
20163 78 : gimple_set_location (g, loc);
20164 78 : gsi_replace (gsi, g, false);
20165 78 : return true;
20166 : }
20167 : break;
20168 :
20169 531 : case IX86_BUILTIN_SHUFPD512:
20170 531 : case IX86_BUILTIN_SHUFPS512:
20171 531 : case IX86_BUILTIN_SHUFPD:
20172 531 : case IX86_BUILTIN_SHUFPD256:
20173 531 : case IX86_BUILTIN_SHUFPS:
20174 531 : case IX86_BUILTIN_SHUFPS256:
20175 531 : arg0 = gimple_call_arg (stmt, 0);
20176 531 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20177 : /* This is masked shuffle. Only optimize if the mask is all ones. */
20178 531 : if (n_args > 3
20179 895 : && !ix86_masked_all_ones (elems,
20180 364 : gimple_call_arg (stmt, n_args - 1)))
20181 : break;
20182 203 : arg2 = gimple_call_arg (stmt, 2);
20183 203 : if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
20184 : {
20185 146 : unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
20186 : /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
20187 146 : if (shuffle_mask > 255)
20188 : return false;
20189 :
20190 144 : machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
20191 144 : loc = gimple_location (stmt);
20192 144 : tree itype = (imode == E_DFmode
20193 144 : ? long_long_integer_type_node : integer_type_node);
20194 144 : tree vtype = build_vector_type (itype, elems);
20195 144 : tree_vector_builder elts (vtype, elems, 1);
20196 :
20197 :
20198 : /* Transform integer shuffle_mask to vector perm_mask which
20199 : is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
20200 840 : for (unsigned i = 0; i != elems; i++)
20201 : {
20202 696 : unsigned sel_idx;
20203 : /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
20204 : provide 2 select controls for each element of the
20205 : destination. */
20206 696 : if (imode == E_DFmode)
20207 240 : sel_idx = (i & 1) * elems + (i & ~1)
20208 240 : + ((shuffle_mask >> i) & 1);
20209 : else
20210 : {
20211 : /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
20212 : controls for each element of the destination. */
20213 456 : unsigned j = i % 4;
20214 456 : sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
20215 456 : + ((shuffle_mask >> 2 * j) & 3);
20216 : }
20217 696 : elts.quick_push (build_int_cst (itype, sel_idx));
20218 : }
20219 :
20220 144 : tree perm_mask = elts.build ();
20221 144 : arg1 = gimple_call_arg (stmt, 1);
20222 144 : g = gimple_build_assign (gimple_call_lhs (stmt),
20223 : VEC_PERM_EXPR,
20224 : arg0, arg1, perm_mask);
20225 144 : gimple_set_location (g, loc);
20226 144 : gsi_replace (gsi, g, false);
20227 144 : return true;
20228 144 : }
20229 : // Do not error yet, the constant could be propagated later?
20230 : break;
20231 :
20232 48 : case IX86_BUILTIN_PABSB:
20233 48 : case IX86_BUILTIN_PABSW:
20234 48 : case IX86_BUILTIN_PABSD:
20235 : /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
20236 48 : if (!TARGET_MMX_WITH_SSE)
20237 : break;
20238 : /* FALLTHRU. */
20239 2189 : case IX86_BUILTIN_PABSB128:
20240 2189 : case IX86_BUILTIN_PABSB256:
20241 2189 : case IX86_BUILTIN_PABSB512:
20242 2189 : case IX86_BUILTIN_PABSW128:
20243 2189 : case IX86_BUILTIN_PABSW256:
20244 2189 : case IX86_BUILTIN_PABSW512:
20245 2189 : case IX86_BUILTIN_PABSD128:
20246 2189 : case IX86_BUILTIN_PABSD256:
20247 2189 : case IX86_BUILTIN_PABSD512:
20248 2189 : case IX86_BUILTIN_PABSQ128:
20249 2189 : case IX86_BUILTIN_PABSQ256:
20250 2189 : case IX86_BUILTIN_PABSQ512:
20251 2189 : case IX86_BUILTIN_PABSB128_MASK:
20252 2189 : case IX86_BUILTIN_PABSB256_MASK:
20253 2189 : case IX86_BUILTIN_PABSW128_MASK:
20254 2189 : case IX86_BUILTIN_PABSW256_MASK:
20255 2189 : case IX86_BUILTIN_PABSD128_MASK:
20256 2189 : case IX86_BUILTIN_PABSD256_MASK:
20257 2189 : gcc_assert (n_args >= 1);
20258 2189 : if (!gimple_call_lhs (stmt))
20259 : {
20260 1 : gsi_replace (gsi, gimple_build_nop (), false);
20261 1 : return true;
20262 : }
20263 2188 : arg0 = gimple_call_arg (stmt, 0);
20264 2188 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20265 : /* For masked ABS, only optimize if the mask is all ones. */
20266 2188 : if (n_args > 1
20267 2188 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20268 : break;
20269 228 : {
20270 228 : tree utype, ures, vce;
20271 228 : utype = unsigned_type_for (TREE_TYPE (arg0));
20272 : /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
20273 : instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
20274 228 : ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
20275 228 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20276 228 : loc = gimple_location (stmt);
20277 228 : vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
20278 228 : g = gimple_build_assign (gimple_call_lhs (stmt),
20279 : VIEW_CONVERT_EXPR, vce);
20280 228 : gsi_replace (gsi, g, false);
20281 : }
20282 228 : return true;
20283 :
20284 2225 : case IX86_BUILTIN_MINPS:
20285 2225 : case IX86_BUILTIN_MINPD:
20286 2225 : case IX86_BUILTIN_MINPS256:
20287 2225 : case IX86_BUILTIN_MINPD256:
20288 2225 : case IX86_BUILTIN_MINPS512:
20289 2225 : case IX86_BUILTIN_MINPD512:
20290 2225 : case IX86_BUILTIN_MINPS128_MASK:
20291 2225 : case IX86_BUILTIN_MINPD128_MASK:
20292 2225 : case IX86_BUILTIN_MINPS256_MASK:
20293 2225 : case IX86_BUILTIN_MINPD256_MASK:
20294 2225 : case IX86_BUILTIN_MINPH128_MASK:
20295 2225 : case IX86_BUILTIN_MINPH256_MASK:
20296 2225 : case IX86_BUILTIN_MINPH512_MASK:
20297 2225 : tcode = LT_EXPR;
20298 2225 : goto do_minmax;
20299 :
20300 : case IX86_BUILTIN_MAXPS:
20301 : case IX86_BUILTIN_MAXPD:
20302 : case IX86_BUILTIN_MAXPS256:
20303 : case IX86_BUILTIN_MAXPD256:
20304 : case IX86_BUILTIN_MAXPS512:
20305 : case IX86_BUILTIN_MAXPD512:
20306 : case IX86_BUILTIN_MAXPS128_MASK:
20307 : case IX86_BUILTIN_MAXPD128_MASK:
20308 : case IX86_BUILTIN_MAXPS256_MASK:
20309 : case IX86_BUILTIN_MAXPD256_MASK:
20310 : case IX86_BUILTIN_MAXPH128_MASK:
20311 : case IX86_BUILTIN_MAXPH256_MASK:
20312 : case IX86_BUILTIN_MAXPH512_MASK:
20313 : tcode = GT_EXPR;
20314 4435 : do_minmax:
20315 4435 : gcc_assert (n_args >= 2);
20316 : /* Without SSE4.1 we often aren't able to pattern match it back to the
20317 : desired instruction. */
20318 4435 : if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
20319 : break;
20320 3865 : arg0 = gimple_call_arg (stmt, 0);
20321 3865 : arg1 = gimple_call_arg (stmt, 1);
20322 3865 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20323 : /* For masked minmax, only optimize if the mask is all ones. */
20324 3865 : if (n_args > 2
20325 3865 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
20326 : break;
20327 647 : if (n_args >= 5)
20328 : {
20329 436 : tree arg4 = gimple_call_arg (stmt, 4);
20330 436 : if (!tree_fits_uhwi_p (arg4))
20331 : break;
20332 424 : if (tree_to_uhwi (arg4) == 4)
20333 : /* Ok. */;
20334 416 : else if (tree_to_uhwi (arg4) != 8)
20335 : /* Invalid round argument. */
20336 : break;
20337 416 : else if (HONOR_NANS (arg0))
20338 : /* Lowering to comparison would raise exceptions which
20339 : shouldn't be raised. */
20340 : break;
20341 : }
20342 219 : {
20343 219 : tree type = truth_type_for (TREE_TYPE (arg0));
20344 219 : tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
20345 219 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20346 219 : g = gimple_build_assign (gimple_call_lhs (stmt),
20347 : VEC_COND_EXPR, cmpres, arg0, arg1);
20348 219 : gsi_replace (gsi, g, false);
20349 : }
20350 219 : return true;
20351 :
20352 : default:
20353 : break;
20354 : }
20355 :
20356 : return false;
20357 : }
20358 :
20359 : /* Handler for an SVML-style interface to
20360 : a library with vectorized intrinsics. */
20361 :
20362 : tree
20363 10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
20364 : {
20365 10 : char name[20];
20366 10 : tree fntype, new_fndecl, args;
20367 10 : unsigned arity;
20368 10 : const char *bname;
20369 10 : machine_mode el_mode, in_mode;
20370 10 : int n, in_n;
20371 :
20372 : /* The SVML is suitable for unsafe math only. */
20373 10 : if (!flag_unsafe_math_optimizations)
20374 : return NULL_TREE;
20375 :
20376 10 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20377 10 : n = TYPE_VECTOR_SUBPARTS (type_out);
20378 10 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20379 10 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20380 10 : if (el_mode != in_mode
20381 10 : || n != in_n)
20382 : return NULL_TREE;
20383 :
20384 10 : switch (fn)
20385 : {
20386 10 : CASE_CFN_EXP:
20387 10 : CASE_CFN_LOG:
20388 10 : CASE_CFN_LOG10:
20389 10 : CASE_CFN_POW:
20390 10 : CASE_CFN_TANH:
20391 10 : CASE_CFN_TAN:
20392 10 : CASE_CFN_ATAN:
20393 10 : CASE_CFN_ATAN2:
20394 10 : CASE_CFN_ATANH:
20395 10 : CASE_CFN_CBRT:
20396 10 : CASE_CFN_SINH:
20397 10 : CASE_CFN_SIN:
20398 10 : CASE_CFN_ASINH:
20399 10 : CASE_CFN_ASIN:
20400 10 : CASE_CFN_COSH:
20401 10 : CASE_CFN_COS:
20402 10 : CASE_CFN_ACOSH:
20403 10 : CASE_CFN_ACOS:
20404 10 : if ((el_mode != DFmode || n != 2)
20405 8 : && (el_mode != SFmode || n != 4))
20406 : return NULL_TREE;
20407 6 : break;
20408 :
20409 : default:
20410 : return NULL_TREE;
20411 : }
20412 :
20413 6 : tree fndecl = mathfn_built_in (el_mode == DFmode
20414 : ? double_type_node : float_type_node, fn);
20415 6 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20416 :
20417 6 : if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
20418 2 : strcpy (name, "vmlsLn4");
20419 4 : else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
20420 0 : strcpy (name, "vmldLn2");
20421 4 : else if (n == 4)
20422 : {
20423 2 : sprintf (name, "vmls%s", bname+10);
20424 2 : name[strlen (name)-1] = '4';
20425 : }
20426 : else
20427 2 : sprintf (name, "vmld%s2", bname+10);
20428 :
20429 : /* Convert to uppercase. */
20430 6 : name[4] &= ~0x20;
20431 :
20432 6 : arity = 0;
20433 6 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20434 0 : arity++;
20435 :
20436 6 : if (arity == 1)
20437 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20438 : else
20439 6 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20440 :
20441 : /* Build a function declaration for the vectorized function. */
20442 6 : new_fndecl = build_decl (BUILTINS_LOCATION,
20443 : FUNCTION_DECL, get_identifier (name), fntype);
20444 6 : TREE_PUBLIC (new_fndecl) = 1;
20445 6 : DECL_EXTERNAL (new_fndecl) = 1;
20446 6 : DECL_IS_NOVOPS (new_fndecl) = 1;
20447 6 : TREE_READONLY (new_fndecl) = 1;
20448 :
20449 6 : return new_fndecl;
20450 : }
20451 :
20452 : /* Handler for an ACML-style interface to
20453 : a library with vectorized intrinsics. */
20454 :
20455 : tree
20456 3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
20457 : {
20458 3 : char name[20] = "__vr.._";
20459 3 : tree fntype, new_fndecl, args;
20460 3 : unsigned arity;
20461 3 : const char *bname;
20462 3 : machine_mode el_mode, in_mode;
20463 3 : int n, in_n;
20464 :
20465 : /* The ACML is 64bits only and suitable for unsafe math only as
20466 : it does not correctly support parts of IEEE with the required
20467 : precision such as denormals. */
20468 3 : if (!TARGET_64BIT
20469 3 : || !flag_unsafe_math_optimizations)
20470 : return NULL_TREE;
20471 :
20472 3 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20473 3 : n = TYPE_VECTOR_SUBPARTS (type_out);
20474 3 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20475 3 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20476 3 : if (el_mode != in_mode
20477 3 : || n != in_n)
20478 : return NULL_TREE;
20479 :
20480 3 : switch (fn)
20481 : {
20482 3 : CASE_CFN_SIN:
20483 3 : CASE_CFN_COS:
20484 3 : CASE_CFN_EXP:
20485 3 : CASE_CFN_LOG:
20486 3 : CASE_CFN_LOG2:
20487 3 : CASE_CFN_LOG10:
20488 3 : if (el_mode == DFmode && n == 2)
20489 : {
20490 3 : name[4] = 'd';
20491 3 : name[5] = '2';
20492 : }
20493 0 : else if (el_mode == SFmode && n == 4)
20494 : {
20495 0 : name[4] = 's';
20496 0 : name[5] = '4';
20497 : }
20498 : else
20499 : return NULL_TREE;
20500 3 : break;
20501 :
20502 : default:
20503 : return NULL_TREE;
20504 : }
20505 :
20506 3 : tree fndecl = mathfn_built_in (el_mode == DFmode
20507 : ? double_type_node : float_type_node, fn);
20508 3 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20509 3 : sprintf (name + 7, "%s", bname+10);
20510 :
20511 3 : arity = 0;
20512 3 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20513 0 : arity++;
20514 :
20515 3 : if (arity == 1)
20516 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20517 : else
20518 3 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20519 :
20520 : /* Build a function declaration for the vectorized function. */
20521 3 : new_fndecl = build_decl (BUILTINS_LOCATION,
20522 : FUNCTION_DECL, get_identifier (name), fntype);
20523 3 : TREE_PUBLIC (new_fndecl) = 1;
20524 3 : DECL_EXTERNAL (new_fndecl) = 1;
20525 3 : DECL_IS_NOVOPS (new_fndecl) = 1;
20526 3 : TREE_READONLY (new_fndecl) = 1;
20527 :
20528 3 : return new_fndecl;
20529 : }
20530 :
20531 : /* Handler for an AOCL-LibM-style interface to
20532 : a library with vectorized intrinsics. */
20533 :
20534 : tree
20535 386 : ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
20536 : {
20537 386 : char name[20] = "amd_vr";
20538 386 : int name_len = 6;
20539 386 : tree fntype, new_fndecl, args;
20540 386 : unsigned arity;
20541 386 : const char *bname;
20542 386 : machine_mode el_mode, in_mode;
20543 386 : int n, in_n;
20544 :
20545 : /* AOCL-LibM is 64bits only. It is also only suitable for unsafe math only
20546 : as it trades off some accuracy for increased performance. */
20547 386 : if (!TARGET_64BIT
20548 386 : || !flag_unsafe_math_optimizations)
20549 : return NULL_TREE;
20550 :
20551 386 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20552 386 : n = TYPE_VECTOR_SUBPARTS (type_out);
20553 386 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20554 386 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20555 386 : if (el_mode != in_mode
20556 386 : || n != in_n)
20557 : return NULL_TREE;
20558 :
20559 386 : gcc_checking_assert (n > 0);
20560 :
20561 : /* Decide whether there exists a function for the combination of FN, the mode
20562 : and the vector width. Return early if it doesn't. */
20563 :
20564 386 : if (el_mode != DFmode && el_mode != SFmode)
20565 : return NULL_TREE;
20566 :
20567 : /* Supported vector widths for given FN and single/double precision. Zeros
20568 : are used to fill out unused positions in the arrays. */
20569 386 : static const int supported_n[][2][3] = {
20570 : /* Single prec. , Double prec. */
20571 : { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */
20572 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */
20573 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */
20574 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */
20575 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */
20576 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */
20577 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */
20578 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */
20579 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */
20580 : { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */
20581 : { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */
20582 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */
20583 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */
20584 : { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. */
20585 : { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */
20586 : { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */
20587 : { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */
20588 : { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */
20589 : };
20590 :
20591 : /* We cannot simply index the supported_n array with FN since multiple FNs
20592 : may correspond to a single operation (see the definitions of these
20593 : CASE_CFN_* macros). */
20594 386 : int i;
20595 386 : switch (fn)
20596 : {
20597 : CASE_CFN_TAN : i = 0; break;
20598 28 : CASE_CFN_EXP : i = 1; break;
20599 28 : CASE_CFN_EXP2 : i = 2; break;
20600 28 : CASE_CFN_LOG : i = 3; break;
20601 28 : CASE_CFN_LOG2 : i = 4; break;
20602 28 : CASE_CFN_COS : i = 5; break;
20603 28 : CASE_CFN_SIN : i = 6; break;
20604 28 : CASE_CFN_POW : i = 7; break;
20605 28 : CASE_CFN_ERF : i = 8; break;
20606 25 : CASE_CFN_ATAN : i = 9; break;
20607 20 : CASE_CFN_LOG10 : i = 10; break;
20608 10 : CASE_CFN_EXP10 : i = 11; break;
20609 10 : CASE_CFN_LOG1P : i = 12; break;
20610 24 : CASE_CFN_ASIN : i = 13; break;
20611 14 : CASE_CFN_ACOS : i = 14; break;
20612 18 : CASE_CFN_TANH : i = 15; break;
20613 9 : CASE_CFN_EXPM1 : i = 16; break;
20614 14 : CASE_CFN_COSH : i = 17; break;
20615 : default: return NULL_TREE;
20616 : }
20617 :
20618 386 : int j = el_mode == DFmode;
20619 386 : bool n_is_supported = false;
20620 976 : for (unsigned k = 0; k < 3; k++)
20621 857 : if (supported_n[i][j][k] == n)
20622 : {
20623 : n_is_supported = true;
20624 : break;
20625 : }
20626 386 : if (!n_is_supported)
20627 : return NULL_TREE;
20628 :
20629 : /* Append the precision and the vector width to the function name we are
20630 : constructing. */
20631 267 : name[name_len++] = el_mode == DFmode ? 'd' : 's';
20632 267 : switch (n)
20633 : {
20634 214 : case 2:
20635 214 : case 4:
20636 214 : case 8:
20637 214 : name[name_len++] = '0' + n;
20638 214 : break;
20639 53 : case 16:
20640 53 : name[name_len++] = '1';
20641 53 : name[name_len++] = '6';
20642 53 : break;
20643 0 : default:
20644 0 : gcc_unreachable ();
20645 : }
20646 267 : name[name_len++] = '_';
20647 :
20648 : /* Append the operation name (steal it from the name of a builtin). */
20649 267 : tree fndecl = mathfn_built_in (el_mode == DFmode
20650 : ? double_type_node : float_type_node, fn);
20651 267 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20652 267 : sprintf (name + name_len, "%s", bname + 10);
20653 :
20654 267 : arity = 0;
20655 267 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20656 0 : arity++;
20657 :
20658 267 : if (arity == 1)
20659 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20660 : else
20661 267 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20662 :
20663 : /* Build a function declaration for the vectorized function. */
20664 267 : new_fndecl = build_decl (BUILTINS_LOCATION,
20665 : FUNCTION_DECL, get_identifier (name), fntype);
20666 267 : TREE_PUBLIC (new_fndecl) = 1;
20667 267 : DECL_EXTERNAL (new_fndecl) = 1;
20668 267 : TREE_READONLY (new_fndecl) = 1;
20669 :
20670 267 : return new_fndecl;
20671 : }
20672 :
20673 : /* Returns a decl of a function that implements scatter store with
20674 : register type VECTYPE and index type INDEX_TYPE and SCALE.
20675 : Return NULL_TREE if it is not available. */
20676 :
20677 : static tree
20678 129737 : ix86_vectorize_builtin_scatter (const_tree vectype,
20679 : const_tree index_type, int scale)
20680 : {
20681 129737 : bool si;
20682 129737 : enum ix86_builtins code;
20683 :
20684 129737 : if (!TARGET_AVX512F)
20685 : return NULL_TREE;
20686 :
20687 4193 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
20688 7367 : ? !TARGET_USE_SCATTER_2PARTS
20689 7367 : : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
20690 3174 : ? !TARGET_USE_SCATTER_4PARTS
20691 2057 : : !TARGET_USE_SCATTER_8PARTS))
20692 : return NULL_TREE;
20693 :
20694 4193 : if ((TREE_CODE (index_type) != INTEGER_TYPE
20695 463 : && !POINTER_TYPE_P (index_type))
20696 4656 : || (TYPE_MODE (index_type) != SImode
20697 1777 : && TYPE_MODE (index_type) != DImode))
20698 0 : return NULL_TREE;
20699 :
20700 4423 : if (TYPE_PRECISION (index_type) > POINTER_SIZE)
20701 : return NULL_TREE;
20702 :
20703 : /* v*scatter* insn sign extends index to pointer mode. */
20704 4193 : if (TYPE_PRECISION (index_type) < POINTER_SIZE
20705 4193 : && TYPE_UNSIGNED (index_type))
20706 : return NULL_TREE;
20707 :
20708 : /* Scale can be 1, 2, 4 or 8. */
20709 4193 : if (scale <= 0
20710 4193 : || scale > 8
20711 4177 : || (scale & (scale - 1)) != 0)
20712 : return NULL_TREE;
20713 :
20714 4177 : si = TYPE_MODE (index_type) == SImode;
20715 4177 : switch (TYPE_MODE (vectype))
20716 : {
20717 169 : case E_V8DFmode:
20718 169 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
20719 : break;
20720 104 : case E_V8DImode:
20721 104 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
20722 : break;
20723 177 : case E_V16SFmode:
20724 177 : code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
20725 : break;
20726 257 : case E_V16SImode:
20727 257 : code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
20728 : break;
20729 206 : case E_V4DFmode:
20730 206 : if (TARGET_AVX512VL)
20731 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
20732 : else
20733 : return NULL_TREE;
20734 : break;
20735 142 : case E_V4DImode:
20736 142 : if (TARGET_AVX512VL)
20737 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
20738 : else
20739 : return NULL_TREE;
20740 : break;
20741 248 : case E_V8SFmode:
20742 248 : if (TARGET_AVX512VL)
20743 40 : code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
20744 : else
20745 : return NULL_TREE;
20746 : break;
20747 268 : case E_V8SImode:
20748 268 : if (TARGET_AVX512VL)
20749 82 : code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
20750 : else
20751 : return NULL_TREE;
20752 : break;
20753 251 : case E_V2DFmode:
20754 251 : if (TARGET_AVX512VL)
20755 94 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
20756 : else
20757 : return NULL_TREE;
20758 : break;
20759 196 : case E_V2DImode:
20760 196 : if (TARGET_AVX512VL)
20761 94 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
20762 : else
20763 : return NULL_TREE;
20764 : break;
20765 301 : case E_V4SFmode:
20766 301 : if (TARGET_AVX512VL)
20767 96 : code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
20768 : else
20769 : return NULL_TREE;
20770 : break;
20771 324 : case E_V4SImode:
20772 324 : if (TARGET_AVX512VL)
20773 138 : code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
20774 : else
20775 : return NULL_TREE;
20776 : break;
20777 : default:
20778 : return NULL_TREE;
20779 : }
20780 :
20781 1319 : return get_ix86_builtin (code);
20782 : }
20783 :
20784 : /* Return true if it is safe to use the rsqrt optabs to optimize
20785 : 1.0/sqrt. */
20786 :
20787 : static bool
20788 66 : use_rsqrt_p (machine_mode mode)
20789 : {
20790 66 : return ((mode == HFmode
20791 42 : || (TARGET_SSE && TARGET_SSE_MATH))
20792 66 : && flag_finite_math_only
20793 65 : && !flag_trapping_math
20794 119 : && flag_unsafe_math_optimizations);
20795 : }
20796 :
20797 : /* Helper for avx_vpermilps256_operand et al. This is also used by
20798 : the expansion functions to turn the parallel back into a mask.
20799 : The return value is 0 for no match and the imm8+1 for a match. */
20800 :
20801 : int
20802 64039 : avx_vpermilp_parallel (rtx par, machine_mode mode)
20803 : {
20804 64039 : unsigned i, nelt = GET_MODE_NUNITS (mode);
20805 64039 : unsigned mask = 0;
20806 64039 : unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
20807 :
20808 64039 : if (XVECLEN (par, 0) != (int) nelt)
20809 : return 0;
20810 :
20811 : /* Validate that all of the elements are constants, and not totally
20812 : out of range. Copy the data into an integral array to make the
20813 : subsequent checks easier. */
20814 312319 : for (i = 0; i < nelt; ++i)
20815 : {
20816 248280 : rtx er = XVECEXP (par, 0, i);
20817 248280 : unsigned HOST_WIDE_INT ei;
20818 :
20819 248280 : if (!CONST_INT_P (er))
20820 : return 0;
20821 248280 : ei = INTVAL (er);
20822 248280 : if (ei >= nelt)
20823 : return 0;
20824 248280 : ipar[i] = ei;
20825 : }
20826 :
20827 64039 : switch (mode)
20828 : {
20829 : case E_V8DFmode:
20830 : case E_V8DImode:
20831 : /* In the 512-bit DFmode case, we can only move elements within
20832 : a 128-bit lane. First fill the second part of the mask,
20833 : then fallthru. */
20834 4762 : for (i = 4; i < 6; ++i)
20835 : {
20836 3305 : if (!IN_RANGE (ipar[i], 4, 5))
20837 : return 0;
20838 3080 : mask |= (ipar[i] - 4) << i;
20839 : }
20840 3519 : for (i = 6; i < 8; ++i)
20841 : {
20842 2488 : if (!IN_RANGE (ipar[i], 6, 7))
20843 : return 0;
20844 2062 : mask |= (ipar[i] - 6) << i;
20845 : }
20846 : /* FALLTHRU */
20847 :
20848 : case E_V4DFmode:
20849 : case E_V4DImode:
20850 : /* In the 256-bit DFmode case, we can only move elements within
20851 : a 128-bit lane. */
20852 46176 : for (i = 0; i < 2; ++i)
20853 : {
20854 38891 : if (!IN_RANGE (ipar[i], 0, 1))
20855 : return 0;
20856 25894 : mask |= ipar[i] << i;
20857 : }
20858 19165 : for (i = 2; i < 4; ++i)
20859 : {
20860 13230 : if (!IN_RANGE (ipar[i], 2, 3))
20861 : return 0;
20862 11880 : mask |= (ipar[i] - 2) << i;
20863 : }
20864 : break;
20865 :
20866 : case E_V16SFmode:
20867 : case E_V16SImode:
20868 : /* In 512 bit SFmode case, permutation in the upper 256 bits
20869 : must mirror the permutation in the lower 256-bits. */
20870 3652 : for (i = 0; i < 8; ++i)
20871 3256 : if (ipar[i] + 8 != ipar[i + 8])
20872 : return 0;
20873 : /* FALLTHRU */
20874 :
20875 : case E_V8SFmode:
20876 : case E_V8SImode:
20877 : /* In 256 bit SFmode case, we have full freedom of
20878 : movement within the low 128-bit lane, but the high 128-bit
20879 : lane must mirror the exact same pattern. */
20880 33768 : for (i = 0; i < 4; ++i)
20881 28657 : if (ipar[i] + 4 != ipar[i + 4])
20882 : return 0;
20883 : nelt = 4;
20884 : /* FALLTHRU */
20885 :
20886 38401 : case E_V2DFmode:
20887 38401 : case E_V2DImode:
20888 38401 : case E_V4SFmode:
20889 38401 : case E_V4SImode:
20890 : /* In the 128-bit case, we've full freedom in the placement of
20891 : the elements from the source operand. */
20892 134281 : for (i = 0; i < nelt; ++i)
20893 95880 : mask |= ipar[i] << (i * (nelt / 2));
20894 : break;
20895 :
20896 0 : default:
20897 0 : gcc_unreachable ();
20898 : }
20899 :
20900 : /* Make sure success has a non-zero value by adding one. */
20901 44336 : return mask + 1;
20902 : }
20903 :
20904 : /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20905 : the expansion functions to turn the parallel back into a mask.
20906 : The return value is 0 for no match and the imm8+1 for a match. */
20907 :
20908 : int
20909 42902 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
20910 : {
20911 42902 : unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20912 42902 : unsigned mask = 0;
20913 42902 : unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20914 :
20915 42902 : if (XVECLEN (par, 0) != (int) nelt)
20916 : return 0;
20917 :
20918 : /* Validate that all of the elements are constants, and not totally
20919 : out of range. Copy the data into an integral array to make the
20920 : subsequent checks easier. */
20921 345806 : for (i = 0; i < nelt; ++i)
20922 : {
20923 302904 : rtx er = XVECEXP (par, 0, i);
20924 302904 : unsigned HOST_WIDE_INT ei;
20925 :
20926 302904 : if (!CONST_INT_P (er))
20927 : return 0;
20928 302904 : ei = INTVAL (er);
20929 302904 : if (ei >= 2 * nelt)
20930 : return 0;
20931 302904 : ipar[i] = ei;
20932 : }
20933 :
20934 : /* Validate that the halves of the permute are halves. */
20935 82667 : for (i = 0; i < nelt2 - 1; ++i)
20936 66570 : if (ipar[i] + 1 != ipar[i + 1])
20937 : return 0;
20938 50942 : for (i = nelt2; i < nelt - 1; ++i)
20939 35479 : if (ipar[i] + 1 != ipar[i + 1])
20940 : return 0;
20941 :
20942 : /* Reconstruct the mask. */
20943 46293 : for (i = 0; i < 2; ++i)
20944 : {
20945 30880 : unsigned e = ipar[i * nelt2];
20946 30880 : if (e % nelt2)
20947 : return 0;
20948 30830 : e /= nelt2;
20949 30830 : mask |= e << (i * 4);
20950 : }
20951 :
20952 : /* Make sure success has a non-zero value by adding one. */
20953 15413 : return mask + 1;
20954 : }
20955 :
20956 : /* Return a mask of VPTERNLOG operands that do not affect output. */
20957 :
20958 : int
20959 2431 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
20960 : {
20961 2431 : int mask = 0;
20962 2431 : int imm8 = INTVAL (pternlog_imm);
20963 :
20964 2431 : if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20965 6 : mask |= 1;
20966 2431 : if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20967 6 : mask |= 2;
20968 2431 : if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20969 155 : mask |= 4;
20970 :
20971 2431 : return mask;
20972 : }
20973 :
20974 : /* Eliminate false dependencies on operands that do not affect output
20975 : by substituting other operands of a VPTERNLOG. */
20976 :
20977 : void
20978 81 : substitute_vpternlog_operands (rtx *operands)
20979 : {
20980 81 : int mask = vpternlog_redundant_operand_mask (operands[4]);
20981 :
20982 81 : if (mask & 1) /* The first operand is redundant. */
20983 2 : operands[1] = operands[2];
20984 :
20985 81 : if (mask & 2) /* The second operand is redundant. */
20986 2 : operands[2] = operands[1];
20987 :
20988 81 : if (mask & 4) /* The third operand is redundant. */
20989 77 : operands[3] = operands[1];
20990 4 : else if (REG_P (operands[3]))
20991 : {
20992 0 : if (mask & 1)
20993 0 : operands[1] = operands[3];
20994 0 : if (mask & 2)
20995 0 : operands[2] = operands[3];
20996 : }
20997 81 : }
20998 :
20999 : /* Return a register priority for hard reg REGNO. */
21000 : static int
21001 57926219 : ix86_register_priority (int hard_regno)
21002 : {
21003 : /* ebp and r13 as the base always wants a displacement, r12 as the
21004 : base always wants an index. So discourage their usage in an
21005 : address. */
21006 57926219 : if (hard_regno == R12_REG || hard_regno == R13_REG)
21007 : return 0;
21008 53523919 : if (hard_regno == BP_REG)
21009 : return 1;
21010 : /* New x86-64 int registers result in bigger code size. Discourage them. */
21011 51590749 : if (REX_INT_REGNO_P (hard_regno))
21012 : return 2;
21013 35171096 : if (REX2_INT_REGNO_P (hard_regno))
21014 : return 2;
21015 : /* New x86-64 SSE registers result in bigger code size. Discourage them. */
21016 35168654 : if (REX_SSE_REGNO_P (hard_regno))
21017 : return 2;
21018 29049150 : if (EXT_REX_SSE_REGNO_P (hard_regno))
21019 : return 1;
21020 : /* Usage of AX register results in smaller code. Prefer it. */
21021 28771993 : if (hard_regno == AX_REG)
21022 3777613 : return 4;
21023 : return 3;
21024 : }
21025 :
21026 : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
21027 :
21028 : Put float CONST_DOUBLE in the constant pool instead of fp regs.
21029 : QImode must go into class Q_REGS.
21030 : Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21031 : movdf to do mem-to-mem moves through integer regs. */
21032 :
21033 : static reg_class_t
21034 545208504 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
21035 : {
21036 545208504 : machine_mode mode = GET_MODE (x);
21037 :
21038 : /* We're only allowed to return a subclass of CLASS. Many of the
21039 : following checks fail for NO_REGS, so eliminate that early. */
21040 545208504 : if (regclass == NO_REGS)
21041 : return NO_REGS;
21042 :
21043 : /* All classes can load zeros. */
21044 544362660 : if (x == CONST0_RTX (mode))
21045 : return regclass;
21046 :
21047 : /* Force constants into memory if we are loading a (nonzero) constant into
21048 : an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
21049 : instructions to load from a constant. */
21050 519619903 : if (CONSTANT_P (x)
21051 519619903 : && (MAYBE_MMX_CLASS_P (regclass)
21052 151684257 : || MAYBE_SSE_CLASS_P (regclass)
21053 121714517 : || MAYBE_MASK_CLASS_P (regclass)))
21054 30100375 : return NO_REGS;
21055 :
21056 : /* Floating-point constants need more complex checks. */
21057 489519528 : if (CONST_DOUBLE_P (x))
21058 : {
21059 : /* General regs can load everything. */
21060 304043 : if (INTEGER_CLASS_P (regclass))
21061 : return regclass;
21062 :
21063 : /* Floats can load 0 and 1 plus some others. Note that we eliminated
21064 : zero above. We only want to wind up preferring 80387 registers if
21065 : we plan on doing computation with them. */
21066 179992 : if (IS_STACK_MODE (mode)
21067 238377 : && standard_80387_constant_p (x) > 0)
21068 : {
21069 : /* Limit class to FP regs. */
21070 40502 : if (FLOAT_CLASS_P (regclass))
21071 : return FLOAT_REGS;
21072 : }
21073 :
21074 139490 : return NO_REGS;
21075 : }
21076 :
21077 : /* Prefer SSE if we can use them for math. Also allow integer regs
21078 : when moves between register units are cheap. */
21079 489215485 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21080 : {
21081 31087954 : if (TARGET_INTER_UNIT_MOVES_FROM_VEC
21082 31073041 : && TARGET_INTER_UNIT_MOVES_TO_VEC
21083 93224770 : && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
21084 30931295 : return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21085 : else
21086 156659 : return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21087 : }
21088 :
21089 : /* Generally when we see PLUS here, it's the function invariant
21090 : (plus soft-fp const_int). Which can only be computed into general
21091 : regs. */
21092 458127531 : if (GET_CODE (x) == PLUS)
21093 1885352 : return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
21094 :
21095 : /* QImode constants are easy to load, but non-constant QImode data
21096 : must go into Q_REGS or ALL_MASK_REGS. */
21097 456242179 : if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21098 : {
21099 24380107 : if (Q_CLASS_P (regclass))
21100 : return regclass;
21101 19682147 : else if (reg_class_subset_p (Q_REGS, regclass))
21102 : return Q_REGS;
21103 55529 : else if (MASK_CLASS_P (regclass))
21104 : return regclass;
21105 : else
21106 : return NO_REGS;
21107 : }
21108 :
21109 : return regclass;
21110 : }
21111 :
21112 : /* Discourage putting floating-point values in SSE registers unless
21113 : SSE math is being used, and likewise for the 387 registers. */
21114 : static reg_class_t
21115 74116585 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
21116 : {
21117 : /* Restrict the output reload class to the register bank that we are doing
21118 : math on. If we would like not to return a subset of CLASS, reject this
21119 : alternative: if reload cannot do this, it will still use its choice. */
21120 74116585 : machine_mode mode = GET_MODE (x);
21121 74116585 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21122 7214536 : return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
21123 :
21124 66902049 : if (IS_STACK_MODE (mode))
21125 207707 : return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21126 :
21127 : return regclass;
21128 : }
21129 :
21130 : static reg_class_t
21131 384410967 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
21132 : machine_mode mode, secondary_reload_info *sri)
21133 : {
21134 : /* Double-word spills from general registers to non-offsettable memory
21135 : references (zero-extended addresses) require special handling. */
21136 384410967 : if (TARGET_64BIT
21137 331456987 : && MEM_P (x)
21138 179824754 : && GET_MODE_SIZE (mode) > UNITS_PER_WORD
21139 18816078 : && INTEGER_CLASS_P (rclass)
21140 387125098 : && !offsettable_memref_p (x))
21141 : {
21142 2461374 : sri->icode = (in_p
21143 1230687 : ? CODE_FOR_reload_noff_load
21144 : : CODE_FOR_reload_noff_store);
21145 : /* Add the cost of moving address to a temporary. */
21146 1230687 : sri->extra_cost = 1;
21147 :
21148 1230687 : return NO_REGS;
21149 : }
21150 :
21151 : /* QImode spills from non-QI registers require
21152 : intermediate register on 32bit targets. */
21153 383180280 : if (mode == QImode
21154 383180280 : && ((!TARGET_64BIT && !in_p
21155 586226 : && INTEGER_CLASS_P (rclass)
21156 586186 : && MAYBE_NON_Q_CLASS_P (rclass))
21157 21999864 : || (!TARGET_AVX512DQ
21158 21800982 : && MAYBE_MASK_CLASS_P (rclass))))
21159 : {
21160 6476 : int regno = true_regnum (x);
21161 :
21162 : /* Return Q_REGS if the operand is in memory. */
21163 6476 : if (regno == -1)
21164 : return Q_REGS;
21165 :
21166 : return NO_REGS;
21167 : }
21168 :
21169 : /* Require movement to gpr, and then store to memory. */
21170 383173804 : if ((mode == HFmode || mode == HImode || mode == V2QImode
21171 : || mode == BFmode)
21172 3956887 : && !TARGET_SSE4_1
21173 3365984 : && SSE_CLASS_P (rclass)
21174 272554 : && !in_p && MEM_P (x))
21175 : {
21176 167324 : sri->extra_cost = 1;
21177 167324 : return GENERAL_REGS;
21178 : }
21179 :
21180 : /* This condition handles corner case where an expression involving
21181 : pointers gets vectorized. We're trying to use the address of a
21182 : stack slot as a vector initializer.
21183 :
21184 : (set (reg:V2DI 74 [ vect_cst_.2 ])
21185 : (vec_duplicate:V2DI (reg/f:DI 20 frame)))
21186 :
21187 : Eventually frame gets turned into sp+offset like this:
21188 :
21189 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21190 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21191 : (const_int 392 [0x188]))))
21192 :
21193 : That later gets turned into:
21194 :
21195 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21196 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21197 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
21198 :
21199 : We'll have the following reload recorded:
21200 :
21201 : Reload 0: reload_in (DI) =
21202 : (plus:DI (reg/f:DI 7 sp)
21203 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
21204 : reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21205 : SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
21206 : reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
21207 : reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21208 : reload_reg_rtx: (reg:V2DI 22 xmm1)
21209 :
21210 : Which isn't going to work since SSE instructions can't handle scalar
21211 : additions. Returning GENERAL_REGS forces the addition into integer
21212 : register and reload can handle subsequent reloads without problems. */
21213 :
21214 220290201 : if (in_p && GET_CODE (x) == PLUS
21215 2 : && SSE_CLASS_P (rclass)
21216 383006480 : && SCALAR_INT_MODE_P (mode))
21217 : return GENERAL_REGS;
21218 :
21219 : return NO_REGS;
21220 : }
21221 :
21222 : /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
21223 :
21224 : static bool
21225 714463470 : ix86_class_likely_spilled_p (reg_class_t rclass)
21226 : {
21227 704523415 : switch (rclass)
21228 : {
21229 : case AREG:
21230 : case DREG:
21231 : case CREG:
21232 : case BREG:
21233 : case AD_REGS:
21234 : case SIREG:
21235 : case DIREG:
21236 : case SSE_FIRST_REG:
21237 : case FP_TOP_REG:
21238 : case FP_SECOND_REG:
21239 : return true;
21240 :
21241 683124542 : default:
21242 683124542 : break;
21243 : }
21244 :
21245 683124542 : return false;
21246 : }
21247 :
21248 : /* Implement TARGET_CALLEE_SAVE_COST. */
21249 :
21250 : static int
21251 81459600 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
21252 : unsigned int, int mem_cost, const HARD_REG_SET &, bool)
21253 : {
21254 : /* Account for the fact that push and pop are shorter and do their
21255 : own allocation and deallocation. */
21256 81459600 : if (GENERAL_REGNO_P (hard_regno))
21257 : {
21258 : /* push is 1 byte while typical spill is 4-5 bytes.
21259 : ??? We probably should adjust size costs accordingly.
21260 : Costs are relative to reg-reg move that has 2 bytes for 32bit
21261 : and 3 bytes otherwise. Be sure that no cost table sets cost
21262 : to 2, so we end up with 0. */
21263 81449742 : if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
21264 3576776 : return 1;
21265 77872966 : return mem_cost - 2;
21266 : }
21267 : return mem_cost;
21268 : }
21269 :
21270 : /* Return true if a set of DST by the expression SRC should be allowed.
21271 : This prevents complex sets of likely_spilled hard regs before split1. */
21272 :
21273 : bool
21274 633284862 : ix86_hardreg_mov_ok (rtx dst, rtx src)
21275 : {
21276 : /* Avoid complex sets of likely_spilled hard registers before reload. */
21277 515351550 : if (REG_P (dst) && HARD_REGISTER_P (dst)
21278 307720254 : && !REG_P (src) && !MEM_P (src)
21279 95022611 : && !(VECTOR_MODE_P (GET_MODE (dst))
21280 95022611 : ? standard_sse_constant_p (src, GET_MODE (dst))
21281 47275342 : : x86_64_immediate_operand (src, GET_MODE (dst)))
21282 9940055 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
21283 642004431 : && ix86_pre_reload_split ())
21284 : return false;
21285 : return true;
21286 : }
21287 :
21288 : /* If we are copying between registers from different register sets
21289 : (e.g. FP and integer), we may need a memory location.
21290 :
21291 : The function can't work reliably when one of the CLASSES is a class
21292 : containing registers from multiple sets. We avoid this by never combining
21293 : different sets in a single alternative in the machine description.
21294 : Ensure that this constraint holds to avoid unexpected surprises.
21295 :
21296 : When STRICT is false, we are being called from REGISTER_MOVE_COST,
21297 : so do not enforce these sanity checks.
21298 :
21299 : To optimize register_move_cost performance, define inline variant. */
21300 :
21301 : static inline bool
21302 5808522681 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21303 : reg_class_t class2, int strict)
21304 : {
21305 5808522681 : if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
21306 : return false;
21307 :
21308 5776302054 : if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21309 4922161376 : || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21310 4203031161 : || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21311 4010055544 : || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21312 3827432583 : || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21313 3827432583 : || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
21314 3827432583 : || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
21315 9429718743 : || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
21316 : {
21317 2288702893 : gcc_assert (!strict || lra_in_progress);
21318 : return true;
21319 : }
21320 :
21321 3487599161 : if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21322 : return true;
21323 :
21324 : /* ??? This is a lie. We do have moves between mmx/general, and for
21325 : mmx/sse2. But by saying we need secondary memory we discourage the
21326 : register allocator from using the mmx registers unless needed. */
21327 3335180667 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21328 : return true;
21329 :
21330 : /* Between mask and general, we have moves no larger than word size. */
21331 3236812426 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21332 : {
21333 2699571 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
21334 3518937 : || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21335 200475 : return true;
21336 : }
21337 :
21338 3236611951 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21339 : {
21340 : /* SSE1 doesn't have any direct moves from other classes. */
21341 703161876 : if (!TARGET_SSE2)
21342 : return true;
21343 :
21344 700498144 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
21345 : return true;
21346 :
21347 : /* If the target says that inter-unit moves are more expensive
21348 : than moving through memory, then don't generate them. */
21349 1050273289 : if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
21350 1049787445 : || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
21351 1321114 : return true;
21352 :
21353 : /* With SSE4.1, *mov{ti,di}_internal supports moves between
21354 : SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */
21355 699177030 : if (TARGET_SSE4_1
21356 37999464 : && (TARGET_64BIT ? mode == TImode : mode == DImode))
21357 : return false;
21358 :
21359 697529813 : int msize = GET_MODE_SIZE (mode);
21360 :
21361 : /* Between SSE and general, we have moves no larger than word size. */
21362 713893855 : if (msize > UNITS_PER_WORD)
21363 : return true;
21364 :
21365 : /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
21366 : Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
21367 603495183 : int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
21368 :
21369 603495183 : if (msize < minsize)
21370 : return true;
21371 : }
21372 :
21373 : return false;
21374 : }
21375 :
21376 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21377 :
21378 : static bool
21379 70950048 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21380 : reg_class_t class2)
21381 : {
21382 70950048 : return inline_secondary_memory_needed (mode, class1, class2, true);
21383 : }
21384 :
21385 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
21386 :
21387 : get_secondary_mem widens integral modes to BITS_PER_WORD.
21388 : There is no need to emit full 64 bit move on 64 bit targets
21389 : for integral modes that can be moved using 32 bit move. */
21390 :
21391 : static machine_mode
21392 13074 : ix86_secondary_memory_needed_mode (machine_mode mode)
21393 : {
21394 26148 : if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
21395 19 : return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
21396 : return mode;
21397 : }
21398 :
21399 : /* Implement the TARGET_CLASS_MAX_NREGS hook.
21400 :
21401 : On the 80386, this is the size of MODE in words,
21402 : except in the FP regs, where a single reg is always enough. */
21403 :
21404 : static unsigned char
21405 6061848900 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
21406 : {
21407 6061848900 : if (MAYBE_INTEGER_CLASS_P (rclass))
21408 : {
21409 4078415048 : if (mode == XFmode)
21410 149069411 : return (TARGET_64BIT ? 2 : 3);
21411 3929345637 : else if (mode == XCmode)
21412 149069036 : return (TARGET_64BIT ? 4 : 6);
21413 : else
21414 7666466321 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21415 : }
21416 : else
21417 : {
21418 1983433852 : if (COMPLEX_MODE_P (mode))
21419 : return 2;
21420 : else
21421 1693839513 : return 1;
21422 : }
21423 : }
21424 :
21425 : /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21426 :
21427 : static bool
21428 40288537 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
21429 : reg_class_t regclass)
21430 : {
21431 40288537 : if (from == to)
21432 : return true;
21433 :
21434 : /* x87 registers can't do subreg at all, as all values are reformatted
21435 : to extended precision.
21436 :
21437 : ??? middle-end queries mode changes for ALL_REGS and this makes
21438 : vec_series_lowpart_p to always return false. We probably should
21439 : restrict this to modes supported by i387 and check if it is enabled. */
21440 38884506 : if (MAYBE_FLOAT_CLASS_P (regclass))
21441 : return false;
21442 :
21443 34231946 : if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21444 : {
21445 : /* Vector registers do not support QI or HImode loads. If we don't
21446 : disallow a change to these modes, reload will assume it's ok to
21447 : drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21448 : the vec_dupv4hi pattern.
21449 : NB: SSE2 can load 16bit data to sse register via pinsrw. */
21450 16507695 : int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
21451 16507695 : if (GET_MODE_SIZE (from) < mov_size
21452 33015078 : || GET_MODE_SIZE (to) < mov_size)
21453 : return false;
21454 : }
21455 :
21456 : return true;
21457 : }
21458 :
21459 : /* Return index of MODE in the sse load/store tables. */
21460 :
21461 : static inline int
21462 791200603 : sse_store_index (machine_mode mode)
21463 : {
21464 : /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21465 : costs to processor_costs, which requires changes to all entries in
21466 : processor cost table. */
21467 791200603 : if (mode == E_HFmode)
21468 140218844 : mode = E_SFmode;
21469 :
21470 1582401206 : switch (GET_MODE_SIZE (mode))
21471 : {
21472 : case 4:
21473 : return 0;
21474 : case 8:
21475 : return 1;
21476 : case 16:
21477 : return 2;
21478 : case 32:
21479 : return 3;
21480 : case 64:
21481 : return 4;
21482 : default:
21483 : return -1;
21484 : }
21485 : }
21486 :
21487 : /* Return the cost of moving data of mode M between a
21488 : register and memory. A value of 2 is the default; this cost is
21489 : relative to those in `REGISTER_MOVE_COST'.
21490 :
21491 : This function is used extensively by register_move_cost that is used to
21492 : build tables at startup. Make it inline in this case.
21493 : When IN is 2, return maximum of in and out move cost.
21494 :
21495 : If moving between registers and memory is more expensive than
21496 : between two registers, you should define this macro to express the
21497 : relative cost.
21498 :
21499 : Model also increased moving costs of QImode registers in non
21500 : Q_REGS classes.
21501 : */
21502 : static inline int
21503 7071590040 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21504 : {
21505 7071590040 : int cost;
21506 :
21507 7071590040 : if (FLOAT_CLASS_P (regclass))
21508 : {
21509 361127475 : int index;
21510 361127475 : switch (mode)
21511 : {
21512 : case E_SFmode:
21513 : index = 0;
21514 : break;
21515 : case E_DFmode:
21516 : index = 1;
21517 : break;
21518 : case E_XFmode:
21519 : index = 2;
21520 : break;
21521 : default:
21522 : return 100;
21523 : }
21524 107946991 : if (in == 2)
21525 103919262 : return MAX (ix86_cost->hard_register.fp_load [index],
21526 : ix86_cost->hard_register.fp_store [index]);
21527 4027729 : return in ? ix86_cost->hard_register.fp_load [index]
21528 4027729 : : ix86_cost->hard_register.fp_store [index];
21529 : }
21530 6710462565 : if (SSE_CLASS_P (regclass))
21531 : {
21532 659329647 : int index = sse_store_index (mode);
21533 659329647 : if (index == -1)
21534 : return 100;
21535 573393452 : if (in == 2)
21536 406320345 : return MAX (ix86_cost->hard_register.sse_load [index],
21537 : ix86_cost->hard_register.sse_store [index]);
21538 167073107 : return in ? ix86_cost->hard_register.sse_load [index]
21539 167073107 : : ix86_cost->hard_register.sse_store [index];
21540 : }
21541 6051132918 : if (MASK_CLASS_P (regclass))
21542 : {
21543 110748435 : int index;
21544 221496870 : switch (GET_MODE_SIZE (mode))
21545 : {
21546 : case 1:
21547 : index = 0;
21548 : break;
21549 9142669 : case 2:
21550 9142669 : index = 1;
21551 9142669 : break;
21552 : /* DImode loads and stores assumed to cost the same as SImode. */
21553 41100846 : case 4:
21554 41100846 : case 8:
21555 41100846 : index = 2;
21556 41100846 : break;
21557 : default:
21558 : return 100;
21559 : }
21560 :
21561 53904915 : if (in == 2)
21562 614067 : return MAX (ix86_cost->hard_register.mask_load[index],
21563 : ix86_cost->hard_register.mask_store[index]);
21564 53290848 : return in ? ix86_cost->hard_register.mask_load[2]
21565 53290848 : : ix86_cost->hard_register.mask_store[2];
21566 : }
21567 5940384483 : if (MMX_CLASS_P (regclass))
21568 : {
21569 176322170 : int index;
21570 352644340 : switch (GET_MODE_SIZE (mode))
21571 : {
21572 : case 4:
21573 : index = 0;
21574 : break;
21575 103482270 : case 8:
21576 103482270 : index = 1;
21577 103482270 : break;
21578 : default:
21579 : return 100;
21580 : }
21581 141683150 : if (in == 2)
21582 121254988 : return MAX (ix86_cost->hard_register.mmx_load [index],
21583 : ix86_cost->hard_register.mmx_store [index]);
21584 20428162 : return in ? ix86_cost->hard_register.mmx_load [index]
21585 20428162 : : ix86_cost->hard_register.mmx_store [index];
21586 : }
21587 11528124626 : switch (GET_MODE_SIZE (mode))
21588 : {
21589 127590891 : case 1:
21590 127590891 : if (Q_CLASS_P (regclass) || TARGET_64BIT)
21591 : {
21592 124963440 : if (!in)
21593 20044824 : return ix86_cost->hard_register.int_store[0];
21594 104918616 : if (TARGET_PARTIAL_REG_DEPENDENCY
21595 104918616 : && optimize_function_for_speed_p (cfun))
21596 97974362 : cost = ix86_cost->hard_register.movzbl_load;
21597 : else
21598 6944254 : cost = ix86_cost->hard_register.int_load[0];
21599 104918616 : if (in == 2)
21600 84845520 : return MAX (cost, ix86_cost->hard_register.int_store[0]);
21601 : return cost;
21602 : }
21603 : else
21604 : {
21605 2627451 : if (in == 2)
21606 1860710 : return MAX (ix86_cost->hard_register.movzbl_load,
21607 : ix86_cost->hard_register.int_store[0] + 4);
21608 766741 : if (in)
21609 383425 : return ix86_cost->hard_register.movzbl_load;
21610 : else
21611 383316 : return ix86_cost->hard_register.int_store[0] + 4;
21612 : }
21613 658592209 : break;
21614 658592209 : case 2:
21615 658592209 : {
21616 658592209 : int cost;
21617 658592209 : if (in == 2)
21618 556431119 : cost = MAX (ix86_cost->hard_register.int_load[1],
21619 : ix86_cost->hard_register.int_store[1]);
21620 : else
21621 102161090 : cost = in ? ix86_cost->hard_register.int_load[1]
21622 : : ix86_cost->hard_register.int_store[1];
21623 :
21624 658592209 : if (mode == E_HFmode)
21625 : {
21626 : /* Prefer SSE over GPR for HFmode. */
21627 127618180 : int sse_cost;
21628 127618180 : int index = sse_store_index (mode);
21629 127618180 : if (in == 2)
21630 117404048 : sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21631 : ix86_cost->hard_register.sse_store[index]);
21632 : else
21633 20428264 : sse_cost = (in
21634 10214132 : ? ix86_cost->hard_register.sse_load [index]
21635 : : ix86_cost->hard_register.sse_store [index]);
21636 127618180 : if (sse_cost >= cost)
21637 127618180 : cost = sse_cost + 1;
21638 : }
21639 : return cost;
21640 : }
21641 4977879213 : default:
21642 4977879213 : if (in == 2)
21643 3853955688 : cost = MAX (ix86_cost->hard_register.int_load[2],
21644 : ix86_cost->hard_register.int_store[2]);
21645 1123923525 : else if (in)
21646 562149675 : cost = ix86_cost->hard_register.int_load[2];
21647 : else
21648 561773850 : cost = ix86_cost->hard_register.int_store[2];
21649 : /* Multiply with the number of GPR moves needed. */
21650 10074654555 : return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21651 : }
21652 : }
21653 :
21654 : static int
21655 1817858222 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21656 : {
21657 2726466362 : return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
21658 : }
21659 :
21660 :
21661 : /* Return the cost of moving data from a register in class CLASS1 to
21662 : one in class CLASS2.
21663 :
21664 : It is not required that the cost always equal 2 when FROM is the same as TO;
21665 : on some machines it is expensive to move between registers if they are not
21666 : general registers. */
21667 :
21668 : static int
21669 5737572633 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
21670 : reg_class_t class2_i)
21671 : {
21672 5737572633 : enum reg_class class1 = (enum reg_class) class1_i;
21673 5737572633 : enum reg_class class2 = (enum reg_class) class2_i;
21674 :
21675 : /* In case we require secondary memory, compute cost of the store followed
21676 : by load. In order to avoid bad register allocation choices, we need
21677 : for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21678 :
21679 5737572633 : if (inline_secondary_memory_needed (mode, class1, class2, false))
21680 : {
21681 2626865909 : int cost = 1;
21682 :
21683 2626865909 : cost += inline_memory_move_cost (mode, class1, 2);
21684 2626865909 : cost += inline_memory_move_cost (mode, class2, 2);
21685 :
21686 : /* In case of copying from general_purpose_register we may emit multiple
21687 : stores followed by single load causing memory size mismatch stall.
21688 : Count this as arbitrarily high cost of 20. */
21689 5253731818 : if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
21690 786005540 : && TARGET_MEMORY_MISMATCH_STALL
21691 4198876989 : && targetm.class_max_nregs (class1, mode)
21692 786005540 : > targetm.class_max_nregs (class2, mode))
21693 149524934 : cost += 20;
21694 :
21695 : /* In the case of FP/MMX moves, the registers actually overlap, and we
21696 : have to switch modes in order to treat them differently. */
21697 60627530 : if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21698 2677945504 : || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21699 19095870 : cost += 20;
21700 :
21701 2626865909 : return cost;
21702 : }
21703 :
21704 : /* Moves between MMX and non-MMX units require secondary memory. */
21705 3110706724 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21706 0 : gcc_unreachable ();
21707 :
21708 3110706724 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21709 595691688 : return (SSE_CLASS_P (class1)
21710 595691688 : ? ix86_cost->hard_register.sse_to_integer
21711 595691688 : : ix86_cost->hard_register.integer_to_sse);
21712 :
21713 : /* Moves between mask register and GPR. */
21714 2515015036 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21715 : {
21716 1106063 : return (MASK_CLASS_P (class1)
21717 1106063 : ? ix86_cost->hard_register.mask_to_integer
21718 1106063 : : ix86_cost->hard_register.integer_to_mask);
21719 : }
21720 : /* Moving between mask registers. */
21721 2513908973 : if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
21722 106408 : return ix86_cost->hard_register.mask_move;
21723 :
21724 2513802565 : if (MAYBE_FLOAT_CLASS_P (class1))
21725 12066084 : return ix86_cost->hard_register.fp_move;
21726 2501736481 : if (MAYBE_SSE_CLASS_P (class1))
21727 : {
21728 234885228 : if (GET_MODE_BITSIZE (mode) <= 128)
21729 114864086 : return ix86_cost->hard_register.xmm_move;
21730 5157056 : if (GET_MODE_BITSIZE (mode) <= 256)
21731 1635081 : return ix86_cost->hard_register.ymm_move;
21732 943447 : return ix86_cost->hard_register.zmm_move;
21733 : }
21734 2384293867 : if (MAYBE_MMX_CLASS_P (class1))
21735 2220505 : return ix86_cost->hard_register.mmx_move;
21736 : return 2;
21737 : }
21738 :
21739 : /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
21740 : words of a value of mode MODE but can be less for certain modes in
21741 : special long registers.
21742 :
21743 : Actually there are no two word move instructions for consecutive
21744 : registers. And only registers 0-3 may have mov byte instructions
21745 : applied to them. */
21746 :
21747 : static unsigned int
21748 9008053408 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
21749 : {
21750 9008053408 : if (GENERAL_REGNO_P (regno))
21751 : {
21752 3133235968 : if (mode == XFmode)
21753 25746432 : return TARGET_64BIT ? 2 : 3;
21754 3107967936 : if (mode == XCmode)
21755 25746432 : return TARGET_64BIT ? 4 : 6;
21756 6223764608 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21757 : }
21758 5874817440 : if (COMPLEX_MODE_P (mode))
21759 : return 2;
21760 : /* Register pair for mask registers. */
21761 5116776480 : if (mode == P2QImode || mode == P2HImode)
21762 94755120 : return 2;
21763 :
21764 : return 1;
21765 : }
21766 :
21767 : /* Implement REGMODE_NATURAL_SIZE(MODE). */
21768 : unsigned int
21769 110701356 : ix86_regmode_natural_size (machine_mode mode)
21770 : {
21771 110701356 : if (mode == P2HImode || mode == P2QImode)
21772 2462 : return GET_MODE_SIZE (mode) / 2;
21773 110700125 : return UNITS_PER_WORD;
21774 : }
21775 :
21776 : /* Implement TARGET_HARD_REGNO_MODE_OK. */
21777 :
21778 : static bool
21779 55132258831 : ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
21780 : {
21781 : /* Flags and only flags can only hold CCmode values. */
21782 55132258831 : if (CC_REGNO_P (regno))
21783 438239777 : return GET_MODE_CLASS (mode) == MODE_CC;
21784 54694019054 : if (GET_MODE_CLASS (mode) == MODE_CC
21785 : || GET_MODE_CLASS (mode) == MODE_RANDOM)
21786 : return false;
21787 49067883297 : if (STACK_REGNO_P (regno))
21788 4778884507 : return VALID_FP_MODE_P (mode);
21789 44288998790 : if (MASK_REGNO_P (regno))
21790 : {
21791 : /* Register pair only starts at even register number. */
21792 3725023477 : if ((mode == P2QImode || mode == P2HImode))
21793 51932098 : return MASK_PAIR_REGNO_P(regno);
21794 :
21795 1003720843 : return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
21796 4656401300 : || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
21797 : }
21798 :
21799 40563975313 : if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21800 : return false;
21801 :
21802 39568605866 : if (SSE_REGNO_P (regno))
21803 : {
21804 : /* We implement the move patterns for all vector modes into and
21805 : out of SSE registers, even when no operation instructions
21806 : are available. */
21807 :
21808 : /* For AVX-512 we allow, regardless of regno:
21809 : - XI mode
21810 : - any of 512-bit wide vector mode
21811 : - any scalar mode. */
21812 17110189365 : if (TARGET_AVX512F
21813 : && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
21814 : || VALID_AVX512F_SCALAR_MODE (mode)))
21815 : return true;
21816 :
21817 : /* TODO check for QI/HI scalars. */
21818 : /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
21819 16417921243 : if (TARGET_AVX512VL
21820 1752504624 : && (VALID_AVX256_REG_OR_OI_MODE (mode)
21821 1540093745 : || VALID_AVX512VL_128_REG_MODE (mode)))
21822 : return true;
21823 :
21824 : /* xmm16-xmm31 are only available for AVX-512. */
21825 15971075831 : if (EXT_REX_SSE_REGNO_P (regno))
21826 : return false;
21827 :
21828 : /* OImode and AVX modes are available only when AVX is enabled. */
21829 9246003422 : return ((TARGET_AVX
21830 1933732993 : && VALID_AVX256_REG_OR_OI_MODE (mode))
21831 : || VALID_SSE_REG_MODE (mode)
21832 : || VALID_SSE2_REG_MODE (mode)
21833 : || VALID_MMX_REG_MODE (mode)
21834 9246003422 : || VALID_MMX_REG_MODE_3DNOW (mode));
21835 : }
21836 22458416501 : if (MMX_REGNO_P (regno))
21837 : {
21838 : /* We implement the move patterns for 3DNOW modes even in MMX mode,
21839 : so if the register is available at all, then we can move data of
21840 : the given mode into or out of it. */
21841 4002010879 : return (VALID_MMX_REG_MODE (mode)
21842 : || VALID_MMX_REG_MODE_3DNOW (mode));
21843 : }
21844 :
21845 18456405622 : if (mode == QImode)
21846 : {
21847 : /* Take care for QImode values - they can be in non-QI regs,
21848 : but then they do cause partial register stalls. */
21849 208821652 : if (ANY_QI_REGNO_P (regno))
21850 : return true;
21851 14431639 : if (!TARGET_PARTIAL_REG_STALL)
21852 : return true;
21853 : /* LRA checks if the hard register is OK for the given mode.
21854 : QImode values can live in non-QI regs, so we allow all
21855 : registers here. */
21856 0 : if (lra_in_progress)
21857 : return true;
21858 0 : return !can_create_pseudo_p ();
21859 : }
21860 : /* We handle both integer and floats in the general purpose registers. */
21861 18247583970 : else if (VALID_INT_MODE_P (mode)
21862 13349782955 : || VALID_FP_MODE_P (mode))
21863 : return true;
21864 : /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21865 : on to use that value in smaller contexts, this can easily force a
21866 : pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21867 : supporting DImode, allow it. */
21868 12274520046 : else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21869 : return true;
21870 :
21871 : return false;
21872 : }
21873 :
21874 : /* Initialize function_abis with corresponding abi_id,
21875 : currently only handle vzeroupper. */
21876 : void
21877 21870 : ix86_initialize_callee_abi (unsigned int abi_id)
21878 : {
21879 21870 : gcc_assert (abi_id == ABI_VZEROUPPER);
21880 21870 : predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21881 21870 : if (!vzeroupper_abi.initialized_p ())
21882 : {
21883 : HARD_REG_SET full_reg_clobbers;
21884 4279 : CLEAR_HARD_REG_SET (full_reg_clobbers);
21885 4279 : vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21886 : }
21887 21870 : }
21888 :
21889 : void
21890 21870 : ix86_expand_avx_vzeroupper (void)
21891 : {
21892 : /* Initialize vzeroupper_abi here. */
21893 21870 : ix86_initialize_callee_abi (ABI_VZEROUPPER);
21894 21870 : rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21895 21870 : CALL_INSN_ABI_ID (insn) = ABI_VZEROUPPER;
21896 : /* Return false for non-local goto in can_nonlocal_goto. */
21897 21870 : make_reg_eh_region_note (insn, 0, INT_MIN);
21898 : /* Flag used for call_insn indicates it's a fake call. */
21899 21870 : RTX_FLAG (insn, used) = 1;
21900 21870 : }
21901 :
21902 :
21903 : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21904 : saves SSE registers across calls is Win64 (thus no need to check the
21905 : current ABI here), and with AVX enabled Win64 only guarantees that
21906 : the low 16 bytes are saved. */
21907 :
21908 : static bool
21909 2070650478 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21910 : machine_mode mode)
21911 : {
21912 : /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21913 2070650478 : if (abi_id == ABI_VZEROUPPER)
21914 30934615 : return (GET_MODE_SIZE (mode) > 16
21915 30934615 : && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21916 4732774 : || LEGACY_SSE_REGNO_P (regno)));
21917 :
21918 2682541503 : return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21919 : }
21920 :
21921 : /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21922 : tieable integer mode. */
21923 :
21924 : static bool
21925 52431626 : ix86_tieable_integer_mode_p (machine_mode mode)
21926 : {
21927 52431626 : switch (mode)
21928 : {
21929 : case E_HImode:
21930 : case E_SImode:
21931 : return true;
21932 :
21933 5330771 : case E_QImode:
21934 5330771 : return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21935 :
21936 10122448 : case E_DImode:
21937 10122448 : return TARGET_64BIT;
21938 :
21939 : default:
21940 : return false;
21941 : }
21942 : }
21943 :
21944 : /* Implement TARGET_MODES_TIEABLE_P.
21945 :
21946 : Return true if MODE1 is accessible in a register that can hold MODE2
21947 : without copying. That is, all register classes that can hold MODE2
21948 : can also hold MODE1. */
21949 :
21950 : static bool
21951 33986372 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21952 : {
21953 33986372 : if (mode1 == mode2)
21954 : return true;
21955 :
21956 33899914 : if (ix86_tieable_integer_mode_p (mode1)
21957 33899914 : && ix86_tieable_integer_mode_p (mode2))
21958 : return true;
21959 :
21960 : /* MODE2 being XFmode implies fp stack or general regs, which means we
21961 : can tie any smaller floating point modes to it. Note that we do not
21962 : tie this with TFmode. */
21963 24923455 : if (mode2 == XFmode)
21964 4314 : return mode1 == SFmode || mode1 == DFmode;
21965 :
21966 : /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21967 : that we can tie it with SFmode. */
21968 24919141 : if (mode2 == DFmode)
21969 249932 : return mode1 == SFmode;
21970 :
21971 : /* If MODE2 is only appropriate for an SSE register, then tie with
21972 : any vector modes or scalar floating point modes acceptable to SSE
21973 : registers, excluding scalar integer modes with SUBREG:
21974 : (subreg:QI (reg:TI 99) 0))
21975 : (subreg:HI (reg:TI 99) 0))
21976 : (subreg:SI (reg:TI 99) 0))
21977 : (subreg:DI (reg:TI 99) 0))
21978 : to avoid unnecessary move from SSE register to integer register.
21979 : */
21980 24669209 : if (GET_MODE_SIZE (mode2) >= 16
21981 38653244 : && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
21982 13719643 : || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
21983 486118 : && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
21984 30518157 : && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21985 5406314 : return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
21986 :
21987 : /* If MODE2 is appropriate for an MMX register, then tie
21988 : with any other mode acceptable to MMX registers. */
21989 19262895 : if (GET_MODE_SIZE (mode2) == 8
21990 19262895 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21991 3304058 : return (GET_MODE_SIZE (mode1) == 8
21992 3304058 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21993 :
21994 : /* SCmode and DImode can be tied. */
21995 15958837 : if ((mode1 == E_SCmode && mode2 == E_DImode)
21996 15958837 : || (mode1 == E_DImode && mode2 == E_SCmode))
21997 108 : return TARGET_64BIT;
21998 :
21999 : /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
22000 15958729 : if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
22001 15958729 : || (mode1 == E_V2SFmode && mode2 == E_SCmode)
22002 15958729 : || (mode1 == E_DCmode && mode2 == E_V2DFmode)
22003 15958729 : || (mode1 == E_V2DFmode && mode2 == E_DCmode))
22004 0 : return true;
22005 :
22006 : return false;
22007 : }
22008 :
22009 : /* Return the cost of moving between two registers of mode MODE. */
22010 :
22011 : static int
22012 29863581 : ix86_set_reg_reg_cost (machine_mode mode)
22013 : {
22014 29863581 : unsigned int units = UNITS_PER_WORD;
22015 :
22016 29863581 : switch (GET_MODE_CLASS (mode))
22017 : {
22018 : default:
22019 : break;
22020 :
22021 : case MODE_CC:
22022 29863581 : units = GET_MODE_SIZE (CCmode);
22023 : break;
22024 :
22025 1185201 : case MODE_FLOAT:
22026 1185201 : if ((TARGET_SSE && mode == TFmode)
22027 693396 : || (TARGET_80387 && mode == XFmode)
22028 210960 : || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
22029 142495 : || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
22030 2340486 : units = GET_MODE_SIZE (mode);
22031 : break;
22032 :
22033 1336644 : case MODE_COMPLEX_FLOAT:
22034 1336644 : if ((TARGET_SSE && mode == TCmode)
22035 896026 : || (TARGET_80387 && mode == XCmode)
22036 455286 : || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
22037 14518 : || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
22038 2666800 : units = GET_MODE_SIZE (mode);
22039 : break;
22040 :
22041 19125022 : case MODE_VECTOR_INT:
22042 19125022 : case MODE_VECTOR_FLOAT:
22043 19125022 : if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
22044 19024539 : || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
22045 18846155 : || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22046 16159656 : || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22047 14825466 : || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
22048 14779956 : && VALID_MMX_REG_MODE (mode)))
22049 8705478 : units = GET_MODE_SIZE (mode);
22050 : }
22051 :
22052 : /* Return the cost of moving between two registers of mode MODE,
22053 : assuming that the move will be in pieces of at most UNITS bytes. */
22054 29863581 : return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
22055 : }
22056 :
22057 : /* Return cost of vector operation in MODE given that scalar version has
22058 : COST. */
22059 :
22060 : static int
22061 2895881329 : ix86_vec_cost (machine_mode mode, int cost)
22062 : {
22063 2895881329 : if (!VECTOR_MODE_P (mode))
22064 : return cost;
22065 :
22066 2895647164 : if (GET_MODE_BITSIZE (mode) == 128
22067 2895647164 : && TARGET_SSE_SPLIT_REGS)
22068 2861918 : return cost * GET_MODE_BITSIZE (mode) / 64;
22069 2894216205 : else if (GET_MODE_BITSIZE (mode) > 128
22070 2894216205 : && TARGET_AVX256_SPLIT_REGS)
22071 1674620 : return cost * GET_MODE_BITSIZE (mode) / 128;
22072 2893378895 : else if (GET_MODE_BITSIZE (mode) > 256
22073 2893378895 : && TARGET_AVX512_SPLIT_REGS)
22074 265000 : return cost * GET_MODE_BITSIZE (mode) / 256;
22075 : return cost;
22076 : }
22077 :
22078 : /* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
22079 : vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */
22080 : static int
22081 1074 : ix86_widen_mult_cost (const struct processor_costs *cost,
22082 : enum machine_mode mode, bool uns_p)
22083 : {
22084 1074 : gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
22085 1074 : int extra_cost = 0;
22086 1074 : int basic_cost = 0;
22087 1074 : switch (mode)
22088 : {
22089 124 : case V8HImode:
22090 124 : case V16HImode:
22091 124 : if (!uns_p || mode == V16HImode)
22092 53 : extra_cost = cost->sse_op * 2;
22093 124 : basic_cost = cost->mulss * 2 + cost->sse_op * 4;
22094 124 : break;
22095 203 : case V4SImode:
22096 203 : case V8SImode:
22097 : /* pmulhw/pmullw can be used. */
22098 203 : basic_cost = cost->mulss * 2 + cost->sse_op * 2;
22099 203 : break;
22100 679 : case V2DImode:
22101 : /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
22102 : require extra 4 mul, 4 add, 4 cmp and 2 shift. */
22103 679 : if (!TARGET_SSE4_1 && !uns_p)
22104 401 : extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
22105 401 : + cost->sse_op * 2;
22106 : /* Fallthru. */
22107 735 : case V4DImode:
22108 735 : basic_cost = cost->mulss * 2 + cost->sse_op * 4;
22109 735 : break;
22110 : default:
22111 : /* Not implemented. */
22112 : return 100;
22113 : }
22114 1062 : return ix86_vec_cost (mode, basic_cost + extra_cost);
22115 : }
22116 :
22117 : /* Return cost of multiplication in MODE. */
22118 :
22119 : static int
22120 1235871490 : ix86_multiplication_cost (const struct processor_costs *cost,
22121 : enum machine_mode mode)
22122 : {
22123 1235871490 : machine_mode inner_mode = mode;
22124 1235871490 : if (VECTOR_MODE_P (mode))
22125 1234850538 : inner_mode = GET_MODE_INNER (mode);
22126 :
22127 1235871490 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22128 752472 : return inner_mode == DFmode ? cost->mulsd : cost->mulss;
22129 1235119018 : else if (X87_FLOAT_MODE_P (mode))
22130 162211 : return cost->fmul;
22131 1234956807 : else if (FLOAT_MODE_P (mode))
22132 230926 : return ix86_vec_cost (mode,
22133 230926 : inner_mode == DFmode ? cost->mulsd : cost->mulss);
22134 1234725881 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22135 : {
22136 1234643498 : int nmults, nops;
22137 : /* Cost of reading the memory. */
22138 1234643498 : int extra;
22139 :
22140 1234643498 : switch (mode)
22141 : {
22142 19442874 : case V4QImode:
22143 19442874 : case V8QImode:
22144 : /* Partial V*QImode is emulated with 4-6 insns. */
22145 19442874 : nmults = 1;
22146 19442874 : nops = 3;
22147 19442874 : extra = 0;
22148 :
22149 19442874 : if (TARGET_AVX512BW && TARGET_AVX512VL)
22150 : ;
22151 19333216 : else if (TARGET_AVX2)
22152 : nops += 2;
22153 18797356 : else if (TARGET_XOP)
22154 10040 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22155 : else
22156 : {
22157 18787316 : nops += 1;
22158 18787316 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22159 : }
22160 19442874 : goto do_qimode;
22161 :
22162 9721912 : case V16QImode:
22163 : /* V*QImode is emulated with 4-11 insns. */
22164 9721912 : nmults = 1;
22165 9721912 : nops = 3;
22166 9721912 : extra = 0;
22167 :
22168 9721912 : if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
22169 : {
22170 320723 : if (!(TARGET_AVX512BW && TARGET_AVX512VL))
22171 266180 : nops += 3;
22172 : }
22173 9401189 : else if (TARGET_XOP)
22174 : {
22175 5464 : nmults += 1;
22176 5464 : nops += 2;
22177 5464 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22178 : }
22179 : else
22180 : {
22181 9395725 : nmults += 1;
22182 9395725 : nops += 4;
22183 9395725 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22184 : }
22185 9721912 : goto do_qimode;
22186 :
22187 9720292 : case V32QImode:
22188 9720292 : nmults = 1;
22189 9720292 : nops = 3;
22190 9720292 : extra = 0;
22191 :
22192 9720292 : if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
22193 : {
22194 9632588 : nmults += 1;
22195 9632588 : nops += 4;
22196 : /* 2 loads, so no division by 2. */
22197 9632588 : extra += COSTS_N_INSNS (cost->sse_load[3]);
22198 : }
22199 9720292 : goto do_qimode;
22200 :
22201 9719813 : case V64QImode:
22202 9719813 : nmults = 2;
22203 9719813 : nops = 9;
22204 : /* 2 loads of each size, so no division by 2. */
22205 9719813 : extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
22206 :
22207 48604891 : do_qimode:
22208 48604891 : return ix86_vec_cost (mode, cost->mulss * nmults
22209 48604891 : + cost->sse_op * nops) + extra;
22210 :
22211 41568190 : case V4SImode:
22212 : /* pmulld is used in this case. No emulation is needed. */
22213 41568190 : if (TARGET_SSE4_1)
22214 2322413 : goto do_native;
22215 : /* V4SImode is emulated with 7 insns. */
22216 : else
22217 39245777 : return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
22218 :
22219 168010411 : case V2DImode:
22220 168010411 : case V4DImode:
22221 : /* vpmullq is used in this case. No emulation is needed. */
22222 168010411 : if (TARGET_AVX512DQ && TARGET_AVX512VL)
22223 593747 : goto do_native;
22224 : /* V*DImode is emulated with 6-8 insns. */
22225 167416664 : else if (TARGET_XOP && mode == V2DImode)
22226 55100 : return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
22227 : /* FALLTHRU */
22228 251306326 : case V8DImode:
22229 : /* vpmullq is used in this case. No emulation is needed. */
22230 251306326 : if (TARGET_AVX512DQ && mode == V8DImode)
22231 391110 : goto do_native;
22232 : else
22233 250915216 : return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
22234 :
22235 895822514 : default:
22236 895822514 : do_native:
22237 895822514 : return ix86_vec_cost (mode, cost->mulss);
22238 : }
22239 : }
22240 : else
22241 164758 : return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
22242 : }
22243 :
22244 : /* Return cost of multiplication in MODE. */
22245 :
22246 : static int
22247 74228603 : ix86_division_cost (const struct processor_costs *cost,
22248 : enum machine_mode mode)
22249 : {
22250 74228603 : machine_mode inner_mode = mode;
22251 74228603 : if (VECTOR_MODE_P (mode))
22252 54797957 : inner_mode = GET_MODE_INNER (mode);
22253 :
22254 74228603 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22255 248095 : return inner_mode == DFmode ? cost->divsd : cost->divss;
22256 73980508 : else if (X87_FLOAT_MODE_P (mode))
22257 44880 : return cost->fdiv;
22258 73935628 : else if (FLOAT_MODE_P (mode))
22259 17606 : return ix86_vec_cost (mode,
22260 17606 : inner_mode == DFmode ? cost->divsd : cost->divss);
22261 : else
22262 82450858 : return cost->divide[MODE_INDEX (mode)];
22263 : }
22264 :
22265 : /* Return cost of shift in MODE.
22266 : If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
22267 : AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
22268 : if op1 is a result of subreg.
22269 :
22270 : SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
22271 :
22272 : static int
22273 793148593 : ix86_shift_rotate_cost (const struct processor_costs *cost,
22274 : enum rtx_code code,
22275 : enum machine_mode mode, bool constant_op1,
22276 : HOST_WIDE_INT op1_val,
22277 : bool and_in_op1,
22278 : bool shift_and_truncate,
22279 : bool *skip_op0, bool *skip_op1)
22280 : {
22281 793148593 : if (skip_op0)
22282 793076245 : *skip_op0 = *skip_op1 = false;
22283 :
22284 793148593 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22285 : {
22286 407540913 : int count;
22287 : /* Cost of reading the memory. */
22288 407540913 : int extra;
22289 :
22290 407540913 : switch (mode)
22291 : {
22292 6186377 : case V4QImode:
22293 6186377 : case V8QImode:
22294 6186377 : if (TARGET_AVX2)
22295 : /* Use vpbroadcast. */
22296 205455 : extra = cost->sse_op;
22297 : else
22298 5980922 : extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
22299 :
22300 6186377 : if (constant_op1)
22301 : {
22302 6186347 : if (code == ASHIFTRT)
22303 : {
22304 190 : count = 4;
22305 190 : extra *= 2;
22306 : }
22307 : else
22308 : count = 2;
22309 : }
22310 30 : else if (TARGET_AVX512BW && TARGET_AVX512VL)
22311 30 : return ix86_vec_cost (mode, cost->sse_op * 4);
22312 0 : else if (TARGET_SSE4_1)
22313 : count = 5;
22314 0 : else if (code == ASHIFTRT)
22315 : count = 6;
22316 : else
22317 0 : count = 5;
22318 6186347 : return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22319 :
22320 3096237 : case V16QImode:
22321 3096237 : if (TARGET_XOP)
22322 : {
22323 : /* For XOP we use vpshab, which requires a broadcast of the
22324 : value to the variable shift insn. For constants this
22325 : means a V16Q const in mem; even when we can perform the
22326 : shift with one insn set the cost to prefer paddb. */
22327 3573 : if (constant_op1)
22328 : {
22329 2614 : extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
22330 2614 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22331 : }
22332 : else
22333 : {
22334 959 : count = (code == ASHIFT) ? 3 : 4;
22335 959 : return ix86_vec_cost (mode, cost->sse_op * count);
22336 : }
22337 : }
22338 : /* FALLTHRU */
22339 6185547 : case V32QImode:
22340 6185547 : if (TARGET_GFNI && constant_op1)
22341 : {
22342 : /* Use vgf2p8affine. One extra load for the mask, but in a loop
22343 : with enough registers it will be moved out. So for now don't
22344 : account the constant mask load. This is not quite right
22345 : for non loop vectorization. */
22346 11990 : extra = 0;
22347 11990 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22348 : }
22349 6173557 : if (TARGET_AVX2)
22350 : /* Use vpbroadcast. */
22351 198290 : extra = cost->sse_op;
22352 : else
22353 5975267 : extra = COSTS_N_INSNS (mode == V16QImode
22354 : ? cost->sse_load[2]
22355 5975267 : : cost->sse_load[3]) / 2;
22356 :
22357 6173557 : if (constant_op1)
22358 : {
22359 6173369 : if (code == ASHIFTRT)
22360 : {
22361 198 : count = 4;
22362 198 : extra *= 2;
22363 : }
22364 : else
22365 : count = 2;
22366 : }
22367 188 : else if (TARGET_AVX512BW
22368 76 : && ((mode == V32QImode && !TARGET_PREFER_AVX256)
22369 38 : || (mode == V16QImode && TARGET_AVX512VL
22370 38 : && !TARGET_PREFER_AVX128)))
22371 76 : return ix86_vec_cost (mode, cost->sse_op * 4);
22372 112 : else if (TARGET_AVX2
22373 0 : && mode == V16QImode && !TARGET_PREFER_AVX128)
22374 : count = 6;
22375 112 : else if (TARGET_SSE4_1)
22376 : count = 9;
22377 112 : else if (code == ASHIFTRT)
22378 : count = 10;
22379 : else
22380 76 : count = 9;
22381 6173481 : return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22382 :
22383 3093132 : case V64QImode:
22384 : /* Ignore the mask load for GF2P8AFFINEQB. */
22385 3093132 : extra = 0;
22386 3093132 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22387 :
22388 55758041 : case V2DImode:
22389 55758041 : case V4DImode:
22390 : /* V*DImode arithmetic right shift is emulated. */
22391 55758041 : if (code == ASHIFTRT && !TARGET_AVX512VL)
22392 : {
22393 1387 : if (constant_op1)
22394 : {
22395 648 : if (op1_val == 63)
22396 438 : count = TARGET_SSE4_2 ? 1 : 2;
22397 509 : else if (TARGET_XOP)
22398 : count = 2;
22399 210 : else if (TARGET_SSE4_1)
22400 : count = 3;
22401 : else
22402 230 : count = 4;
22403 : }
22404 739 : else if (TARGET_XOP)
22405 : count = 3;
22406 74 : else if (TARGET_SSE4_2)
22407 : count = 4;
22408 : else
22409 1387 : count = 5;
22410 :
22411 1387 : return ix86_vec_cost (mode, cost->sse_op * count);
22412 : }
22413 : /* FALLTHRU */
22414 392070897 : default:
22415 392070897 : return ix86_vec_cost (mode, cost->sse_op);
22416 : }
22417 : }
22418 :
22419 779907676 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22420 : {
22421 197449456 : if (constant_op1)
22422 : {
22423 197414500 : if (op1_val > 32)
22424 140272857 : return cost->shift_const + COSTS_N_INSNS (2);
22425 : else
22426 57141643 : return cost->shift_const * 2;
22427 : }
22428 : else
22429 : {
22430 34956 : if (and_in_op1)
22431 63 : return cost->shift_var * 2;
22432 : else
22433 34893 : return cost->shift_var * 6 + COSTS_N_INSNS (2);
22434 : }
22435 : }
22436 : else
22437 : {
22438 188158224 : if (constant_op1)
22439 187420514 : return cost->shift_const;
22440 737710 : else if (shift_and_truncate)
22441 : {
22442 22906 : if (skip_op0)
22443 22906 : *skip_op0 = *skip_op1 = true;
22444 : /* Return the cost after shift-and truncation. */
22445 22906 : return cost->shift_var;
22446 : }
22447 : else
22448 714804 : return cost->shift_var;
22449 : }
22450 : }
22451 :
22452 : static int
22453 146260970 : ix86_insn_cost (rtx_insn *insn, bool speed)
22454 : {
22455 146260970 : int insn_cost = 0;
22456 : /* Add extra cost to avoid post_reload late_combine revert
22457 : the optimization did in pass_rpad. */
22458 146260970 : if (reload_completed
22459 4547960 : && ix86_rpad_gate ()
22460 253318 : && recog_memoized (insn) >= 0
22461 146514026 : && get_attr_avx_partial_xmm_update (insn)
22462 : == AVX_PARTIAL_XMM_UPDATE_TRUE)
22463 : insn_cost += COSTS_N_INSNS (3);
22464 :
22465 146260970 : rtx pat = PATTERN (insn);
22466 : /* A USE of a memory is more expensive than a use of a REG.
22467 : For example *<absneg>mode2_1's use of a signbit mask. */
22468 146260970 : if (GET_CODE (pat) == PARALLEL)
22469 : {
22470 46444687 : for (int i = 0; i < XVECLEN (pat, 0); i++)
22471 : {
22472 31192467 : rtx x = XVECEXP (pat, 0, i);
22473 31192467 : if (GET_CODE (x) == USE && MEM_P (XEXP (x, 0)))
22474 57173 : insn_cost += !speed ? COSTS_N_BYTES (4)
22475 26911 : : TARGET_64BIT ? COSTS_N_INSNS (1) + 1
22476 : : COSTS_N_INSNS (3) + 1;
22477 : }
22478 : }
22479 :
22480 146260970 : return insn_cost + pattern_cost (pat, speed);
22481 : }
22482 :
22483 : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22484 :
22485 : static int
22486 757435 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22487 : {
22488 757435 : if (size < 128)
22489 752298 : return cost->cvtss2sd;
22490 5137 : else if (size < 256)
22491 : {
22492 2352 : if (TARGET_SSE_SPLIT_REGS)
22493 0 : return cost->cvtss2sd * size / 64;
22494 2352 : return cost->cvtss2sd;
22495 : }
22496 2785 : if (size < 512)
22497 1483 : return cost->vcvtps2pd256;
22498 : else
22499 1302 : return cost->vcvtps2pd512;
22500 : }
22501 :
22502 : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
22503 :
22504 : static bool
22505 272908 : unspec_pcmp_p (rtx x)
22506 : {
22507 272908 : return GET_CODE (x) == UNSPEC
22508 272908 : && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22509 : }
22510 :
22511 : /* Compute a (partial) cost for rtx X. Return true if the complete
22512 : cost has been computed, and false if subexpressions should be
22513 : scanned. In either case, *TOTAL contains the cost result. */
22514 :
22515 : static bool
22516 7858874154 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22517 : int *total, bool speed)
22518 : {
22519 7858874154 : rtx mask;
22520 7858874154 : enum rtx_code code = GET_CODE (x);
22521 7858874154 : enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22522 4200151359 : const struct processor_costs *cost
22523 7858874154 : = speed ? ix86_tune_cost : &ix86_size_cost;
22524 7858874154 : int src_cost;
22525 :
22526 : /* Handling different vternlog variants. */
22527 7858874154 : if ((GET_MODE_SIZE (mode) == 64
22528 7858874154 : ? TARGET_AVX512F
22529 6647169655 : : (TARGET_AVX512VL
22530 6584577895 : || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22531 186350006 : && GET_MODE_SIZE (mode) >= 16
22532 126846639 : && outer_code_i == SET
22533 7907813036 : && ternlog_operand (x, mode))
22534 : {
22535 33644 : rtx args[3];
22536 :
22537 33644 : args[0] = NULL_RTX;
22538 33644 : args[1] = NULL_RTX;
22539 33644 : args[2] = NULL_RTX;
22540 33644 : int idx = ix86_ternlog_idx (x, args);
22541 33644 : gcc_assert (idx >= 0);
22542 :
22543 33644 : *total = cost->sse_op;
22544 134576 : for (int i = 0; i != 3; i++)
22545 100932 : if (args[i])
22546 71162 : *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22547 33644 : return true;
22548 : }
22549 :
22550 :
22551 7858840510 : switch (code)
22552 : {
22553 48281420 : case SET:
22554 48281420 : if (register_operand (SET_DEST (x), VOIDmode)
22555 48281420 : && register_operand (SET_SRC (x), VOIDmode))
22556 : {
22557 29863581 : *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22558 29863581 : return true;
22559 : }
22560 :
22561 18417839 : if (register_operand (SET_SRC (x), VOIDmode))
22562 : /* Avoid potentially incorrect high cost from rtx_costs
22563 : for non-tieable SUBREGs. */
22564 : src_cost = 0;
22565 : else
22566 : {
22567 15610730 : src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22568 :
22569 15610730 : if (CONSTANT_P (SET_SRC (x)))
22570 : /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22571 : a small value, possibly zero for cheap constants. */
22572 6978898 : src_cost += COSTS_N_INSNS (1);
22573 : }
22574 :
22575 18417839 : *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22576 18417839 : return true;
22577 :
22578 2893220731 : case CONST_INT:
22579 2893220731 : case CONST:
22580 2893220731 : case LABEL_REF:
22581 2893220731 : case SYMBOL_REF:
22582 2893220731 : if (x86_64_immediate_operand (x, VOIDmode))
22583 2273630377 : *total = 0;
22584 619590354 : else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22585 : /* Consider the zext constants slightly more expensive, as they
22586 : can't appear in most instructions. */
22587 28688549 : *total = 1;
22588 : else
22589 : /* movabsq is slightly more expensive than a simple instruction. */
22590 590901805 : *total = COSTS_N_INSNS (1) + 1;
22591 : return true;
22592 :
22593 7534402 : case CONST_DOUBLE:
22594 7534402 : if (IS_STACK_MODE (mode))
22595 1300462 : switch (standard_80387_constant_p (x))
22596 : {
22597 : case -1:
22598 : case 0:
22599 : break;
22600 279974 : case 1: /* 0.0 */
22601 279974 : *total = 1;
22602 279974 : return true;
22603 485555 : default: /* Other constants */
22604 485555 : *total = 2;
22605 485555 : return true;
22606 : }
22607 : /* FALLTHRU */
22608 :
22609 14508842 : case CONST_VECTOR:
22610 14508842 : switch (standard_sse_constant_p (x, mode))
22611 : {
22612 : case 0:
22613 : break;
22614 4206217 : case 1: /* 0: xor eliminates false dependency */
22615 4206217 : *total = 0;
22616 4206217 : return true;
22617 192833 : default: /* -1: cmp contains false dependency */
22618 192833 : *total = 1;
22619 192833 : return true;
22620 : }
22621 : /* FALLTHRU */
22622 :
22623 11106217 : case CONST_WIDE_INT:
22624 : /* Fall back to (MEM (SYMBOL_REF)), since that's where
22625 : it'll probably end up. Add a penalty for size. */
22626 22212434 : *total = (COSTS_N_INSNS (1)
22627 21986408 : + (!TARGET_64BIT && flag_pic)
22628 22212434 : + (GET_MODE_SIZE (mode) <= 4
22629 19441198 : ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22630 11106217 : return true;
22631 :
22632 22864899 : case ZERO_EXTEND:
22633 : /* The zero extensions is often completely free on x86_64, so make
22634 : it as cheap as possible. */
22635 22864899 : if (TARGET_64BIT && mode == DImode
22636 4886760 : && GET_MODE (XEXP (x, 0)) == SImode)
22637 2969411 : *total = 1;
22638 19895488 : else if (TARGET_ZERO_EXTEND_WITH_AND)
22639 0 : *total = cost->add;
22640 : else
22641 19895488 : *total = cost->movzx;
22642 : return false;
22643 :
22644 2714271 : case SIGN_EXTEND:
22645 2714271 : *total = cost->movsx;
22646 2714271 : return false;
22647 :
22648 652569832 : case ASHIFT:
22649 652569832 : if (SCALAR_INT_MODE_P (mode)
22650 251964801 : && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22651 696679927 : && CONST_INT_P (XEXP (x, 1)))
22652 : {
22653 43931421 : HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22654 43931421 : if (value == 1)
22655 : {
22656 2528988 : *total = cost->add;
22657 2528988 : return false;
22658 : }
22659 41402433 : if ((value == 2 || value == 3)
22660 4650050 : && cost->lea <= cost->shift_const)
22661 : {
22662 2197374 : *total = cost->lea;
22663 2197374 : return false;
22664 : }
22665 : }
22666 : /* FALLTHRU */
22667 :
22668 793076245 : case ROTATE:
22669 793076245 : case ASHIFTRT:
22670 793076245 : case LSHIFTRT:
22671 793076245 : case ROTATERT:
22672 793076245 : bool skip_op0, skip_op1;
22673 793076245 : *total = ix86_shift_rotate_cost (cost, code, mode,
22674 793076245 : CONSTANT_P (XEXP (x, 1)),
22675 : CONST_INT_P (XEXP (x, 1))
22676 : ? INTVAL (XEXP (x, 1)) : -1,
22677 : GET_CODE (XEXP (x, 1)) == AND,
22678 793076245 : SUBREG_P (XEXP (x, 1))
22679 793076245 : && GET_CODE (XEXP (XEXP (x, 1),
22680 : 0)) == AND,
22681 : &skip_op0, &skip_op1);
22682 793076245 : if (skip_op0 || skip_op1)
22683 : {
22684 22906 : if (!skip_op0)
22685 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22686 22906 : if (!skip_op1)
22687 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22688 22906 : return true;
22689 : }
22690 : return false;
22691 :
22692 232120 : case FMA:
22693 232120 : {
22694 232120 : rtx sub;
22695 :
22696 232120 : gcc_assert (FLOAT_MODE_P (mode));
22697 232120 : gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22698 :
22699 464240 : *total = ix86_vec_cost (mode,
22700 232120 : GET_MODE_INNER (mode) == SFmode
22701 : ? cost->fmass : cost->fmasd);
22702 232120 : *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22703 :
22704 : /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22705 232120 : sub = XEXP (x, 0);
22706 232120 : if (GET_CODE (sub) == NEG)
22707 51516 : sub = XEXP (sub, 0);
22708 232120 : *total += rtx_cost (sub, mode, FMA, 0, speed);
22709 :
22710 232120 : sub = XEXP (x, 2);
22711 232120 : if (GET_CODE (sub) == NEG)
22712 40543 : sub = XEXP (sub, 0);
22713 232120 : *total += rtx_cost (sub, mode, FMA, 2, speed);
22714 232120 : return true;
22715 : }
22716 :
22717 1800223609 : case MULT:
22718 1800223609 : if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22719 : {
22720 564604377 : rtx op0 = XEXP (x, 0);
22721 564604377 : rtx op1 = XEXP (x, 1);
22722 564604377 : int nbits;
22723 564604377 : if (CONST_INT_P (XEXP (x, 1)))
22724 : {
22725 545862554 : unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22726 1107512381 : for (nbits = 0; value != 0; value &= value - 1)
22727 561649827 : nbits++;
22728 : }
22729 : else
22730 : /* This is arbitrary. */
22731 : nbits = 7;
22732 :
22733 : /* Compute costs correctly for widening multiplication. */
22734 564604377 : if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22735 570260615 : && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22736 5656238 : == GET_MODE_SIZE (mode))
22737 : {
22738 5642313 : int is_mulwiden = 0;
22739 5642313 : machine_mode inner_mode = GET_MODE (op0);
22740 :
22741 5642313 : if (GET_CODE (op0) == GET_CODE (op1))
22742 5541712 : is_mulwiden = 1, op1 = XEXP (op1, 0);
22743 100601 : else if (CONST_INT_P (op1))
22744 : {
22745 90620 : if (GET_CODE (op0) == SIGN_EXTEND)
22746 40529 : is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22747 40529 : == INTVAL (op1);
22748 : else
22749 50091 : is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22750 : }
22751 :
22752 5632332 : if (is_mulwiden)
22753 5632332 : op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22754 : }
22755 :
22756 564604377 : int mult_init;
22757 : // Double word multiplication requires 3 mults and 2 adds.
22758 1144980966 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22759 : {
22760 339732672 : mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22761 339732672 : + 2 * cost->add;
22762 339732672 : nbits *= 3;
22763 : }
22764 387892810 : else mult_init = cost->mult_init[MODE_INDEX (mode)];
22765 :
22766 1129208754 : *total = (mult_init
22767 564604377 : + nbits * cost->mult_bit
22768 564604377 : + rtx_cost (op0, mode, outer_code, opno, speed)
22769 564604377 : + rtx_cost (op1, mode, outer_code, opno, speed));
22770 :
22771 564604377 : return true;
22772 : }
22773 1235619232 : *total = ix86_multiplication_cost (cost, mode);
22774 1235619232 : return false;
22775 :
22776 74214512 : case DIV:
22777 74214512 : case UDIV:
22778 74214512 : case MOD:
22779 74214512 : case UMOD:
22780 74214512 : *total = ix86_division_cost (cost, mode);
22781 74214512 : return false;
22782 :
22783 702596783 : case PLUS:
22784 702596783 : if (GET_MODE_CLASS (mode) == MODE_INT
22785 961106604 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22786 : {
22787 142965899 : if (GET_CODE (XEXP (x, 0)) == PLUS
22788 3696317 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22789 832882 : && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22790 832857 : && CONSTANT_P (XEXP (x, 1)))
22791 : {
22792 832800 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22793 832800 : if (val == 2 || val == 4 || val == 8)
22794 : {
22795 832696 : *total = cost->lea;
22796 832696 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22797 : outer_code, opno, speed);
22798 832696 : *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22799 : outer_code, opno, speed);
22800 832696 : *total += rtx_cost (XEXP (x, 1), mode,
22801 : outer_code, opno, speed);
22802 832696 : return true;
22803 : }
22804 : }
22805 142133099 : else if (GET_CODE (XEXP (x, 0)) == MULT
22806 53559212 : && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22807 : {
22808 53497875 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22809 53497875 : if (val == 2 || val == 4 || val == 8)
22810 : {
22811 8129942 : *total = cost->lea;
22812 8129942 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22813 : outer_code, opno, speed);
22814 8129942 : *total += rtx_cost (XEXP (x, 1), mode,
22815 : outer_code, opno, speed);
22816 8129942 : return true;
22817 : }
22818 : }
22819 88635224 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
22820 : {
22821 2863517 : rtx op = XEXP (XEXP (x, 0), 0);
22822 :
22823 : /* Add with carry, ignore the cost of adding a carry flag. */
22824 2863517 : if (ix86_carry_flag_operator (op, mode)
22825 2863517 : || ix86_carry_flag_unset_operator (op, mode))
22826 70510 : *total = cost->add;
22827 : else
22828 : {
22829 2793007 : *total = cost->lea;
22830 2793007 : *total += rtx_cost (op, mode,
22831 : outer_code, opno, speed);
22832 : }
22833 :
22834 2863517 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22835 : outer_code, opno, speed);
22836 2863517 : *total += rtx_cost (XEXP (x, 1), mode,
22837 : outer_code, opno, speed);
22838 2863517 : return true;
22839 : }
22840 : }
22841 : /* FALLTHRU */
22842 :
22843 1876578748 : case MINUS:
22844 : /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22845 1876578748 : if (GET_MODE_CLASS (mode) == MODE_INT
22846 527896868 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22847 237520167 : && GET_CODE (XEXP (x, 0)) == MINUS
22848 1876619183 : && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22849 15506 : || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22850 : {
22851 24929 : *total = cost->add;
22852 24929 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22853 : outer_code, opno, speed);
22854 24929 : *total += rtx_cost (XEXP (x, 1), mode,
22855 : outer_code, opno, speed);
22856 24929 : return true;
22857 : }
22858 :
22859 1876553819 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22860 2411533 : *total = cost->addss;
22861 1874142286 : else if (X87_FLOAT_MODE_P (mode))
22862 220127 : *total = cost->fadd;
22863 1873922159 : else if (FLOAT_MODE_P (mode))
22864 448648 : *total = ix86_vec_cost (mode, cost->addss);
22865 1873473511 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22866 1235437463 : *total = ix86_vec_cost (mode, cost->sse_op);
22867 1315437049 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22868 338690210 : *total = cost->add * 2;
22869 : else
22870 299345838 : *total = cost->add;
22871 : return false;
22872 :
22873 3940081 : case IOR:
22874 3940081 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22875 3693395 : || SSE_FLOAT_MODE_P (mode))
22876 : {
22877 : /* (ior (not ...) ...) can be a single insn in AVX512. */
22878 480 : if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22879 256277 : && (GET_MODE_SIZE (mode) == 64
22880 0 : || (TARGET_AVX512VL
22881 0 : && (GET_MODE_SIZE (mode) == 32
22882 0 : || GET_MODE_SIZE (mode) == 16))))
22883 : {
22884 0 : rtx right = GET_CODE (XEXP (x, 1)) != NOT
22885 0 : ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22886 :
22887 0 : *total = ix86_vec_cost (mode, cost->sse_op)
22888 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22889 : outer_code, opno, speed)
22890 0 : + rtx_cost (right, mode, outer_code, opno, speed);
22891 0 : return true;
22892 : }
22893 256277 : *total = ix86_vec_cost (mode, cost->sse_op);
22894 256277 : }
22895 3683804 : else if (TARGET_64BIT
22896 3391663 : && mode == TImode
22897 1690196 : && GET_CODE (XEXP (x, 0)) == ASHIFT
22898 252796 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22899 250800 : && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22900 250800 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22901 250800 : && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22902 250800 : && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22903 228698 : && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22904 : {
22905 : /* *concatditi3 is cheap. */
22906 228698 : rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22907 228698 : rtx op1 = XEXP (XEXP (x, 1), 0);
22908 1386 : *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22909 228698 : ? COSTS_N_INSNS (1) /* movq. */
22910 227312 : : set_src_cost (op0, DImode, speed);
22911 2348 : *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22912 228698 : ? COSTS_N_INSNS (1) /* movq. */
22913 226363 : : set_src_cost (op1, DImode, speed);
22914 228698 : return true;
22915 : }
22916 3455106 : else if (TARGET_64BIT
22917 3162965 : && mode == TImode
22918 1461498 : && GET_CODE (XEXP (x, 0)) == AND
22919 1401561 : && REG_P (XEXP (XEXP (x, 0), 0))
22920 1396419 : && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22921 1393709 : && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22922 1393709 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22923 909659 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22924 909659 : && GET_CODE (XEXP (x, 1)) == ASHIFT
22925 907497 : && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22926 907497 : && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22927 907497 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22928 4362603 : && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22929 : {
22930 : /* *insvti_highpart is cheap. */
22931 907497 : rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22932 907497 : *total = COSTS_N_INSNS (1) + 1;
22933 1389 : *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22934 907497 : ? COSTS_N_INSNS (1) /* movq. */
22935 906594 : : set_src_cost (op, DImode, speed);
22936 907497 : return true;
22937 : }
22938 5387359 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22939 747323 : *total = cost->add * 2;
22940 : else
22941 1800286 : *total = cost->add;
22942 : return false;
22943 :
22944 569361 : case XOR:
22945 569361 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22946 436106 : || SSE_FLOAT_MODE_P (mode))
22947 133255 : *total = ix86_vec_cost (mode, cost->sse_op);
22948 931750 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22949 16513 : *total = cost->add * 2;
22950 : else
22951 419593 : *total = cost->add;
22952 : return false;
22953 :
22954 6947691 : case AND:
22955 6947691 : if (address_no_seg_operand (x, mode))
22956 : {
22957 15792 : *total = cost->lea;
22958 15792 : return true;
22959 : }
22960 6931899 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22961 6526443 : || SSE_FLOAT_MODE_P (mode))
22962 : {
22963 : /* pandn is a single instruction. */
22964 438937 : if (GET_CODE (XEXP (x, 0)) == NOT)
22965 : {
22966 56519 : rtx right = XEXP (x, 1);
22967 :
22968 : /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22969 362 : if (GET_CODE (right) == NOT && TARGET_AVX512F
22970 56519 : && (GET_MODE_SIZE (mode) == 64
22971 0 : || (TARGET_AVX512VL
22972 0 : && (GET_MODE_SIZE (mode) == 32
22973 0 : || GET_MODE_SIZE (mode) == 16))))
22974 0 : right = XEXP (right, 0);
22975 :
22976 56519 : *total = ix86_vec_cost (mode, cost->sse_op)
22977 56519 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22978 : outer_code, opno, speed)
22979 56519 : + rtx_cost (right, mode, outer_code, opno, speed);
22980 56519 : return true;
22981 : }
22982 382418 : else if (GET_CODE (XEXP (x, 1)) == NOT)
22983 : {
22984 740 : *total = ix86_vec_cost (mode, cost->sse_op)
22985 740 : + rtx_cost (XEXP (x, 0), mode,
22986 : outer_code, opno, speed)
22987 740 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22988 : outer_code, opno, speed);
22989 740 : return true;
22990 : }
22991 381678 : *total = ix86_vec_cost (mode, cost->sse_op);
22992 381678 : }
22993 13685615 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22994 : {
22995 1134095 : if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22996 : {
22997 1670 : *total = cost->add * 2
22998 835 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22999 : outer_code, opno, speed)
23000 835 : + rtx_cost (XEXP (x, 1), mode,
23001 : outer_code, opno, speed);
23002 835 : return true;
23003 : }
23004 1133260 : else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
23005 : {
23006 0 : *total = cost->add * 2
23007 0 : + rtx_cost (XEXP (x, 0), mode,
23008 : outer_code, opno, speed)
23009 0 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
23010 : outer_code, opno, speed);
23011 0 : return true;
23012 : }
23013 1133260 : *total = cost->add * 2;
23014 : }
23015 5358867 : else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
23016 : {
23017 7578 : *total = cost->add
23018 3789 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23019 : outer_code, opno, speed)
23020 3789 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23021 3789 : return true;
23022 : }
23023 5355078 : else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
23024 : {
23025 112 : *total = cost->add
23026 56 : + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23027 56 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
23028 : outer_code, opno, speed);
23029 56 : return true;
23030 : }
23031 : else
23032 5355022 : *total = cost->add;
23033 : return false;
23034 :
23035 518018 : case NOT:
23036 518018 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23037 : {
23038 : /* (not (xor ...)) can be a single insn in AVX512. */
23039 0 : if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
23040 10968 : && (GET_MODE_SIZE (mode) == 64
23041 0 : || (TARGET_AVX512VL
23042 0 : && (GET_MODE_SIZE (mode) == 32
23043 0 : || GET_MODE_SIZE (mode) == 16))))
23044 : {
23045 0 : *total = ix86_vec_cost (mode, cost->sse_op)
23046 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23047 : outer_code, opno, speed)
23048 0 : + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
23049 : outer_code, opno, speed);
23050 0 : return true;
23051 : }
23052 :
23053 : // vnot is pxor -1.
23054 10968 : *total = ix86_vec_cost (mode, cost->sse_op) + 1;
23055 : }
23056 1160013 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23057 45968 : *total = cost->add * 2;
23058 : else
23059 461082 : *total = cost->add;
23060 : return false;
23061 :
23062 18618208 : case NEG:
23063 18618208 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23064 52068 : *total = cost->sse_op;
23065 18566140 : else if (X87_FLOAT_MODE_P (mode))
23066 15087 : *total = cost->fchs;
23067 18551053 : else if (FLOAT_MODE_P (mode))
23068 27070 : *total = ix86_vec_cost (mode, cost->sse_op);
23069 18523983 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23070 13708129 : *total = ix86_vec_cost (mode, cost->sse_op);
23071 9783093 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23072 1804335 : *total = cost->add * 3;
23073 : else
23074 3011519 : *total = cost->add;
23075 : return false;
23076 :
23077 53381026 : case COMPARE:
23078 53381026 : rtx op0, op1;
23079 53381026 : op0 = XEXP (x, 0);
23080 53381026 : op1 = XEXP (x, 1);
23081 53381026 : if (GET_CODE (op0) == ZERO_EXTRACT
23082 163826 : && XEXP (op0, 1) == const1_rtx
23083 146630 : && CONST_INT_P (XEXP (op0, 2))
23084 146594 : && op1 == const0_rtx)
23085 : {
23086 : /* This kind of construct is implemented using test[bwl].
23087 : Treat it as if we had an AND. */
23088 146594 : mode = GET_MODE (XEXP (op0, 0));
23089 293188 : *total = (cost->add
23090 146594 : + rtx_cost (XEXP (op0, 0), mode, outer_code,
23091 : opno, speed)
23092 146594 : + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
23093 146594 : return true;
23094 : }
23095 :
23096 53234432 : if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
23097 : {
23098 : /* This is an overflow detection, count it as a normal compare. */
23099 143477 : *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
23100 143477 : return true;
23101 : }
23102 :
23103 53090955 : rtx geu;
23104 : /* Match x
23105 : (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
23106 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
23107 53090955 : if (mode == CCCmode
23108 291442 : && GET_CODE (op0) == NEG
23109 7878 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
23110 7875 : && REG_P (XEXP (geu, 0))
23111 7875 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
23112 759 : || GET_MODE (XEXP (geu, 0)) == CCmode)
23113 7875 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
23114 7875 : && XEXP (geu, 1) == const0_rtx
23115 7875 : && GET_CODE (op1) == LTU
23116 7875 : && REG_P (XEXP (op1, 0))
23117 7875 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
23118 7875 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23119 53098830 : && XEXP (op1, 1) == const0_rtx)
23120 : {
23121 : /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
23122 7875 : *total = 0;
23123 7875 : return true;
23124 : }
23125 : /* Match x
23126 : (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
23127 : (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
23128 53083080 : if (mode == CCCmode
23129 283567 : && GET_CODE (op0) == NEG
23130 3 : && GET_CODE (XEXP (op0, 0)) == LTU
23131 3 : && REG_P (XEXP (XEXP (op0, 0), 0))
23132 3 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
23133 3 : && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
23134 3 : && XEXP (XEXP (op0, 0), 1) == const0_rtx
23135 3 : && GET_CODE (op1) == GEU
23136 3 : && REG_P (XEXP (op1, 0))
23137 3 : && GET_MODE (XEXP (op1, 0)) == CCCmode
23138 3 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23139 53083083 : && XEXP (op1, 1) == const0_rtx)
23140 : {
23141 : /* This is *x86_cmc. */
23142 3 : if (!speed)
23143 0 : *total = COSTS_N_BYTES (1);
23144 3 : else if (TARGET_SLOW_STC)
23145 0 : *total = COSTS_N_INSNS (2);
23146 : else
23147 3 : *total = COSTS_N_INSNS (1);
23148 3 : return true;
23149 : }
23150 :
23151 53083077 : if (SCALAR_INT_MODE_P (GET_MODE (op0))
23152 110625641 : && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
23153 : {
23154 756338 : if (op1 == const0_rtx)
23155 217984 : *total = cost->add
23156 108992 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
23157 : else
23158 1294692 : *total = 3*cost->add
23159 647346 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
23160 647346 : + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
23161 756338 : return true;
23162 : }
23163 :
23164 : /* The embedded comparison operand is completely free. */
23165 52326739 : if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
23166 372337 : *total = 0;
23167 :
23168 : return false;
23169 :
23170 1369760 : case FLOAT_EXTEND:
23171 : /* x87 represents all values extended to 80bit. */
23172 1369760 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23173 664905 : *total = 0;
23174 : else
23175 1409710 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23176 : return false;
23177 :
23178 83682 : case FLOAT_TRUNCATE:
23179 83682 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23180 58083 : *total = cost->fadd;
23181 : else
23182 51198 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23183 : return false;
23184 683164 : case FLOAT:
23185 683164 : case UNSIGNED_FLOAT:
23186 683164 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23187 : /* TODO: We do not have cost tables for x87. */
23188 93902 : *total = cost->fadd;
23189 589262 : else if (VECTOR_MODE_P (mode))
23190 0 : *total = ix86_vec_cost (mode, cost->cvtpi2ps);
23191 : else
23192 589262 : *total = cost->cvtsi2ss;
23193 : return false;
23194 :
23195 284974 : case FIX:
23196 284974 : case UNSIGNED_FIX:
23197 284974 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23198 : /* TODO: We do not have cost tables for x87. */
23199 284974 : *total = cost->fadd;
23200 0 : else if (VECTOR_MODE_P (mode))
23201 0 : *total = ix86_vec_cost (mode, cost->cvtps2pi);
23202 : else
23203 0 : *total = cost->cvtss2si;
23204 : return false;
23205 :
23206 387534 : case ABS:
23207 : /* SSE requires memory load for the constant operand. It may make
23208 : sense to account for this. Of course the constant operand may or
23209 : may not be reused. */
23210 387534 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23211 280477 : *total = cost->sse_op;
23212 107057 : else if (X87_FLOAT_MODE_P (mode))
23213 31493 : *total = cost->fabs;
23214 75564 : else if (FLOAT_MODE_P (mode))
23215 25869 : *total = ix86_vec_cost (mode, cost->sse_op);
23216 49695 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23217 6354 : *total = cost->sse_op;
23218 : return false;
23219 :
23220 28683 : case SQRT:
23221 28683 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23222 18326 : *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
23223 10357 : else if (X87_FLOAT_MODE_P (mode))
23224 4315 : *total = cost->fsqrt;
23225 6042 : else if (FLOAT_MODE_P (mode))
23226 6042 : *total = ix86_vec_cost (mode,
23227 : mode == SFmode ? cost->sqrtss : cost->sqrtsd);
23228 : return false;
23229 :
23230 3948714 : case UNSPEC:
23231 3948714 : switch (XINT (x, 1))
23232 : {
23233 126118 : case UNSPEC_TP:
23234 126118 : *total = 0;
23235 126118 : break;
23236 :
23237 5210 : case UNSPEC_VTERNLOG:
23238 5210 : *total = cost->sse_op;
23239 5210 : if (!REG_P (XVECEXP (x, 0, 0)))
23240 720 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23241 5210 : if (!REG_P (XVECEXP (x, 0, 1)))
23242 694 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23243 5210 : if (!REG_P (XVECEXP (x, 0, 2)))
23244 733 : *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
23245 : return true;
23246 :
23247 95241 : case UNSPEC_PTEST:
23248 95241 : {
23249 95241 : *total = cost->sse_op;
23250 95241 : rtx test_op0 = XVECEXP (x, 0, 0);
23251 95241 : if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
23252 : return false;
23253 94593 : if (GET_CODE (test_op0) == AND)
23254 : {
23255 23 : rtx and_op0 = XEXP (test_op0, 0);
23256 23 : if (GET_CODE (and_op0) == NOT)
23257 0 : and_op0 = XEXP (and_op0, 0);
23258 23 : *total += rtx_cost (and_op0, GET_MODE (and_op0),
23259 : AND, 0, speed)
23260 23 : + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
23261 : AND, 1, speed);
23262 : }
23263 : else
23264 94570 : *total = rtx_cost (test_op0, GET_MODE (test_op0),
23265 : UNSPEC, 0, speed);
23266 : }
23267 : return true;
23268 :
23269 20846 : case UNSPEC_BLENDV:
23270 20846 : *total = cost->sse_op;
23271 20846 : if (!REG_P (XVECEXP (x, 0, 0)))
23272 8683 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23273 20846 : if (!REG_P (XVECEXP (x, 0, 1)))
23274 10262 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23275 20846 : if (!REG_P (XVECEXP (x, 0, 2)))
23276 : {
23277 12984 : rtx cond = XVECEXP (x, 0, 2);
23278 12984 : if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
23279 773 : && CONST_VECTOR_P (XEXP (cond, 1)))
23280 : {
23281 : /* avx2_blendvpd256_gt and friends. */
23282 153 : if (!REG_P (XEXP (cond, 0)))
23283 70 : *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
23284 : }
23285 : else
23286 12831 : *total += rtx_cost (cond, mode, code, 2, speed);
23287 : }
23288 : return true;
23289 :
23290 28353 : case UNSPEC_MOVMSK:
23291 28353 : *total = cost->sse_op;
23292 28353 : return true;
23293 :
23294 : default:
23295 : break;
23296 : }
23297 : return false;
23298 :
23299 2020951 : case VEC_CONCAT:
23300 : /* ??? Assume all of these vector manipulation patterns are
23301 : recognizable. In which case they all pretty much have the
23302 : same cost.
23303 : ??? We should still recurse when computing cost. */
23304 2020951 : *total = cost->sse_op;
23305 2020951 : return true;
23306 :
23307 2449813 : case VEC_SELECT:
23308 : /* Special case extracting lower part from the vector.
23309 : This by itself needs no code and most of SSE/AVX instructions have
23310 : packed and single forms where the single form may be represented
23311 : by such VEC_SELECT.
23312 :
23313 : Use cost 1 (despite the fact that functionally equivalent SUBREG has
23314 : cost 0). Making VEC_SELECT completely free, for example instructs CSE
23315 : to forward propagate VEC_SELECT into
23316 :
23317 : (set (reg eax) (reg src))
23318 :
23319 : which then prevents fwprop and combining. See e.g.
23320 : gcc.target/i386/pr91103-1.c.
23321 :
23322 : ??? rtvec_series_p test should be, for valid patterns, equivalent to
23323 : vec_series_lowpart_p but is not, since the latter calls
23324 : can_change_mode_class on ALL_REGS and this returns false since x87 does
23325 : not support subregs at all. */
23326 2449813 : if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
23327 762248 : *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
23328 762248 : outer_code, opno, speed) + 1;
23329 : else
23330 : /* ??? We should still recurse when computing cost. */
23331 1687565 : *total = cost->sse_op;
23332 : return true;
23333 :
23334 1224343 : case VEC_DUPLICATE:
23335 2448686 : *total = rtx_cost (XEXP (x, 0),
23336 1224343 : GET_MODE (XEXP (x, 0)),
23337 : VEC_DUPLICATE, 0, speed);
23338 : /* It's broadcast instruction, not embedded broadcasting. */
23339 1224343 : if (outer_code == SET)
23340 1176056 : *total += cost->sse_op;
23341 :
23342 : return true;
23343 :
23344 725714 : case VEC_MERGE:
23345 725714 : mask = XEXP (x, 2);
23346 : /* Scalar versions of SSE instructions may be represented as:
23347 :
23348 : (vec_merge (vec_duplicate (operation ....))
23349 : (register or memory)
23350 : (const_int 1))
23351 :
23352 : In this case vec_merge and vec_duplicate is for free.
23353 : Just recurse into operation and second operand. */
23354 725714 : if (mask == const1_rtx
23355 214994 : && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
23356 : {
23357 75554 : *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23358 : outer_code, opno, speed)
23359 75554 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23360 75554 : return true;
23361 : }
23362 : /* This is a masked instruction, assume the same cost
23363 : as the nonmasked variant. */
23364 650160 : else if (TARGET_AVX512F
23365 650160 : && (register_operand (mask, GET_MODE (mask))
23366 : /* Redundant clean up of high bits for kmask with VL=2/4
23367 : i.e. (vec_merge op0, op1, (and op3 15)). */
23368 121973 : || (GET_CODE (mask) == AND
23369 372 : && register_operand (XEXP (mask, 0), GET_MODE (mask))
23370 372 : && CONST_INT_P (XEXP (mask, 1))
23371 372 : && ((INTVAL (XEXP (mask, 1)) == 3
23372 131 : && GET_MODE_NUNITS (mode) == 2)
23373 241 : || (INTVAL (XEXP (mask, 1)) == 15
23374 241 : && GET_MODE_NUNITS (mode) == 4)))))
23375 : {
23376 375002 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23377 375002 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23378 375002 : return true;
23379 : }
23380 : /* Combination of the two above:
23381 :
23382 : (vec_merge (vec_merge (vec_duplicate (operation ...))
23383 : (register or memory)
23384 : (reg:QI mask))
23385 : (register or memory)
23386 : (const_int 1))
23387 :
23388 : i.e. avx512fp16_vcvtss2sh_mask. */
23389 275158 : else if (TARGET_AVX512F
23390 121601 : && mask == const1_rtx
23391 48667 : && GET_CODE (XEXP (x, 0)) == VEC_MERGE
23392 27158 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
23393 277420 : && register_operand (XEXP (XEXP (x, 0), 2),
23394 2262 : GET_MODE (XEXP (XEXP (x, 0), 2))))
23395 : {
23396 2250 : *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23397 : mode, outer_code, opno, speed)
23398 2250 : + rtx_cost (XEXP (XEXP (x, 0), 1),
23399 : mode, outer_code, opno, speed)
23400 2250 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23401 2250 : return true;
23402 : }
23403 : /* vcmp. */
23404 272908 : else if (unspec_pcmp_p (mask)
23405 272908 : || (GET_CODE (mask) == NOT
23406 0 : && unspec_pcmp_p (XEXP (mask, 0))))
23407 : {
23408 1950 : rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
23409 1950 : rtx unsop0 = XVECEXP (uns, 0, 0);
23410 : /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
23411 : cost the same as register.
23412 : This is used by avx_cmp<mode>3_ltint_not. */
23413 1950 : if (SUBREG_P (unsop0))
23414 417 : unsop0 = XEXP (unsop0, 0);
23415 1950 : if (GET_CODE (unsop0) == NOT)
23416 18 : unsop0 = XEXP (unsop0, 0);
23417 1950 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23418 1950 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
23419 1950 : + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
23420 1950 : + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
23421 1950 : + cost->sse_op;
23422 1950 : return true;
23423 : }
23424 : else
23425 270958 : *total = cost->sse_op;
23426 270958 : return false;
23427 :
23428 105951522 : case MEM:
23429 : /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast,
23430 : or variants in ix86_vector_duplicate_simode_const. */
23431 :
23432 105951522 : if (GET_MODE_SIZE (mode) >= 16
23433 17958449 : && VECTOR_MODE_P (mode)
23434 11978443 : && SYMBOL_REF_P (XEXP (x, 0))
23435 2207390 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23436 107929794 : && ix86_broadcast_from_constant (mode, x))
23437 : {
23438 488504 : *total = COSTS_N_INSNS (2) + speed;
23439 488504 : return true;
23440 : }
23441 :
23442 : /* An insn that accesses memory is slightly more expensive
23443 : than one that does not. */
23444 105463018 : if (speed)
23445 : {
23446 94332895 : *total += 1;
23447 94332895 : rtx addr = XEXP (x, 0);
23448 : /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
23449 : so for MEM (reg) and MEM (reg + 4), the former costs 5,
23450 : the latter costs 9, it is not accurate for x86. Ideally
23451 : address_cost should be used, but it reduces cost too much.
23452 : So the current solution is to make constant disp as cheap as possible. */
23453 94332895 : if (GET_CODE (addr) == PLUS
23454 76986370 : && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23455 : /* Only handle (reg + disp) since other forms of addr are mostly LEA,
23456 : there's no additional cost for the plus of disp. */
23457 165708507 : && register_operand (XEXP (addr, 0), Pmode))
23458 : {
23459 55367816 : *total += 1;
23460 68203950 : *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23461 55367816 : return true;
23462 : }
23463 : }
23464 :
23465 : return false;
23466 :
23467 52720 : case ZERO_EXTRACT:
23468 52720 : if (XEXP (x, 1) == const1_rtx
23469 11575 : && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23470 0 : && GET_MODE (XEXP (x, 2)) == SImode
23471 0 : && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23472 : {
23473 : /* Ignore cost of zero extension and masking of last argument. */
23474 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23475 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23476 0 : *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23477 0 : return true;
23478 : }
23479 : return false;
23480 :
23481 28813631 : case IF_THEN_ELSE:
23482 28813631 : if (TARGET_XOP
23483 25097 : && VECTOR_MODE_P (mode)
23484 28819022 : && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23485 : {
23486 : /* vpcmov. */
23487 4823 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23488 4823 : if (!REG_P (XEXP (x, 0)))
23489 4663 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23490 4823 : if (!REG_P (XEXP (x, 1)))
23491 4630 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23492 4823 : if (!REG_P (XEXP (x, 2)))
23493 4632 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23494 4823 : return true;
23495 : }
23496 0 : else if (TARGET_CMOVE
23497 28808808 : && SCALAR_INT_MODE_P (mode)
23498 31199832 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23499 : {
23500 : /* cmov. */
23501 2196060 : *total = COSTS_N_INSNS (1);
23502 2196060 : if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23503 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23504 2196060 : if (!REG_P (XEXP (x, 1)))
23505 115421 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23506 2196060 : if (!REG_P (XEXP (x, 2)))
23507 707482 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23508 2196060 : return true;
23509 : }
23510 : return false;
23511 :
23512 17884883 : case EQ:
23513 17884883 : case GT:
23514 17884883 : case GTU:
23515 17884883 : case LT:
23516 17884883 : case LTU:
23517 17884883 : if (TARGET_SSE2
23518 17881685 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23519 18222427 : && GET_MODE_SIZE (mode) >= 8)
23520 : {
23521 : /* vpcmpeq */
23522 332979 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
23523 332979 : if (!REG_P (XEXP (x, 0)))
23524 63809 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23525 332979 : if (!REG_P (XEXP (x, 1)))
23526 127592 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23527 332979 : return true;
23528 : }
23529 17551904 : if (TARGET_XOP
23530 12206 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23531 17552012 : && GET_MODE_SIZE (mode) <= 16)
23532 : {
23533 : /* vpcomeq */
23534 108 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23535 108 : if (!REG_P (XEXP (x, 0)))
23536 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23537 108 : if (!REG_P (XEXP (x, 1)))
23538 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23539 108 : return true;
23540 : }
23541 : return false;
23542 :
23543 15872815 : case NE:
23544 15872815 : case GE:
23545 15872815 : case GEU:
23546 15872815 : if (TARGET_XOP
23547 21900 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23548 15879569 : && GET_MODE_SIZE (mode) <= 16)
23549 : {
23550 : /* vpcomneq */
23551 6754 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23552 6754 : if (!REG_P (XEXP (x, 0)))
23553 1401 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23554 6754 : if (!REG_P (XEXP (x, 1)))
23555 5734 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23556 6754 : return true;
23557 : }
23558 15866061 : if (TARGET_SSE2
23559 15863954 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23560 15867367 : && GET_MODE_SIZE (mode) >= 8)
23561 : {
23562 1330 : if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
23563 : /* vpcmpeq + vpternlog */
23564 40 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
23565 : else
23566 : /* vpcmpeq + pxor + vpcmpeq */
23567 1264 : *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
23568 1282 : if (!REG_P (XEXP (x, 0)))
23569 28 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23570 1282 : if (!REG_P (XEXP (x, 1)))
23571 28 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23572 1282 : return true;
23573 : }
23574 : return false;
23575 :
23576 : default:
23577 : return false;
23578 : }
23579 : }
23580 :
23581 : #if TARGET_MACHO
23582 :
/* Counter incremented once per emitted stub; its value numbers the
   per-stub "L<n>$lz" and "LPC$<n>" labels.  */
23583 : static int current_machopic_label_num;
23584 :
23585 : /* Given a symbol name and its associated stub, write out the
23586 : definition of the stub.

   FILE is the assembly output stream, SYMB the symbol the stub refers
   to (its name encoding is stripped first) and STUB the label emitted
   for the stub itself.  Only reachable for 32-bit targets (asserted
   below).

   Three stub flavours are selected by MACHOPIC_ATT_STUB and
   MACHOPIC_PURE.  Every flavour except the MACHOPIC_ATT_STUB one is
   followed by a binder label that jumps to dyld_stub_binding_helper and
   by a lazy pointer entry initialised to that binder.  */
23587 :
23588 : void
23589 : machopic_output_stub (FILE *file, const char *symb, const char *stub)
23590 : {
23591 : unsigned int length;
23592 : char *binder_name, *symbol_name, lazy_ptr_name[32];
23593 : int label = ++current_machopic_label_num;
23594 :
23595 : /* For 64-bit we shouldn't get here. */
23596 : gcc_assert (!TARGET_64BIT);
23597 :
23598 : /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23599 : symb = targetm.strip_name_encoding (symb);
23600 :
/* Build the binder and symbol names in stack buffers sized from the
   input name plus 32 bytes of slack for whatever the GEN_* macros add.  */
23601 : length = strlen (stub);
23602 : binder_name = XALLOCAVEC (char, length + 32);
23603 : GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23604 :
23605 : length = strlen (symb);
23606 : symbol_name = XALLOCAVEC (char, length + 32);
23607 : GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23608 :
23609 : sprintf (lazy_ptr_name, "L%d$lz", label);
23610 :
/* Select the stub section matching the stub flavour.  */
23611 : if (MACHOPIC_ATT_STUB)
23612 : switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
23613 : else if (MACHOPIC_PURE)
23614 : switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
23615 : else
23616 : switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23617 :
23618 : fprintf (file, "%s:\n", stub);
23619 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23620 :
23621 : if (MACHOPIC_ATT_STUB)
23622 : {
/* The stub body is just five HLT instructions.  */
23623 : fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
23624 : }
23625 : else if (MACHOPIC_PURE)
23626 : {
23627 : /* PIC stub. */
23628 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23629 : rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
23630 : output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
23631 : fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
23632 : label, lazy_ptr_name, label);
23633 : fprintf (file, "\tjmp\t*%%ecx\n");
23634 : }
23635 : else
/* Non-PIC stub: jump indirectly through the lazy pointer.  */
23636 : fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23637 :
23638 : /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
23639 : it needs no stub-binding-helper. */
23640 : if (MACHOPIC_ATT_STUB)
23641 : return;
23642 :
23643 : fprintf (file, "%s:\n", binder_name);
23644 :
/* The binder pushes the address of the lazy pointer and then jumps to
   dyld_stub_binding_helper; the PIC flavour forms that address relative
   to the binder label using %ecx.  */
23645 : if (MACHOPIC_PURE)
23646 : {
23647 : fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
23648 : fprintf (file, "\tpushl\t%%ecx\n");
23649 : }
23650 : else
23651 : fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23652 :
23653 : fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
23654 :
23655 : /* N.B. Keep the correspondence of these
23656 : 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
23657 : old-pic/new-pic/non-pic stubs; altering this will break
23658 : compatibility with existing dylibs. */
23659 : if (MACHOPIC_PURE)
23660 : {
23661 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23662 : switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
23663 : }
23664 : else
23665 : /* 16-byte -mdynamic-no-pic stub. */
23666 : switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
23667 :
/* The lazy pointer itself, initially pointing at the binder.  */
23668 : fprintf (file, "%s:\n", lazy_ptr_name);
23669 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23670 : fprintf (file, ASM_LONG "%s\n", binder_name);
23671 : }
23672 : #endif /* TARGET_MACHO */
23673 :
23674 : /* Order the registers for register allocator.

   Fills reg_alloc_order in this sequence: general registers for which
   call_used_or_fixed_reg_p holds, the remaining general registers,
   x87 stack registers (here only when !TARGET_SSE_MATH), SSE, REX SSE,
   extended REX SSE and mask registers, x87 stack registers (here only
   when TARGET_SSE_MATH) and finally MMX registers.  Any slots left
   over, up to FIRST_PSEUDO_REGISTER, are set to 0.  */
23675 :
23676 : void
23677 222045 : x86_order_regs_for_local_alloc (void)
23678 : {
/* POS is the next free index in reg_alloc_order.  */
23679 222045 : int pos = 0;
23680 222045 : int i;
23681 :
23682 : /* First allocate the local general purpose registers. */
23683 20650185 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23684 27533580 : if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
23685 5782146 : reg_alloc_order [pos++] = i;
23686 :
23687 : /* Global general purpose registers. */
23688 20650185 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23689 23715027 : if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
23690 1323294 : reg_alloc_order [pos++] = i;
23691 :
23692 : /* x87 registers come first in case we are doing FP math
23693 : using them. */
23694 222045 : if (!TARGET_SSE_MATH)
23695 57663 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23696 51256 : reg_alloc_order [pos++] = i;
23697 :
23698 : /* SSE registers. */
23699 1998405 : for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23700 1776360 : reg_alloc_order [pos++] = i;
23701 1998405 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23702 1776360 : reg_alloc_order [pos++] = i;
23703 :
23704 : /* Extended REX SSE registers. */
23705 3774765 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23706 3552720 : reg_alloc_order [pos++] = i;
23707 :
23708 : /* Mask registers. */
23709 1998405 : for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23710 1776360 : reg_alloc_order [pos++] = i;
23711 :
23712 : /* x87 registers, placed after the SSE and mask registers when SSE math
   is in use. */
23713 222045 : if (TARGET_SSE_MATH)
23714 1940742 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23715 1725104 : reg_alloc_order [pos++] = i;
23716 :
/* MMX registers come last.  */
23717 1998405 : for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23718 1776360 : reg_alloc_order [pos++] = i;
23719 :
23720 : /* Initialize the rest of array as we do not allocate some registers
23721 : at all. */
23722 1110225 : while (pos < FIRST_PSEUDO_REGISTER)
23723 888180 : reg_alloc_order [pos++] = 0;
23724 222045 : }
23725 :
/* Return true if RECORD_TYPE should be laid out with the MS bit-field
   layout.  That is the case when TARGET_MS_BITFIELD_LAYOUT is in effect
   and the type does not carry the "gcc_struct" attribute, or when the
   type carries the "ms_struct" attribute.  */
23726 : static bool
23727 246046844 : ix86_ms_bitfield_layout_p (const_tree record_type)
23728 : {
23729 246046844 : return ((TARGET_MS_BITFIELD_LAYOUT
23730 215 : && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23731 246046844 : || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
23732 : }
23733 :
23734 : /* Returns an expression indicating where the this parameter is
23735 : located on entry to the FUNCTION.

   The result is either a hard register or a stack memory reference
   relative to stack_pointer_rtx.  AGGR records whether aggregate_value_p
   holds for FUNCTION's return type; when it does, THIS is taken from the
   second slot instead of the first (register index 1 rather than 0, or
   stack offset 8 rather than 4).  */
23736 :
23737 : static rtx
23738 1767 : x86_this_parameter (tree function)
23739 : {
23740 1767 : tree type = TREE_TYPE (function);
23741 1767 : bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23742 1767 : int nregs;
23743 :
23744 1767 : if (TARGET_64BIT)
23745 : {
/* Choose the integer parameter register table for the function's
   calling convention and index it by AGGR.  */
23746 1765 : const int *parm_regs;
23747 :
23748 1765 : if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
23749 : parm_regs = x86_64_preserve_none_int_parameter_registers;
23750 1765 : else if (ix86_function_type_abi (type) == MS_ABI)
23751 : parm_regs = x86_64_ms_abi_int_parameter_registers;
23752 : else
23753 1765 : parm_regs = x86_64_int_parameter_registers;
23754 1765 : return gen_rtx_REG (Pmode, parm_regs[aggr]);
23755 : }
23756 :
23757 2 : nregs = ix86_function_regparm (type, function);
23758 :
/* 32-bit, non-stdarg function with register parameters: THIS is in a
   register unless AGGR pushes it onto the stack.  */
23759 2 : if (nregs > 0 && !stdarg_p (type))
23760 : {
23761 0 : int regno;
23762 0 : unsigned int ccvt = ix86_get_callcvt (type);
23763 :
23764 0 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23765 0 : regno = aggr ? DX_REG : CX_REG;
23766 0 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23767 : {
23768 0 : regno = CX_REG;
23769 0 : if (aggr)
23770 0 : return gen_rtx_MEM (SImode,
23771 0 : plus_constant (Pmode, stack_pointer_rtx, 4));
23772 : }
23773 : else
23774 : {
/* Plain regparm: AX, or DX when AGGR; with a single register
   parameter and AGGR, THIS lives at 4(%esp) instead.  */
23775 0 : regno = AX_REG;
23776 0 : if (aggr)
23777 : {
23778 0 : regno = DX_REG;
23779 0 : if (nregs == 1)
23780 0 : return gen_rtx_MEM (SImode,
23781 0 : plus_constant (Pmode,
23782 : stack_pointer_rtx, 4));
23783 : }
23784 : }
23785 0 : return gen_rtx_REG (SImode, regno);
23786 : }
23787 :
/* Otherwise THIS is on the stack.  */
23788 4 : return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
23789 4 : aggr ? 8 : 4));
23790 : }
23791 :
23792 : /* Determine whether x86_output_mi_thunk can succeed.

   Only VCALL_OFFSET and FUNCTION are examined; the first two parameters
   are unnamed and unused.  Returns true for every 64-bit thunk.  For
   32-bit, returns true when FUNCTION uses fewer than three register
   parameters; otherwise returns false if a vcall offset is needed or if
   the code is PIC and FUNCTION does not bind locally.  */
23793 :
23794 : static bool
23795 4919 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23796 : const_tree function)
23797 : {
23798 : /* 64-bit can handle anything. */
23799 4919 : if (TARGET_64BIT)
23800 : return true;
23801 :
23802 : /* For 32-bit, everything's fine if we have one free register. */
23803 76 : if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23804 : return true;
23805 :
23806 : /* Need a free register for vcall_offset. */
23807 0 : if (vcall_offset)
23808 : return false;
23809 :
23810 : /* Need a free register for GOT references. */
23811 0 : if (flag_pic && !targetm.binds_local_p (function))
23812 : return false;
23813 :
23814 : /* Otherwise ok. */
23815 : return true;
23816 : }
23817 :
23818 : /* Output the assembler code for a thunk function. THUNK_DECL is the
23819 : declaration for the thunk function itself, FUNCTION is the decl for
23820 : the target function. DELTA is an immediate constant offset to be
23821 : added to THIS. If VCALL_OFFSET is nonzero, the word at
23822 : *(*this + vcall_offset) should be added to THIS.

   FILE is the assembly output stream.  (The parameter called THUNK_DECL
   above is named THUNK_FNDECL in the signature.)  The thunk is built as
   an RTL insn sequence ending in a sibling call (or an indirect jump) to
   FUNCTION and is then assembled directly at the end of this function.
   flag_force_indirect_call may be cleared temporarily; it is restored
   before returning.  */
23823 :
23824 : static void
23825 1767 : x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
23826 : HOST_WIDE_INT vcall_offset, tree function)
23827 : {
23828 1767 : const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23829 1767 : rtx this_param = x86_this_parameter (function);
23830 1767 : rtx this_reg, tmp, fnaddr;
23831 1767 : unsigned int tmp_regno;
23832 1767 : rtx_insn *insn;
23833 1767 : int saved_flag_force_indirect_call = flag_force_indirect_call;
23834 :
/* Pick the scratch register: R10 for 64-bit; for 32-bit AX under
   fastcall, DX under thiscall and CX otherwise.  */
23835 1767 : if (TARGET_64BIT)
23836 : tmp_regno = R10_REG;
23837 : else
23838 : {
23839 2 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
23840 2 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23841 : tmp_regno = AX_REG;
23842 2 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23843 : tmp_regno = DX_REG;
23844 : else
23845 2 : tmp_regno = CX_REG;
23846 :
/* Cleared for 32-bit PIC thunks only; the saved value is restored at
   the end of the function.  */
23847 2 : if (flag_pic)
23848 2 : flag_force_indirect_call = 0;
23849 : }
23850 :
23851 1767 : emit_note (NOTE_INSN_PROLOGUE_END);
23852 :
23853 : /* If CET branch protection is enabled, insert an ENDBR instruction. */
23854 1767 : if ((flag_cf_protection & CF_BRANCH))
23855 20 : emit_insn (gen_nop_endbr ());
23856 :
23857 : /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23858 : pull it in now and let DELTA benefit. */
23859 1767 : if (REG_P (this_param))
23860 : this_reg = this_param;
23861 2 : else if (vcall_offset)
23862 : {
23863 : /* Put the this parameter into %eax. */
23864 2 : this_reg = gen_rtx_REG (Pmode, AX_REG);
23865 1 : emit_move_insn (this_reg, this_param);
23866 : }
23867 : else
23868 : this_reg = NULL_RTX;
23869 :
23870 : /* Adjust the this parameter by a fixed constant. */
23871 1767 : if (delta)
23872 : {
23873 828 : rtx delta_rtx = GEN_INT (delta);
23874 828 : rtx delta_dst = this_reg ? this_reg : this_param;
23875 :
23876 828 : if (TARGET_64BIT)
23877 : {
/* A DELTA that is not a valid 64-bit general operand is first
   loaded into the scratch register.  */
23878 827 : if (!x86_64_general_operand (delta_rtx, Pmode))
23879 : {
23880 0 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23881 0 : emit_move_insn (tmp, delta_rtx);
23882 0 : delta_rtx = tmp;
23883 : }
23884 : }
23885 :
23886 829 : ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
23887 : }
23888 :
23889 : /* Adjust the this parameter by a value stored in the vtable. */
23890 1767 : if (vcall_offset)
23891 : {
23892 990 : rtx vcall_addr, vcall_mem, this_mem;
23893 :
23894 991 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23895 :
/* Load *THIS into the scratch register, zero-extending when
   pointers are narrower than Pmode.  */
23896 990 : this_mem = gen_rtx_MEM (ptr_mode, this_reg);
23897 991 : if (Pmode != ptr_mode)
23898 0 : this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
23899 990 : emit_move_insn (tmp, this_mem);
23900 :
23901 : /* Adjust the this parameter. */
23902 991 : vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
/* If scratch + VCALL_OFFSET is not a legitimate 64-bit address, put
   the offset into R11 and use a register + register address.  */
23903 990 : if (TARGET_64BIT
23904 990 : && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
23905 : {
23906 0 : rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
23907 0 : emit_move_insn (tmp2, GEN_INT (vcall_offset));
23908 0 : vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
23909 : }
23910 :
23911 990 : vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
23912 991 : if (Pmode != ptr_mode)
23913 0 : emit_insn (gen_addsi_1_zext (this_reg,
23914 : gen_rtx_REG (ptr_mode,
23915 : REGNO (this_reg)),
23916 : vcall_mem));
23917 : else
23918 990 : ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
23919 : }
23920 :
23921 : /* If necessary, drop THIS back to its stack slot. */
23922 1767 : if (this_reg && this_reg != this_param)
23923 1 : emit_move_insn (this_param, this_reg);
23924 :
/* Work out how to address FUNCTION.  The symbol is used directly
   unless the code is PIC and FUNCTION does not bind locally; in that
   case the address is loaded through the GOT (or, under TARGET_MACHO,
   through machopic_indirect_call_target).  */
23925 1767 : fnaddr = XEXP (DECL_RTL (function), 0);
23926 1767 : if (TARGET_64BIT)
23927 : {
23928 25 : if (!flag_pic || targetm.binds_local_p (function)
23929 1790 : || TARGET_PECOFF)
23930 : ;
23931 : else
23932 : {
23933 0 : tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
23934 0 : tmp = gen_rtx_CONST (Pmode, tmp);
23935 0 : fnaddr = gen_const_mem (Pmode, tmp);
23936 : }
23937 : }
23938 : else
23939 : {
23940 2 : if (!flag_pic || targetm.binds_local_p (function))
23941 : ;
23942 : #if TARGET_MACHO
23943 : else if (TARGET_MACHO)
23944 : {
23945 : fnaddr = machopic_indirect_call_target (DECL_RTL (function));
23946 : fnaddr = XEXP (fnaddr, 0);
23947 : }
23948 : #endif /* TARGET_MACHO */
23949 : else
23950 : {
/* 32-bit PIC: set up the GOT pointer in CX and address FUNCTION
   through its GOT slot.  */
23951 0 : tmp = gen_rtx_REG (Pmode, CX_REG);
23952 0 : output_set_got (tmp, NULL_RTX);
23953 :
23954 0 : fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
23955 0 : fnaddr = gen_rtx_CONST (Pmode, fnaddr);
23956 0 : fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
23957 0 : fnaddr = gen_const_mem (Pmode, fnaddr);
23958 : }
23959 : }
23960 :
23961 : /* Our sibling call patterns do not allow memories, because we have no
23962 : predicate that can distinguish between frame and non-frame memory.
23963 : For our purposes here, we can get away with (ab)using a jump pattern,
23964 : because we're going to do no optimization. */
23965 1767 : if (MEM_P (fnaddr))
23966 : {
23967 0 : if (sibcall_insn_operand (fnaddr, word_mode))
23968 : {
23969 0 : fnaddr = XEXP (DECL_RTL (function), 0);
23970 0 : tmp = gen_rtx_MEM (QImode, fnaddr);
23971 0 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23972 0 : tmp = emit_call_insn (tmp);
23973 0 : SIBLING_CALL_P (tmp) = 1;
23974 : }
23975 : else
23976 0 : emit_jump_insn (gen_indirect_jump (fnaddr));
23977 : }
23978 : else
23979 : {
23980 1767 : if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
23981 : {
23982 : // CM_LARGE_PIC always uses pseudo PIC register which is
23983 : // uninitialized. Since FUNCTION is local and calling it
23984 : // doesn't go through PLT, we use scratch register %r11 as
23985 : // PIC register and initialize it here.
23986 3 : pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
23987 3 : ix86_init_large_pic_reg (tmp_regno);
23988 3 : fnaddr = legitimize_pic_address (fnaddr,
23989 3 : gen_rtx_REG (Pmode, tmp_regno));
23990 : }
23991 :
/* An address that is not a valid sibcall operand is moved into the
   scratch register first (zero-extended to word_mode if needed).  */
23992 1767 : if (!sibcall_insn_operand (fnaddr, word_mode))
23993 : {
23994 9 : tmp = gen_rtx_REG (word_mode, tmp_regno);
23995 9 : if (GET_MODE (fnaddr) != word_mode)
23996 0 : fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
23997 9 : emit_move_insn (tmp, fnaddr);
23998 9 : fnaddr = tmp;
23999 : }
24000 :
/* Emit the call and mark it as a sibling call.  */
24001 1767 : tmp = gen_rtx_MEM (QImode, fnaddr);
24002 1767 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
24003 1767 : tmp = emit_call_insn (tmp);
24004 1767 : SIBLING_CALL_P (tmp) = 1;
24005 : }
24006 1767 : emit_barrier ();
24007 :
24008 : /* Emit just enough of rest_of_compilation to get the insns emitted. */
24009 1767 : insn = get_insns ();
24010 1767 : shorten_branches (insn);
24011 1767 : assemble_start_function (thunk_fndecl, fnname);
24012 1767 : final_start_function (insn, file, 1);
24013 1767 : final (insn, file, 1);
24014 1767 : final_end_function ();
24015 1767 : assemble_end_function (thunk_fndecl, fnname);
24016 :
/* Undo the temporary change made for 32-bit PIC above.  */
24017 1767 : flag_force_indirect_call = saved_flag_force_indirect_call;
24018 1767 : }
24019 :
/* Emit the start-of-file assembler output.  Calls default_file_start
   and then writes to asm_out_file, in this order: ".code16gcc" for
   TARGET_16BIT, the Darwin prologue (TARGET_MACHO builds only), a
   ".version" directive if X86_FILE_START_VERSION_DIRECTIVE, a
   ".global __fltused" directive if X86_FILE_START_FLTUSED, and
   ".intel_syntax noprefix" when the Intel assembler dialect is
   selected.  */
24020 : static void
24021 281763 : x86_file_start (void)
24022 : {
24023 281763 : default_file_start ();
24024 281763 : if (TARGET_16BIT)
24025 6 : fputs ("\t.code16gcc\n", asm_out_file);
24026 : #if TARGET_MACHO
24027 : darwin_file_start ();
24028 : #endif
24029 281763 : if (X86_FILE_START_VERSION_DIRECTIVE)
24030 : fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24031 281763 : if (X86_FILE_START_FLTUSED)
24032 : fputs ("\t.global\t__fltused\n", asm_out_file);
24033 281763 : if (ix86_asm_dialect == ASM_INTEL)
24034 73 : fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24035 281763 : }
24036 :
/* Return the alignment to use for a field of type TYPE whose alignment
   was computed as COMPUTED.

   COMPUTED is returned unchanged for TARGET_64BIT and
   TARGET_ALIGN_DOUBLE; TARGET_IAMCU defers to iamcu_alignment.
   Otherwise, after stripping array types, a type whose mode is DFmode,
   DCmode, or of class MODE_INT or MODE_COMPLEX_INT is capped at 32.
   The exception is an _Atomic type with COMPUTED above 32: it keeps
   COMPUTED and, if warn_psabi is set, triggers a one-time note about
   the GCC 11.1 change.  All other types keep COMPUTED.  */
24037 : int
24038 102215919 : x86_field_alignment (tree type, int computed)
24039 : {
24040 102215919 : machine_mode mode;
24041 :
24042 102215919 : if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24043 : return computed;
24044 9121371 : if (TARGET_IAMCU)
24045 0 : return iamcu_alignment (type, computed);
/* Look at the element type of arrays.  */
24046 9121371 : type = strip_array_types (type);
24047 9121371 : mode = TYPE_MODE (type);
24048 9121371 : if (mode == DFmode || mode == DCmode
24049 9015594 : || GET_MODE_CLASS (mode) == MODE_INT
24050 3017747 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24051 : {
24052 6103624 : if (TYPE_ATOMIC (type) && computed > 32)
24053 : {
/* WARNED makes the note appear at most once per compiler run.  */
24054 0 : static bool warned;
24055 :
24056 0 : if (!warned && warn_psabi)
24057 : {
24058 0 : const char *url
24059 : = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
24060 :
24061 0 : warned = true;
24062 0 : inform (input_location, "the alignment of %<_Atomic %T%> "
24063 : "fields changed in %{GCC 11.1%}",
24064 0 : TYPE_MAIN_VARIANT (type), url);
24065 : }
/* No early return here: control falls through to the final
   "return computed" below.  */
24066 : }
24067 : else
24068 6103624 : return MIN (32, computed);
24069 : }
24070 : return computed;
24071 : }
24072 :
24073 : /* Print call to TARGET to FILE.

   LABEL is printed verbatim in front of the instruction.  When
   flag_nop_mcount is set, or TARGET is the string "nop", a nop byte
   sequence is emitted instead of the call (3 bytes for TARGET_16BIT,
   5 bytes otherwise).  For PIC code on non-PECOFF targets the call goes
   through the PLT, which requires flag_plt (asserted).  */
24074 :
24075 : static void
24076 389 : x86_print_call_or_nop (FILE *file, const char *target,
24077 : const char *label)
24078 : {
24079 389 : if (flag_nop_mcount || !strcmp (target, "nop"))
24080 : {
24081 9 : if (TARGET_16BIT)
24082 : /* 3 byte no-op: lea 0(%si), %si */
24083 1 : fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
24084 : else
24085 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
24086 8 : fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
24087 : label);
24088 : }
24089 380 : else if (!TARGET_PECOFF && flag_pic)
24090 : {
24091 8 : gcc_assert (flag_plt);
24092 :
24093 8 : fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
24094 : }
24095 : else
24096 372 : fprintf (file, "%s\tcall\t%s\n", label, target);
24097 389 : }
24098 :
/* If the current function has a "fentry_name" attribute, store the
   string of its first argument in *NAME and return true.  Otherwise
   return false and leave *NAME untouched.  */
24099 : static bool
24100 409 : current_fentry_name (const char **name)
24101 : {
24102 409 : tree attr = lookup_attribute ("fentry_name",
24103 409 : DECL_ATTRIBUTES (current_function_decl));
24104 409 : if (!attr)
24105 : return false;
24106 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24107 2 : return true;
24108 : }
24109 :
/* If the current function has a "fentry_section" attribute, store the
   string of its first argument in *NAME and return true.  Otherwise
   return false and leave *NAME untouched.  */
24110 : static bool
24111 16 : current_fentry_section (const char **name)
24112 : {
24113 16 : tree attr = lookup_attribute ("fentry_section",
24114 16 : DECL_ATTRIBUTES (current_function_decl));
24115 16 : if (!attr)
24116 : return false;
24117 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24118 2 : return true;
24119 : }
24120 :
24121 : /* Return a caller-saved register which isn't live or a callee-saved
24122 : register which has been saved on stack in the prologue at entry for
24123 : profile.

   R11_OK says whether %r11 may be chosen; it is only consulted when
   NO_PROFILE_COUNTERS is defined, otherwise %r11 is always excluded.
   If no register qualifies, a "sorry" diagnostic is issued and R10_REG
   is returned anyway.  */
24124 :
24125 : static int
24126 17 : x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
24127 : {
24128 : /* Use %r10 if the profiler is emitted before the prologue or it isn't
24129 : used by DRAP. */
24130 17 : if (ix86_profile_before_prologue ()
24131 4 : || !crtl->drap_reg
24132 17 : || REGNO (crtl->drap_reg) != R10_REG)
24133 : return R10_REG;
24134 :
24135 : /* The profiler is emitted after the prologue. If there is a
24136 : caller-saved register which isn't live or a callee-saved
24137 : register saved on stack in the prologue, use it. */
24138 :
24139 0 : bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
24140 :
/* Take the first accessible general register other than %r10 (and
   %r11 when excluded) that either is saved by the prologue or is a
   non-fixed call-used register not live out of the entry block.  */
24141 0 : int i;
24142 0 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24143 0 : if (GENERAL_REGNO_P (i)
24144 0 : && i != R10_REG
24145 : #ifdef NO_PROFILE_COUNTERS
24146 0 : && (r11_ok || i != R11_REG)
24147 : #else
24148 : && i != R11_REG
24149 : #endif
24150 0 : && TEST_HARD_REG_BIT (accessible_reg_set, i)
24151 0 : && (ix86_save_reg (i, true, true)
24152 0 : || (call_used_regs[i]
24153 0 : && !fixed_regs[i]
24154 0 : && !REGNO_REG_SET_P (reg_live, i))))
24155 0 : return i;
24156 :
24157 0 : sorry ("no register available for profiling %<-mcmodel=large%s%>",
24158 0 : ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
24159 :
24160 0 : return R10_REG;
24161 : }
24162 :
24163 : /* Output assembler code to FILE to increment profiler label # LABELNO
24164 : for profiling a function entry.

   In order, this function:
   - emits any instruction queued for the function entrance (an ENDBR
     and/or the remaining patchable area);
   - picks the profiling symbol: the function's "fentry_name" attribute,
     else the fentry_name variable, else MCOUNT_NAME_BEFORE_PROLOGUE
     when flag_fentry is set, else MCOUNT_NAME;
   - emits the call sequence for the target (64-bit code models, 32-bit
     PIC, 32-bit non-PIC), preceded by a load of the counter label
     unless NO_PROFILE_COUNTERS is defined;
   - when flag_record_mcount is set or the function has a
     "fentry_section" attribute, records the address of the call site
     (local label "1:") in a section, "__mcount_loc" by default.  */
24165 : void
24166 409 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24167 : {
24168 409 : if (cfun->machine->insn_queued_at_entrance)
24169 : {
24170 7 : if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
24171 6 : fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
24172 7 : unsigned int patch_area_size
24173 7 : = crtl->patch_area_size - crtl->patch_area_entry;
24174 7 : if (patch_area_size)
24175 2 : ix86_output_patchable_area (patch_area_size,
24176 : crtl->patch_area_entry == 0);
24177 : }
24178 :
24179 409 : const char *mcount_name = MCOUNT_NAME;
24180 :
24181 409 : bool fentry_section_p
24182 409 : = (flag_record_mcount
24183 803 : || lookup_attribute ("fentry_section",
24184 394 : DECL_ATTRIBUTES (current_function_decl)));
24185 :
/* The call site only needs the "1:" label when it is recorded in a
   section at the end of this function.  */
24186 : const char *label = fentry_section_p ? "1:" : "";
24187 :
24188 409 : if (current_fentry_name (&mcount_name))
24189 : ;
24190 407 : else if (fentry_name)
24191 1 : mcount_name = fentry_name;
24192 406 : else if (flag_fentry)
24193 394 : mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
24194 :
24195 409 : if (TARGET_64BIT)
24196 : {
24197 : #ifndef NO_PROFILE_COUNTERS
24198 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24199 : fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
24200 : else
24201 : fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
24202 : #endif
24203 :
24204 408 : int scratch;
24205 408 : const char *reg;
/* Buffer for an 'r'-prefixed register name built from hi_reg_name
   when the scratch register is a legacy integer register.  */
24206 408 : char legacy_reg[4] = { 0 };
24207 :
24208 408 : if (!TARGET_PECOFF)
24209 : {
24210 408 : switch (ix86_cmodel)
24211 : {
24212 7 : case CM_LARGE:
/* Large model: load the absolute address of the profiling
   function into a scratch register and call through it.  */
24213 7 : scratch = x86_64_select_profile_regnum (true);
24214 7 : reg = hi_reg_name[scratch];
24215 7 : if (LEGACY_INT_REGNO_P (scratch))
24216 : {
24217 0 : legacy_reg[0] = 'r';
24218 0 : legacy_reg[1] = reg[0];
24219 0 : legacy_reg[2] = reg[1];
24220 0 : reg = legacy_reg;
24221 : }
24222 7 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24223 1 : fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
24224 : "\tcall\t%s\n", label, reg, mcount_name,
24225 : reg);
24226 : else
24227 6 : fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
24228 : label, mcount_name, reg, reg);
24229 : break;
24230 10 : case CM_LARGE_PIC:
24231 : #ifdef NO_PROFILE_COUNTERS
/* Large PIC model: form the GOT address in the scratch register
   from _GLOBAL_OFFSET_TABLE_ and the local label "1", add the
   @PLTOFF offset of the profiling function (via %r11) and call
   through the scratch register.  The "1:" label is always
   emitted here, independently of LABEL.  */
24232 10 : scratch = x86_64_select_profile_regnum (false);
24233 10 : reg = hi_reg_name[scratch];
24234 10 : if (LEGACY_INT_REGNO_P (scratch))
24235 : {
24236 0 : legacy_reg[0] = 'r';
24237 0 : legacy_reg[1] = reg[0];
24238 0 : legacy_reg[2] = reg[1];
24239 0 : reg = legacy_reg;
24240 : }
24241 10 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24242 : {
24243 1 : fprintf (file, "1:movabs\tr11, "
24244 : "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
24245 1 : fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
24246 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24247 1 : fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
24248 : mcount_name);
24249 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24250 1 : fprintf (file, "\tcall\t%s\n", reg);
24251 1 : break;
24252 : }
24253 9 : fprintf (file,
24254 : "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
24255 9 : fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
24256 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24257 9 : fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
24258 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24259 9 : fprintf (file, "\tcall\t*%%%s\n", reg);
24260 : #else
24261 : sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
24262 : #endif
24263 9 : break;
24264 12 : case CM_SMALL_PIC:
24265 12 : case CM_MEDIUM_PIC:
/* Without a PLT, call indirectly through the GOT entry; with a
   PLT, fall through to the default handling.  */
24266 12 : if (!flag_plt)
24267 : {
24268 3 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24269 0 : fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
24270 : label, mcount_name);
24271 : else
24272 3 : fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
24273 : label, mcount_name);
24274 : break;
24275 : }
24276 : /* fall through */
24277 388 : default:
24278 388 : x86_print_call_or_nop (file, mcount_name, label);
24279 388 : break;
24280 : }
24281 : }
24282 : else
24283 : x86_print_call_or_nop (file, mcount_name, label);
24284 : }
24285 1 : else if (flag_pic)
24286 : {
/* 32-bit PIC; the emitted code addresses the GOT through %ebx.  */
24287 : #ifndef NO_PROFILE_COUNTERS
24288 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24289 : fprintf (file,
24290 : "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
24291 : LPREFIX, labelno);
24292 : else
24293 : fprintf (file,
24294 : "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
24295 : LPREFIX, labelno);
24296 : #endif
24297 0 : if (flag_plt)
24298 0 : x86_print_call_or_nop (file, mcount_name, label);
24299 0 : else if (ASSEMBLER_DIALECT == ASM_INTEL)
24300 0 : fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
24301 : label, mcount_name);
24302 : else
24303 0 : fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
24304 : label, mcount_name);
24305 : }
24306 : else
24307 : {
/* 32-bit non-PIC.  */
24308 : #ifndef NO_PROFILE_COUNTERS
24309 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24310 : fprintf (file,
24311 : "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
24312 : LPREFIX, labelno);
24313 : else
24314 : fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
24315 : LPREFIX, labelno);
24316 : #endif
24317 1 : x86_print_call_or_nop (file, mcount_name, label);
24318 : }
24319 :
24320 409 : if (fentry_section_p)
24321 : {
/* Record the address of label "1" in the chosen section: the
   function's "fentry_section" attribute, else the fentry_section
   variable, else "__mcount_loc".  */
24322 16 : const char *sname = "__mcount_loc";
24323 :
24324 16 : if (current_fentry_section (&sname))
24325 : ;
24326 14 : else if (fentry_section)
24327 1 : sname = fentry_section;
24328 :
24329 16 : fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
24330 16 : fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
24331 16 : fprintf (file, "\t.previous\n");
24332 : }
24333 409 : }
24334 :
24335 : /* We don't have exact information about the insn sizes, but we may assume
24336 : quite safely that we are informed about all 1 byte insns and memory
24337 : address sizes. This is enough to eliminate unnecessary padding in
24338 : 99% of cases.

   Return a lower bound, in bytes, on the encoded size of INSN.  Returns
   0 for anything that is not an active insn, for UNSPECV_ALIGN markers
   and for asm statements.  */
24339 :
24340 : int
24341 382252554 : ix86_min_insn_size (rtx_insn *insn)
24342 : {
24343 382252554 : int l = 0, len;
24344 :
24345 382252554 : if (!INSN_P (insn) || !active_insn_p (insn))
24346 500406 : return 0;
24347 :
24348 : /* Alignment markers we have emitted ourselves take no space. */
24349 381752148 : if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24350 381752148 : && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24351 : return 0;
24352 :
24353 : /* Important case - calls are always 5 bytes.
24354 : It is common to have many calls in a row. */
24355 381752142 : if (CALL_P (insn)
24356 9051494 : && symbolic_reference_mentioned_p (PATTERN (insn))
24357 390476245 : && !SIBLING_CALL_P (insn))
24358 : return 5;
24359 373260108 : len = get_attr_length (insn);
24360 373260108 : if (len <= 1)
24361 : return 1;
24362 :
24363 : /* For normal instructions we rely on get_attr_length being exact,
24364 : with a few exceptions. */
24365 364658192 : if (!JUMP_P (insn))
24366 : {
24367 359355062 : enum attr_type type = get_attr_type (insn);
24368 :
24369 359355062 : switch (type)
24370 : {
24371 95502 : case TYPE_MULTI:
/* Asm statements are estimated as 0 bytes.  */
24372 95502 : if (GET_CODE (PATTERN (insn)) == ASM_INPUT
24373 95502 : || asm_noperands (PATTERN (insn)) >= 0)
24374 527 : return 0;
24375 : break;
24376 : case TYPE_OTHER:
24377 : case TYPE_FCMP:
24378 : break;
24379 : default:
24380 : /* Otherwise trust get_attr_length. */
24381 : return len;
24382 : }
24383 :
/* For the exceptions, start from the address length; a symbolic
   reference implies at least 4 address bytes.  */
24384 474866 : l = get_attr_length_address (insn);
24385 474866 : if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24386 : l = 4;
24387 : }
/* Jumps reach this point with L still 0.  The estimate is one byte
   plus the address length when that is known, and 2 bytes otherwise.  */
24388 384328 : if (l)
24389 90538 : return 1+l;
24390 : else
24391 5687458 : return 2;
24392 : }
24393 :
24394 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24395 :
24396 : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24397 : window.

   Walk the insn stream with a sliding window [START, INSN] and, where a
   fourth jump or call could land in the same 16-byte window as the
   previous three, emit a max-skip alignment before it.  */
24398 :
24399 : static void
24400 45424 : ix86_avoid_jump_mispredicts (void)
24401 : {
24402 45424 : rtx_insn *insn, *start = get_insns ();
24403 45424 : int nbytes = 0, njumps = 0;
/* ISJUMP records whether the insn most recently dropped from the front
   of the window (the current START) was a jump or call.  */
24404 45424 : bool isjump = false;
24405 :
24406 : /* Look for all minimal intervals of instructions containing 4 jumps.
24407 : The intervals are bounded by START and INSN. NBYTES is the total
24408 : size of instructions in the interval including INSN and not including
24409 : START. When the NBYTES is smaller than 16 bytes, it is possible
24410 : that the end of START and INSN ends up in the same 16byte page.
24411 :
24412 : The smallest offset in the page INSN can start is the case where START
24413 : ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24414 : We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
24415 :
24416 : Don't consider asm goto as jump, while it can contain a jump, it doesn't
24417 : have to, control transfer to label(s) can be performed through other
24418 : means, and also we estimate minimum length of all asm stmts as 0. */
24419 700828 : for (insn = start; insn; insn = NEXT_INSN (insn))
24420 : {
24421 655404 : int min_size;
24422 :
24423 655404 : if (LABEL_P (insn))
24424 : {
/* An aligned label may insert padding.  Account for it by
   shrinking the window from the front until NBYTES plus the
   label's maximum skip fits below 16.  */
24425 961 : align_flags alignment = label_to_alignment (insn);
24426 961 : int align = alignment.levels[0].log;
24427 961 : int max_skip = alignment.levels[0].maxskip;
24428 :
24429 961 : if (max_skip > 15)
24430 : max_skip = 15;
24431 : /* If align > 3, only up to 16 - max_skip - 1 bytes can be
24432 : already in the current 16 byte page, because otherwise
24433 : ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
24434 : bytes to reach 16 byte boundary. */
24435 961 : if (align <= 0
24436 328 : || (align <= 3 && max_skip != (1 << align) - 1))
24437 961 : max_skip = 0;
24438 961 : if (dump_file)
24439 0 : fprintf (dump_file, "Label %i with max_skip %i\n",
24440 0 : INSN_UID (insn), max_skip);
24441 961 : if (max_skip)
24442 : {
24443 6293 : while (nbytes + max_skip >= 16)
24444 : {
24445 5965 : start = NEXT_INSN (start);
24446 310 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24447 5982 : || CALL_P (start))
24448 350 : njumps--, isjump = true;
24449 : else
24450 : isjump = false;
24451 5965 : nbytes -= ix86_min_insn_size (start);
24452 : }
24453 : }
24454 961 : continue;
24455 961 : }
24456 :
24457 654443 : min_size = ix86_min_insn_size (insn);
24458 654443 : nbytes += min_size;
24459 654443 : if (dump_file)
24460 0 : fprintf (dump_file, "Insn %i estimated to %i bytes\n",
24461 0 : INSN_UID (insn), min_size);
/* Only jumps (other than asm goto) and calls are counted; any other
   insn just grows the window.  */
24462 46586 : if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
24463 654463 : || CALL_P (insn))
24464 47601 : njumps++;
24465 : else
24466 606842 : continue;
24467 :
/* Shrink the window from the front until it holds at most three
   jumps.  */
24468 55996 : while (njumps > 3)
24469 : {
24470 8395 : start = NEXT_INSN (start);
24471 549 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24472 8395 : || CALL_P (start))
24473 1251 : njumps--, isjump = true;
24474 : else
24475 : isjump = false;
24476 8395 : nbytes -= ix86_min_insn_size (start);
24477 : }
24478 47601 : gcc_assert (njumps >= 0);
24479 47601 : if (dump_file)
24480 0 : fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24481 0 : INSN_UID (start), INSN_UID (insn), nbytes);
24482 :
/* START is itself a jump, so together with the three jumps in the
   window there are four within fewer than 16 bytes: pad before INSN.  */
24483 47601 : if (njumps == 3 && isjump && nbytes < 16)
24484 : {
24485 40 : int padsize = 15 - nbytes + ix86_min_insn_size (insn);
24486 :
24487 40 : if (dump_file)
24488 0 : fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24489 0 : INSN_UID (insn), padsize);
24490 40 : emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
24491 : }
24492 : }
24493 45424 : }
24494 : #endif
24495 :
24496 : /* AMD Athlon works faster
24497 : when RET is not destination of conditional jump or directly preceded
24498 : by other jump instruction. We avoid the penalty by inserting NOP just
24499 : before the RET instructions in such cases. */
24500 : static void
24501 45144 : ix86_pad_returns (void)
24502 : {
24503 45144 : edge e;
24504 45144 : edge_iterator ei;
24505 :
24506 90312 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24507 : {
24508 45168 : basic_block bb = e->src;
24509 45168 : rtx_insn *ret = BB_END (bb);
24510 45168 : rtx_insn *prev;
24511 45168 : bool replace = false;
24512 :
24513 45158 : if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
24514 90326 : || optimize_bb_for_size_p (bb))
24515 23 : continue;
24516 179724 : for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24517 134161 : if (active_insn_p (prev) || LABEL_P (prev))
24518 : break;
24519 45145 : if (prev && LABEL_P (prev))
24520 : {
24521 43 : edge e;
24522 43 : edge_iterator ei;
24523 :
24524 56 : FOR_EACH_EDGE (e, ei, bb->preds)
24525 146 : if (EDGE_FREQUENCY (e) && e->src->index >= 0
24526 97 : && !(e->flags & EDGE_FALLTHRU))
24527 : {
24528 : replace = true;
24529 : break;
24530 : }
24531 : }
24532 43 : if (!replace)
24533 : {
24534 45109 : prev = prev_active_insn (ret);
24535 45109 : if (prev
24536 45109 : && ((JUMP_P (prev) && any_condjump_p (prev))
24537 44673 : || CALL_P (prev)))
24538 : replace = true;
24539 : /* Empty functions get branch mispredict even when
24540 : the jump destination is not visible to us. */
24541 45109 : if (!prev && !optimize_function_for_size_p (cfun))
24542 : replace = true;
24543 : }
24544 44691 : if (replace)
24545 : {
24546 489 : emit_jump_insn_before (gen_simple_return_internal_long (), ret);
24547 489 : delete_insn (ret);
24548 : }
24549 : }
24550 45144 : }
24551 :
24552 : /* Count the minimum number of instructions in BB. Return 4 if the
24553 : number of instructions >= 4. */
24554 :
24555 : static int
24556 42 : ix86_count_insn_bb (basic_block bb)
24557 : {
24558 42 : rtx_insn *insn;
24559 42 : int insn_count = 0;
24560 :
24561 : /* Count number of instructions in this block. Return 4 if the number
24562 : of instructions >= 4. */
24563 297 : FOR_BB_INSNS (bb, insn)
24564 : {
24565 : /* Only happen in exit blocks. */
24566 291 : if (JUMP_P (insn)
24567 291 : && ANY_RETURN_P (PATTERN (insn)))
24568 : break;
24569 :
24570 267 : if (NONDEBUG_INSN_P (insn)
24571 102 : && GET_CODE (PATTERN (insn)) != USE
24572 351 : && GET_CODE (PATTERN (insn)) != CLOBBER)
24573 : {
24574 84 : insn_count++;
24575 84 : if (insn_count >= 4)
24576 : return insn_count;
24577 : }
24578 : }
24579 :
24580 : return insn_count;
24581 : }
24582 :
24583 :
24584 : /* Count the minimum number of instructions in code path in BB.
24585 : Return 4 if the number of instructions >= 4. */
24586 :
24587 : static int
24588 62 : ix86_count_insn (basic_block bb)
24589 : {
24590 62 : edge e;
24591 62 : edge_iterator ei;
24592 62 : int min_prev_count;
24593 :
24594 : /* Only bother counting instructions along paths with no
24595 : more than 2 basic blocks between entry and exit. Given
24596 : that BB has an edge to exit, determine if a predecessor
24597 : of BB has an edge from entry. If so, compute the number
24598 : of instructions in the predecessor block. If there
24599 : happen to be multiple such blocks, compute the minimum. */
24600 62 : min_prev_count = 4;
24601 145 : FOR_EACH_EDGE (e, ei, bb->preds)
24602 : {
24603 109 : edge prev_e;
24604 109 : edge_iterator prev_ei;
24605 :
24606 109 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24607 : {
24608 26 : min_prev_count = 0;
24609 26 : break;
24610 : }
24611 182 : FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
24612 : {
24613 109 : if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24614 : {
24615 10 : int count = ix86_count_insn_bb (e->src);
24616 10 : if (count < min_prev_count)
24617 83 : min_prev_count = count;
24618 : break;
24619 : }
24620 : }
24621 : }
24622 :
24623 62 : if (min_prev_count < 4)
24624 32 : min_prev_count += ix86_count_insn_bb (bb);
24625 :
24626 62 : return min_prev_count;
24627 : }
24628 :
24629 : /* Pad short function to 4 instructions. */
24630 :
24631 : static void
24632 63 : ix86_pad_short_function (void)
24633 : {
24634 63 : edge e;
24635 63 : edge_iterator ei;
24636 :
24637 128 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24638 : {
24639 65 : rtx_insn *ret = BB_END (e->src);
24640 65 : if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
24641 : {
24642 62 : int insn_count = ix86_count_insn (e->src);
24643 :
24644 : /* Pad short function. */
24645 62 : if (insn_count < 4)
24646 : {
24647 : rtx_insn *insn = ret;
24648 :
24649 : /* Find epilogue. */
24650 : while (insn
24651 60 : && (!NOTE_P (insn)
24652 26 : || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
24653 37 : insn = PREV_INSN (insn);
24654 :
24655 23 : if (!insn)
24656 0 : insn = ret;
24657 :
24658 : /* Two NOPs count as one instruction. */
24659 23 : insn_count = 2 * (4 - insn_count);
24660 23 : emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
24661 : }
24662 : }
24663 : }
24664 63 : }
24665 :
24666 : /* Fix up a Windows system unwinder issue. If an EH region falls through into
24667 : the epilogue, the Windows system unwinder will apply epilogue logic and
24668 : produce incorrect offsets. This can be avoided by adding a nop between
24669 : the last insn that can throw and the first insn of the epilogue. */
24670 :
24671 : static void
24672 0 : ix86_seh_fixup_eh_fallthru (void)
24673 : {
24674 0 : edge e;
24675 0 : edge_iterator ei;
24676 :
24677 0 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24678 : {
24679 0 : rtx_insn *insn, *next;
24680 :
24681 : /* Find the beginning of the epilogue. */
24682 0 : for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
24683 0 : if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
24684 : break;
24685 0 : if (insn == NULL)
24686 0 : continue;
24687 :
24688 : /* We only care about preceding insns that can throw. */
24689 0 : insn = prev_active_insn (insn);
24690 0 : if (insn == NULL || !can_throw_internal (insn))
24691 0 : continue;
24692 :
24693 : /* Do not separate calls from their debug information. */
24694 0 : for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
24695 0 : if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
24696 0 : insn = next;
24697 : else
24698 : break;
24699 :
24700 0 : emit_insn_after (gen_nops (const1_rtx), insn);
24701 : }
24702 0 : }
24703 : /* Split vector load from parm_decl to elemental loads to avoid STLF
24704 : stalls. */
24705 : static void
24706 974351 : ix86_split_stlf_stall_load ()
24707 : {
24708 974351 : rtx_insn* insn, *start = get_insns ();
24709 974351 : unsigned window = 0;
24710 :
24711 26587434 : for (insn = start; insn; insn = NEXT_INSN (insn))
24712 : {
24713 26586584 : if (!NONDEBUG_INSN_P (insn))
24714 14970398 : continue;
24715 11616186 : window++;
24716 : /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
24717 : other, just emulate for pipeline) before stalled load, stlf stall
24718 : case is as fast as no stall cases on CLX.
24719 : Since CFG is freed before machine_reorg, just do a rough
24720 : calculation of the window according to the layout. */
24721 11616186 : if (window > (unsigned) x86_stlf_window_ninsns)
24722 : return;
24723 :
24724 11598180 : if (any_uncondjump_p (insn)
24725 11562839 : || ANY_RETURN_P (PATTERN (insn))
24726 22784619 : || CALL_P (insn))
24727 : return;
24728 :
24729 10642685 : rtx set = single_set (insn);
24730 10642685 : if (!set)
24731 435608 : continue;
24732 10207077 : rtx src = SET_SRC (set);
24733 20413802 : if (!MEM_P (src)
24734 : /* Only handle V2DFmode load since it doesn't need any scratch
24735 : register. */
24736 1458926 : || GET_MODE (src) != E_V2DFmode
24737 5495 : || !MEM_EXPR (src)
24738 10211059 : || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
24739 10206725 : continue;
24740 :
24741 352 : rtx zero = CONST0_RTX (V2DFmode);
24742 352 : rtx dest = SET_DEST (set);
24743 352 : rtx m = adjust_address (src, DFmode, 0);
24744 352 : rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
24745 352 : emit_insn_before (loadlpd, insn);
24746 352 : m = adjust_address (src, DFmode, 8);
24747 352 : rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
24748 352 : if (dump_file && (dump_flags & TDF_DETAILS))
24749 : {
24750 0 : fputs ("Due to potential STLF stall, split instruction:\n",
24751 : dump_file);
24752 0 : print_rtl_single (dump_file, insn);
24753 0 : fputs ("To:\n", dump_file);
24754 0 : print_rtl_single (dump_file, loadlpd);
24755 0 : print_rtl_single (dump_file, loadhpd);
24756 : }
24757 352 : PATTERN (insn) = loadhpd;
24758 352 : INSN_CODE (insn) = -1;
24759 352 : gcc_assert (recog_memoized (insn) != -1);
24760 : }
24761 : }
24762 :
24763 : /* Implement machine specific optimizations. We implement padding of returns
24764 : for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
24765 : static void
24766 1488371 : ix86_reorg (void)
24767 : {
24768 : /* We are freeing block_for_insn in the toplev to keep compatibility
24769 : with old MDEP_REORGS that are not CFG based. Recompute it now. */
24770 1488371 : compute_bb_for_insn ();
24771 :
24772 1488371 : if (TARGET_SEH && current_function_has_exception_handlers ())
24773 : ix86_seh_fixup_eh_fallthru ();
24774 :
24775 1488371 : if (optimize && optimize_function_for_speed_p (cfun))
24776 : {
24777 976653 : if (TARGET_SSE2)
24778 974351 : ix86_split_stlf_stall_load ();
24779 976653 : if (TARGET_PAD_SHORT_FUNCTION)
24780 63 : ix86_pad_short_function ();
24781 976590 : else if (TARGET_PAD_RETURNS)
24782 45144 : ix86_pad_returns ();
24783 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24784 976653 : if (TARGET_FOUR_JUMP_LIMIT)
24785 45424 : ix86_avoid_jump_mispredicts ();
24786 : #endif
24787 : }
24788 1488371 : }
24789 :
24790 : /* Return nonzero when QImode register that must be represented via REX prefix
24791 : is used. */
24792 : bool
24793 8549852 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24794 : {
24795 8549852 : int i;
24796 8549852 : extract_insn_cached (insn);
24797 32314763 : for (i = 0; i < recog_data.n_operands; i++)
24798 4617400 : if (GENERAL_REG_P (recog_data.operand[i])
24799 21400426 : && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24800 : return true;
24801 : return false;
24802 : }
24803 :
24804 : /* Return true when INSN mentions register that must be encoded using REX
24805 : prefix. */
24806 : bool
24807 195589151 : x86_extended_reg_mentioned_p (rtx insn)
24808 : {
24809 195589151 : subrtx_iterator::array_type array;
24810 1024858984 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24811 : {
24812 877282300 : const_rtx x = *iter;
24813 877282300 : if (REG_P (x)
24814 877282300 : && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
24815 252743673 : || REX2_INT_REGNO_P (REGNO (x))))
24816 48012467 : return true;
24817 : }
24818 147576684 : return false;
24819 195589151 : }
24820 :
24821 : /* Return true when INSN mentions register that must be encoded using REX2
24822 : prefix. */
24823 : bool
24824 2046937 : x86_extended_rex2reg_mentioned_p (rtx insn)
24825 : {
24826 2046937 : subrtx_iterator::array_type array;
24827 9532685 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24828 : {
24829 7486404 : const_rtx x = *iter;
24830 7486404 : if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
24831 656 : return true;
24832 : }
24833 2046281 : return false;
24834 2046937 : }
24835 :
24836 : /* Return true when rtx operands mentions register that must be encoded using
24837 : evex prefix. */
24838 : bool
24839 10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
24840 : {
24841 10 : int i;
24842 28 : for (i = 0; i < nops; i++)
24843 22 : if (EXT_REX_SSE_REG_P (operands[i])
24844 40 : || x86_extended_rex2reg_mentioned_p (operands[i]))
24845 4 : return true;
24846 : return false;
24847 : }
24848 :
24849 : /* If profitable, negate (without causing overflow) integer constant
24850 : of mode MODE at location LOC. Return true in this case. */
24851 : bool
24852 5902150 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
24853 : {
24854 5902150 : HOST_WIDE_INT val;
24855 :
24856 5902150 : if (!CONST_INT_P (*loc))
24857 : return false;
24858 :
24859 4974992 : switch (mode)
24860 : {
24861 2823994 : case E_DImode:
24862 : /* DImode x86_64 constants must fit in 32 bits. */
24863 2823994 : gcc_assert (x86_64_immediate_operand (*loc, mode));
24864 :
24865 : mode = SImode;
24866 : break;
24867 :
24868 : case E_SImode:
24869 : case E_HImode:
24870 : case E_QImode:
24871 : break;
24872 :
24873 0 : default:
24874 0 : gcc_unreachable ();
24875 : }
24876 :
24877 : /* Avoid overflows. */
24878 4974992 : if (mode_signbit_p (mode, *loc))
24879 : return false;
24880 :
24881 4974472 : val = INTVAL (*loc);
24882 :
24883 : /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
24884 : Exceptions: -128 encodes smaller than 128, so swap sign and op. */
24885 4974472 : if ((val < 0 && val != -128)
24886 3267294 : || val == 128)
24887 : {
24888 1718440 : *loc = GEN_INT (-val);
24889 1718440 : return true;
24890 : }
24891 :
24892 : return false;
24893 : }
24894 :
24895 : /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24896 : optabs would emit if we didn't have TFmode patterns. */
24897 :
24898 : void
24899 4518 : x86_emit_floatuns (rtx operands[2])
24900 : {
24901 4518 : rtx_code_label *neglab, *donelab;
24902 4518 : rtx i0, i1, f0, in, out;
24903 4518 : machine_mode mode, inmode;
24904 :
24905 4518 : inmode = GET_MODE (operands[1]);
24906 4518 : gcc_assert (inmode == SImode || inmode == DImode);
24907 :
24908 4518 : out = operands[0];
24909 4518 : in = force_reg (inmode, operands[1]);
24910 4518 : mode = GET_MODE (out);
24911 4518 : neglab = gen_label_rtx ();
24912 4518 : donelab = gen_label_rtx ();
24913 4518 : f0 = gen_reg_rtx (mode);
24914 :
24915 4518 : emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24916 :
24917 4518 : expand_float (out, in, 0);
24918 :
24919 4518 : emit_jump_insn (gen_jump (donelab));
24920 4518 : emit_barrier ();
24921 :
24922 4518 : emit_label (neglab);
24923 :
24924 4518 : i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24925 : 1, OPTAB_DIRECT);
24926 4518 : i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24927 : 1, OPTAB_DIRECT);
24928 4518 : i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24929 :
24930 4518 : expand_float (f0, i0, 0);
24931 :
24932 4518 : emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
24933 :
24934 4518 : emit_label (donelab);
24935 4518 : }
24936 :
24937 : /* Return the diagnostic message string if conversion from FROMTYPE to
24938 : TOTYPE is not allowed, NULL otherwise. */
24939 :
24940 : static const char *
24941 1083307915 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
24942 : {
24943 1083307915 : machine_mode from_mode = element_mode (fromtype);
24944 1083307915 : machine_mode to_mode = element_mode (totype);
24945 :
24946 1083307915 : if (!TARGET_SSE2 && from_mode != to_mode)
24947 : {
24948 : /* Do no allow conversions to/from BFmode/HFmode scalar types
24949 : when TARGET_SSE2 is not available. */
24950 468009 : if (from_mode == BFmode)
24951 : return N_("invalid conversion from type %<__bf16%> "
24952 : "without option %<-msse2%>");
24953 468008 : if (from_mode == HFmode)
24954 : return N_("invalid conversion from type %<_Float16%> "
24955 : "without option %<-msse2%>");
24956 468008 : if (to_mode == BFmode)
24957 : return N_("invalid conversion to type %<__bf16%> "
24958 : "without option %<-msse2%>");
24959 468008 : if (to_mode == HFmode)
24960 : return N_("invalid conversion to type %<_Float16%> "
24961 : "without option %<-msse2%>");
24962 : }
24963 :
24964 : /* Warn for silent implicit conversion between __bf16 and short,
24965 : since __bfloat16 is refined as real __bf16 instead of short
24966 : since GCC13. */
24967 1083307913 : if (element_mode (fromtype) != element_mode (totype)
24968 1083307913 : && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
24969 : {
24970 : /* Warn for silent implicit conversion where user may expect
24971 : a bitcast. */
24972 7766538 : if ((TYPE_MODE (fromtype) == BFmode
24973 279 : && TYPE_MODE (totype) == HImode)
24974 7766816 : || (TYPE_MODE (totype) == BFmode
24975 423 : && TYPE_MODE (fromtype) == HImode))
24976 1 : warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
24977 : "to real %<__bf16%> since GCC 13.1, be careful of "
24978 : "implicit conversion between %<__bf16%> and %<short%>; "
24979 : "an explicit bitcast may be needed here");
24980 : }
24981 :
24982 : /* Conversion allowed. */
24983 : return NULL;
24984 : }
24985 :
24986 : /* Return the diagnostic message string if the unary operation OP is
24987 : not permitted on TYPE, NULL otherwise. */
24988 :
24989 : static const char *
24990 90885078 : ix86_invalid_unary_op (int op, const_tree type)
24991 : {
24992 90885078 : machine_mode mmode = element_mode (type);
24993 : /* Reject all single-operand operations on BFmode/HFmode except for &
24994 : when TARGET_SSE2 is not available. */
24995 90885078 : if (!TARGET_SSE2 && op != ADDR_EXPR)
24996 : {
24997 111098 : if (mmode == BFmode)
24998 : return N_("operation not permitted on type %<__bf16%> "
24999 : "without option %<-msse2%>");
25000 111098 : if (mmode == HFmode)
25001 0 : return N_("operation not permitted on type %<_Float16%> "
25002 : "without option %<-msse2%>");
25003 : }
25004 :
25005 : /* Operation allowed. */
25006 : return NULL;
25007 : }
25008 :
25009 : /* Return the diagnostic message string if the binary operation OP is
25010 : not permitted on TYPE1 and TYPE2, NULL otherwise. */
25011 :
25012 : static const char *
25013 161405897 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
25014 : const_tree type2)
25015 : {
25016 161405897 : machine_mode type1_mode = element_mode (type1);
25017 161405897 : machine_mode type2_mode = element_mode (type2);
25018 : /* Reject all 2-operand operations on BFmode or HFmode
25019 : when TARGET_SSE2 is not available. */
25020 161405897 : if (!TARGET_SSE2)
25021 : {
25022 1008820 : if (type1_mode == BFmode || type2_mode == BFmode)
25023 : return N_("operation not permitted on type %<__bf16%> "
25024 : "without option %<-msse2%>");
25025 :
25026 1008820 : if (type1_mode == HFmode || type2_mode == HFmode)
25027 0 : return N_("operation not permitted on type %<_Float16%> "
25028 : "without option %<-msse2%>");
25029 : }
25030 :
25031 : /* Operation allowed. */
25032 : return NULL;
25033 : }
25034 :
25035 :
25036 : /* Target hook for scalar_mode_supported_p. */
25037 : static bool
25038 4715408 : ix86_scalar_mode_supported_p (scalar_mode mode)
25039 : {
25040 4715408 : if (DECIMAL_FLOAT_MODE_P (mode))
25041 650178 : return default_decimal_float_supported_p ();
25042 4065230 : else if (mode == TFmode)
25043 : return true;
25044 3732517 : else if (mode == HFmode || mode == BFmode)
25045 : return true;
25046 : else
25047 3069074 : return default_scalar_mode_supported_p (mode);
25048 : }
25049 :
25050 : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
25051 : if MODE is HFmode, and punt to the generic implementation otherwise. */
25052 :
25053 : static bool
25054 2285955 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
25055 : {
25056 : /* NB: Always return TRUE for HFmode so that the _Float16 type will
25057 : be defined by the C front-end for AVX512FP16 intrinsics. We will
25058 : issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
25059 : enabled. */
25060 1954709 : return ((mode == HFmode || mode == BFmode)
25061 3909418 : ? true
25062 1623463 : : default_libgcc_floating_mode_supported_p (mode));
25063 : }
25064 :
25065 : /* Implements target hook vector_mode_supported_p. */
25066 : static bool
25067 1345161298 : ix86_vector_mode_supported_p (machine_mode mode)
25068 : {
25069 : /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
25070 : either. */
25071 1482099567 : if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
25072 : return false;
25073 1345160888 : if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
25074 : return true;
25075 1130710187 : if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
25076 : return true;
25077 508323464 : if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
25078 : return true;
25079 367748299 : if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
25080 : return true;
25081 233494915 : if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
25082 233438545 : && VALID_MMX_REG_MODE (mode))
25083 : return true;
25084 34323974 : if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
25085 33687914 : && VALID_MMX_REG_MODE_3DNOW (mode))
25086 : return true;
25087 22864604 : if (mode == V2QImode)
25088 24808 : return true;
25089 : return false;
25090 : }
25091 :
25092 : /* Target hook for c_mode_for_suffix. */
25093 : static machine_mode
25094 191824 : ix86_c_mode_for_suffix (char suffix)
25095 : {
25096 191824 : if (suffix == 'q')
25097 : return TFmode;
25098 37 : if (suffix == 'w')
25099 : return XFmode;
25100 :
25101 0 : return VOIDmode;
25102 : }
25103 :
25104 : /* Helper function to map common constraints to non-EGPR ones.
25105 : All related constraints have h prefix, and h plus Upper letter
25106 : means the constraint is strictly EGPR enabled, while h plus
25107 : lower letter indicates the constraint is strictly gpr16 only.
25108 :
25109 : Specially for "g" constraint, split it to rmi as there is
25110 : no corresponding general constraint define for backend.
25111 :
25112 : Here is the full list to map constraints that may involve
25113 : gpr to h prefixed.
25114 :
25115 : "g" -> "jrjmi"
25116 : "r" -> "jr"
25117 : "m" -> "jm"
25118 : "<" -> "j<"
25119 : ">" -> "j>"
25120 : "o" -> "jo"
25121 : "V" -> "jV"
25122 : "p" -> "jp"
25123 : "Bm" -> "ja"
25124 : */
25125 :
25126 57 : static void map_egpr_constraints (vec<const char *> &constraints)
25127 : {
25128 67 : for (size_t i = 0; i < constraints.length(); i++)
25129 : {
25130 10 : const char *cur = constraints[i];
25131 :
25132 10 : if (startswith (cur, "=@cc"))
25133 0 : continue;
25134 :
25135 10 : int len = strlen (cur);
25136 10 : auto_vec<char> buf;
25137 :
25138 24 : for (int j = 0; j < len; j++)
25139 : {
25140 14 : switch (cur[j])
25141 : {
25142 2 : case 'g':
25143 2 : buf.safe_push ('j');
25144 2 : buf.safe_push ('r');
25145 2 : buf.safe_push ('j');
25146 2 : buf.safe_push ('m');
25147 2 : buf.safe_push ('i');
25148 2 : break;
25149 8 : case 'r':
25150 8 : case 'm':
25151 8 : case '<':
25152 8 : case '>':
25153 8 : case 'o':
25154 8 : case 'V':
25155 8 : case 'p':
25156 8 : buf.safe_push ('j');
25157 8 : buf.safe_push (cur[j]);
25158 8 : break;
25159 0 : case 'B':
25160 0 : if (cur[j + 1] == 'm')
25161 : {
25162 0 : buf.safe_push ('j');
25163 0 : buf.safe_push ('a');
25164 0 : j++;
25165 : }
25166 : else
25167 : {
25168 0 : buf.safe_push (cur[j]);
25169 0 : buf.safe_push (cur[j + 1]);
25170 0 : j++;
25171 : }
25172 : break;
25173 0 : case 'T':
25174 0 : case 'Y':
25175 0 : case 'W':
25176 0 : case 'j':
25177 0 : buf.safe_push (cur[j]);
25178 0 : buf.safe_push (cur[j + 1]);
25179 0 : j++;
25180 0 : break;
25181 0 : case '{':
25182 0 : do
25183 : {
25184 0 : buf.safe_push (cur[j]);
25185 0 : } while (cur[j++] != '}');
25186 : break;
25187 4 : default:
25188 4 : buf.safe_push (cur[j]);
25189 4 : break;
25190 : }
25191 : }
25192 10 : buf.safe_push ('\0');
25193 20 : constraints[i] = xstrdup (buf.address ());
25194 10 : }
25195 57 : }
25196 :
25197 : /* Worker function for TARGET_MD_ASM_ADJUST.
25198 :
25199 : We implement asm flag outputs, and maintain source compatibility
25200 : with the old cc0-based compiler. */
25201 :
25202 : static rtx_insn *
25203 108774 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
25204 : vec<machine_mode> & /*input_modes*/,
25205 : vec<const char *> &constraints, vec<rtx> &/*uses*/,
25206 : vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
25207 : location_t loc)
25208 : {
25209 108774 : bool saw_asm_flag = false;
25210 :
25211 108774 : start_sequence ();
25212 :
25213 108774 : if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
25214 57 : map_egpr_constraints (constraints);
25215 :
25216 294259 : for (unsigned i = 0, n = outputs.length (); i < n; ++i)
25217 : {
25218 77747 : const char *con = constraints[i];
25219 77747 : if (!startswith (con, "=@cc"))
25220 77659 : continue;
25221 88 : con += 4;
25222 88 : if (strchr (con, ',') != NULL)
25223 : {
25224 1 : error_at (loc, "alternatives not allowed in %<asm%> flag output");
25225 1 : continue;
25226 : }
25227 :
25228 87 : bool invert = false;
25229 87 : if (con[0] == 'n')
25230 19 : invert = true, con++;
25231 :
25232 87 : machine_mode mode = CCmode;
25233 87 : rtx_code code = UNKNOWN;
25234 :
25235 87 : switch (con[0])
25236 : {
25237 15 : case 'a':
25238 15 : if (con[1] == 0)
25239 : mode = CCAmode, code = EQ;
25240 4 : else if (con[1] == 'e' && con[2] == 0)
25241 : mode = CCCmode, code = NE;
25242 : break;
25243 11 : case 'b':
25244 11 : if (con[1] == 0)
25245 : mode = CCCmode, code = EQ;
25246 6 : else if (con[1] == 'e' && con[2] == 0)
25247 : mode = CCAmode, code = NE;
25248 : break;
25249 14 : case 'c':
25250 14 : if (con[1] == 0)
25251 : mode = CCCmode, code = EQ;
25252 : break;
25253 8 : case 'e':
25254 8 : if (con[1] == 0)
25255 : mode = CCZmode, code = EQ;
25256 : break;
25257 11 : case 'g':
25258 11 : if (con[1] == 0)
25259 : mode = CCGCmode, code = GT;
25260 5 : else if (con[1] == 'e' && con[2] == 0)
25261 : mode = CCGCmode, code = GE;
25262 : break;
25263 10 : case 'l':
25264 10 : if (con[1] == 0)
25265 : mode = CCGCmode, code = LT;
25266 5 : else if (con[1] == 'e' && con[2] == 0)
25267 : mode = CCGCmode, code = LE;
25268 : break;
25269 4 : case 'o':
25270 4 : if (con[1] == 0)
25271 : mode = CCOmode, code = EQ;
25272 : break;
25273 4 : case 'p':
25274 4 : if (con[1] == 0)
25275 : mode = CCPmode, code = EQ;
25276 : break;
25277 4 : case 's':
25278 4 : if (con[1] == 0)
25279 : mode = CCSmode, code = EQ;
25280 : break;
25281 6 : case 'z':
25282 6 : if (con[1] == 0)
25283 : mode = CCZmode, code = EQ;
25284 : break;
25285 : }
25286 1 : if (code == UNKNOWN)
25287 : {
25288 1 : error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
25289 1 : continue;
25290 : }
25291 86 : if (invert)
25292 19 : code = reverse_condition (code);
25293 :
25294 86 : rtx dest = outputs[i];
25295 86 : if (!saw_asm_flag)
25296 : {
25297 : /* This is the first asm flag output. Here we put the flags
25298 : register in as the real output and adjust the condition to
25299 : allow it. */
25300 75 : constraints[i] = "=Bf";
25301 75 : outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
25302 75 : saw_asm_flag = true;
25303 : }
25304 : else
25305 : {
25306 : /* We don't need the flags register as output twice. */
25307 11 : constraints[i] = "=X";
25308 11 : outputs[i] = gen_rtx_SCRATCH (SImode);
25309 : }
25310 :
25311 86 : rtx x = gen_rtx_REG (mode, FLAGS_REG);
25312 86 : x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
25313 :
25314 86 : machine_mode dest_mode = GET_MODE (dest);
25315 86 : if (!SCALAR_INT_MODE_P (dest_mode))
25316 : {
25317 3 : error_at (loc, "invalid type for %<asm%> flag output");
25318 3 : continue;
25319 : }
25320 :
25321 83 : if (dest_mode == QImode)
25322 73 : emit_insn (gen_rtx_SET (dest, x));
25323 : else
25324 : {
25325 10 : rtx reg = gen_reg_rtx (QImode);
25326 10 : emit_insn (gen_rtx_SET (reg, x));
25327 :
25328 10 : reg = convert_to_mode (dest_mode, reg, 1);
25329 10 : emit_move_insn (dest, reg);
25330 : }
25331 : }
25332 :
25333 108774 : rtx_insn *seq = end_sequence ();
25334 :
25335 108774 : if (saw_asm_flag)
25336 : return seq;
25337 : else
25338 : {
25339 : /* If we had no asm flag outputs, clobber the flags. */
25340 108699 : clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
25341 108699 : SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
25342 108699 : return NULL;
25343 : }
25344 : }
25345 :
25346 : /* Implements target vector targetm.asm.encode_section_info. */
25347 :
25348 : static void ATTRIBUTE_UNUSED
25349 10007564 : ix86_encode_section_info (tree decl, rtx rtl, int first)
25350 : {
25351 10007564 : default_encode_section_info (decl, rtl, first);
25352 :
25353 10007564 : if (ix86_in_large_data_p (decl))
25354 32 : SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
25355 10007564 : }
25356 :
25357 : /* Worker function for REVERSE_CONDITION. */
25358 :
25359 : enum rtx_code
25360 31387716 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
25361 : {
25362 31387716 : return (mode == CCFPmode
25363 31387716 : ? reverse_condition_maybe_unordered (code)
25364 27025750 : : reverse_condition (code));
25365 : }
25366 :
25367 : /* Output code to perform an x87 FP register move, from OPERANDS[1]
25368 : to OPERANDS[0]. */
25369 :
25370 : const char *
25371 649136 : output_387_reg_move (rtx_insn *insn, rtx *operands)
25372 : {
25373 649136 : if (REG_P (operands[0]))
25374 : {
25375 544019 : if (REG_P (operands[1])
25376 544019 : && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25377 : {
25378 295736 : if (REGNO (operands[0]) == FIRST_STACK_REG)
25379 275148 : return output_387_ffreep (operands, 0);
25380 : return "fstp\t%y0";
25381 : }
25382 248283 : if (STACK_TOP_P (operands[0]))
25383 248283 : return "fld%Z1\t%y1";
25384 : return "fst\t%y0";
25385 : }
25386 105117 : else if (MEM_P (operands[0]))
25387 : {
25388 105117 : gcc_assert (REG_P (operands[1]));
25389 105117 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25390 : return "fstp%Z0\t%y0";
25391 : else
25392 : {
25393 : /* There is no non-popping store to memory for XFmode.
25394 : So if we need one, follow the store with a load. */
25395 6219 : if (GET_MODE (operands[0]) == XFmode)
25396 : return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
25397 : else
25398 1888 : return "fst%Z0\t%y0";
25399 : }
25400 : }
25401 : else
25402 0 : gcc_unreachable();
25403 : }
25404 : #ifdef TARGET_SOLARIS
25405 : /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
25406 :
25407 : static void
25408 : i386_solaris_elf_named_section (const char *name, unsigned int flags,
25409 : tree decl)
25410 : {
25411 : /* With Binutils 2.15, the "@unwind" marker must be specified on
25412 : every occurrence of the ".eh_frame" section, not just the first
25413 : one. */
25414 : if (TARGET_64BIT
25415 : && strcmp (name, ".eh_frame") == 0)
25416 : {
25417 : fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25418 : flags & SECTION_WRITE ? "aw" : "a");
25419 : return;
25420 : }
25421 :
25422 : #if HAVE_SOLARIS_AS
25423 : if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
25424 : {
25425 : solaris_elf_asm_comdat_section (name, flags, decl);
25426 : return;
25427 : }
25428 :
25429 : /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
25430 : SPARC assembler. One cannot mix single-letter flags and #exclude, so
25431 : only emit the latter here. */
25432 : if (flags & SECTION_EXCLUDE)
25433 : {
25434 : fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
25435 : return;
25436 : }
25437 : #endif
25438 :
25439 : default_elf_asm_named_section (name, flags, decl);
25440 : }
25441 : #endif /* TARGET_SOLARIS */
25442 :
25443 : /* Return the mangling of TYPE if it is an extended fundamental type. */
25444 :
25445 : static const char *
25446 1035597378 : ix86_mangle_type (const_tree type)
25447 : {
25448 1035597378 : type = TYPE_MAIN_VARIANT (type);
25449 :
25450 1035597378 : if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25451 : && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25452 : return NULL;
25453 :
25454 561408502 : if (type == float128_type_node || type == float64x_type_node)
25455 : return NULL;
25456 :
25457 560708157 : switch (TYPE_MODE (type))
25458 : {
25459 : case E_BFmode:
25460 : return "DF16b";
25461 326185 : case E_HFmode:
25462 : /* _Float16 is "DF16_".
25463 : Align with clang's decision in https://reviews.llvm.org/D33719. */
25464 326185 : return "DF16_";
25465 643786 : case E_TFmode:
25466 : /* __float128 is "g". */
25467 643786 : return "g";
25468 7808326 : case E_XFmode:
25469 : /* "long double" or __float80 is "e". */
25470 7808326 : return "e";
25471 : default:
25472 : return NULL;
25473 : }
25474 : }
25475 :
25476 : /* Create C++ tinfo symbols for only conditionally available fundamental
25477 : types. */
25478 :
25479 : static void
25480 5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
25481 : {
25482 5 : extern tree ix86_float16_type_node;
25483 5 : extern tree ix86_bf16_type_node;
25484 :
25485 5 : if (!TARGET_SSE2)
25486 : {
25487 0 : if (!float16_type_node)
25488 0 : float16_type_node = ix86_float16_type_node;
25489 0 : if (!bfloat16_type_node)
25490 0 : bfloat16_type_node = ix86_bf16_type_node;
25491 0 : callback (float16_type_node);
25492 0 : callback (bfloat16_type_node);
25493 0 : float16_type_node = NULL_TREE;
25494 0 : bfloat16_type_node = NULL_TREE;
25495 : }
25496 5 : }
25497 :
25498 : static GTY(()) tree ix86_tls_stack_chk_guard_decl;
25499 :
25500 : static tree
25501 341 : ix86_stack_protect_guard (void)
25502 : {
25503 341 : if (TARGET_SSP_TLS_GUARD)
25504 : {
25505 266 : tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
25506 266 : int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
25507 266 : tree type = build_qualified_type (type_node, qual);
25508 266 : tree t;
25509 :
25510 266 : if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
25511 : {
25512 1 : t = ix86_tls_stack_chk_guard_decl;
25513 :
25514 1 : if (t == NULL)
25515 : {
25516 1 : rtx x;
25517 :
25518 1 : t = build_decl
25519 1 : (UNKNOWN_LOCATION, VAR_DECL,
25520 : get_identifier (ix86_stack_protector_guard_symbol_str),
25521 : type);
25522 1 : TREE_STATIC (t) = 1;
25523 1 : TREE_PUBLIC (t) = 1;
25524 1 : DECL_EXTERNAL (t) = 1;
25525 1 : TREE_USED (t) = 1;
25526 1 : TREE_THIS_VOLATILE (t) = 1;
25527 1 : DECL_ARTIFICIAL (t) = 1;
25528 1 : DECL_IGNORED_P (t) = 1;
25529 :
25530 : /* Do not share RTL as the declaration is visible outside of
25531 : current function. */
25532 1 : x = DECL_RTL (t);
25533 1 : RTX_FLAG (x, used) = 1;
25534 :
25535 1 : ix86_tls_stack_chk_guard_decl = t;
25536 : }
25537 : }
25538 : else
25539 : {
25540 265 : tree asptrtype = build_pointer_type (type);
25541 :
25542 265 : t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
25543 265 : t = build2 (MEM_REF, asptrtype, t,
25544 : build_int_cst (asptrtype, 0));
25545 265 : TREE_THIS_VOLATILE (t) = 1;
25546 : }
25547 :
25548 266 : return t;
25549 : }
25550 :
25551 75 : return default_stack_protect_guard ();
25552 : }
25553 :
25554 : /* Implement TARGET_STACK_PROTECT_GUARD_SYMBOL_P. */
25555 :
25556 : static bool
25557 210517 : ix86_stack_protect_guard_symbol_p (void)
25558 : {
25559 210517 : return TARGET_SSP_GLOBAL_GUARD;
25560 : }
25561 :
25562 : static bool
25563 939 : ix86_stack_protect_runtime_enabled_p (void)
25564 : {
25565 : /* Naked functions should not enable stack protector. */
25566 939 : return !ix86_function_naked (current_function_decl);
25567 : }
25568 :
25569 : /* For 32-bit code we can save PIC register setup by using
25570 : __stack_chk_fail_local hidden function instead of calling
25571 : __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25572 : register, so it is better to call __stack_chk_fail directly. */
25573 :
25574 : static tree ATTRIBUTE_UNUSED
25575 334 : ix86_stack_protect_fail (void)
25576 : {
25577 334 : return TARGET_64BIT
25578 334 : ? default_external_stack_protect_fail ()
25579 1 : : default_hidden_stack_protect_fail ();
25580 : }
25581 :
25582 : /* Select a format to encode pointers in exception handling data. CODE
25583 : is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25584 : true if the symbol may be affected by dynamic relocations.
25585 :
25586 : ??? All x86 object file formats are capable of representing this.
25587 : After all, the relocation needed is the same as for the call insn.
25588 : Whether or not a particular assembler allows us to enter such, I
25589 : guess we'll have to see. */
25590 :
25591 : int
25592 801117 : asm_preferred_eh_data_format (int code, int global)
25593 : {
25594 : /* PE-COFF is effectively always -fPIC because of the .reloc section. */
25595 801117 : if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
25596 : {
25597 38311 : int type = DW_EH_PE_sdata8;
25598 38311 : if (ptr_mode == SImode
25599 24333 : || ix86_cmodel == CM_SMALL_PIC
25600 38397 : || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25601 : type = DW_EH_PE_sdata4;
25602 53724 : return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25603 : }
25604 :
25605 762806 : if (ix86_cmodel == CM_SMALL
25606 18662 : || (ix86_cmodel == CM_MEDIUM && code))
25607 744157 : return DW_EH_PE_udata4;
25608 :
25609 : return DW_EH_PE_absptr;
25610 : }
25611 :
25612 : /* Cost of constructing or destructing a vector in VECMODE from/to elements
25613 : of ELMODE. */
25614 : static int
25615 780652 : ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
25616 : {
25617 1561304 : if (GET_MODE_BITSIZE (vecmode) < 128)
25618 586166 : return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
25619 293083 : * ix86_cost->sse_op);
25620 :
25621 487569 : int n = GET_MODE_BITSIZE (vecmode) / 128;
25622 487569 : int cost = 0;
25623 : /* Element inserts/extracts into/from N SSE vectors, the possible
25624 : GPR <-> XMM moves have to be accounted for elsewhere. */
25625 975138 : if (GET_MODE_BITSIZE (elmode) < 128)
25626 973892 : cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
25627 487569 : if (GET_MODE_BITSIZE (vecmode) >= 256
25628 497295 : && GET_MODE_BITSIZE (elmode) < 256)
25629 : /* N/2 vinserti128/vextracti128 for SSE <-> AVX256. */
25630 9726 : cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
25631 975138 : if (GET_MODE_BITSIZE (vecmode) == 512)
25632 : /* One vinserti64x4/vextracti64x4 for AVX256 <-> AVX512. */
25633 1966 : cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
25634 : return cost;
25635 : }
25636 :
25637 : /* Worker for ix86_builtin_vectorization_cost and the fallback calls
25638 : from ix86_vector_costs::add_stmt_cost. */
25639 : static int
25640 15161685 : ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
25641 : machine_mode mode)
25642 : {
25643 15161685 : bool fp = FLOAT_MODE_P (mode);
25644 15161685 : int index;
25645 15161685 : switch (type_of_cost)
25646 : {
25647 1735102 : case scalar_stmt:
25648 1735102 : return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
25649 :
25650 1815116 : case scalar_load:
25651 : /* load/store costs are relative to register move which is 2. Recompute
25652 : it to COSTS_N_INSNS so everything have same base. */
25653 3630232 : return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
25654 1815116 : : ix86_cost->int_load [2]) / 2;
25655 :
25656 3943219 : case scalar_store:
25657 7886438 : return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
25658 3943219 : : ix86_cost->int_store [2]) / 2;
25659 :
25660 1172222 : case vector_stmt:
25661 2344444 : return ix86_vec_cost (mode,
25662 2344444 : fp ? ix86_cost->addss : ix86_cost->sse_op);
25663 :
25664 1927995 : case vector_load:
25665 1927995 : index = sse_store_index (mode);
25666 : /* See PR82713 - we may end up being called on non-vector type. */
25667 1927995 : if (index < 0)
25668 99077 : index = 2;
25669 1927995 : return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
25670 :
25671 974560 : case vector_store:
25672 974560 : index = sse_store_index (mode);
25673 : /* See PR82713 - we may end up being called on non-vector type. */
25674 974560 : if (index < 0)
25675 91085 : index = 2;
25676 974560 : return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
25677 :
25678 760646 : case vec_to_scalar:
25679 760646 : case scalar_to_vec:
25680 760646 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25681 :
25682 : /* We should have separate costs for unaligned loads and gather/scatter.
25683 : Do that incrementally. */
25684 509627 : case unaligned_load:
25685 509627 : index = sse_store_index (mode);
25686 : /* See PR82713 - we may end up being called on non-vector type. */
25687 509627 : if (index < 0)
25688 2708 : index = 2;
25689 509627 : return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
25690 :
25691 839627 : case unaligned_store:
25692 839627 : index = sse_store_index (mode);
25693 : /* See PR82713 - we may end up being called on non-vector type. */
25694 839627 : if (index < 0)
25695 17206 : index = 2;
25696 839627 : return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
25697 :
25698 0 : case vector_gather_load:
25699 0 : return ix86_vec_cost (mode,
25700 0 : COSTS_N_INSNS
25701 : (ix86_cost->gather_static
25702 : + ix86_cost->gather_per_elt
25703 0 : * GET_MODE_NUNITS (mode)) / 2);
25704 :
25705 0 : case vector_scatter_store:
25706 0 : return ix86_vec_cost (mode,
25707 0 : COSTS_N_INSNS
25708 : (ix86_cost->scatter_static
25709 : + ix86_cost->scatter_per_elt
25710 0 : * GET_MODE_NUNITS (mode)) / 2);
25711 :
25712 355297 : case cond_branch_taken:
25713 355297 : return ix86_cost->cond_taken_branch_cost;
25714 :
25715 8587 : case cond_branch_not_taken:
25716 8587 : return ix86_cost->cond_not_taken_branch_cost;
25717 :
25718 281977 : case vec_perm:
25719 281977 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25720 :
25721 89524 : case vec_promote_demote:
25722 89524 : if (fp)
25723 11664 : return vec_fp_conversion_cost (ix86_tune_cost, mode);
25724 77860 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25725 :
25726 748186 : case vec_construct:
25727 748186 : case vec_deconstruct:
25728 1496372 : return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
25729 :
25730 0 : default:
25731 0 : gcc_unreachable ();
25732 : }
25733 : }
25734 :
25735 : /* Implement targetm.vectorize.builtin_vectorization_cost. */
25736 : static int
25737 9118454 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25738 : tree vectype, int)
25739 : {
25740 9118454 : machine_mode mode = TImode;
25741 9118454 : if (vectype != NULL)
25742 5786963 : mode = TYPE_MODE (vectype);
25743 9118454 : return ix86_default_vector_cost (type_of_cost, mode);
25744 : }
25745 :
25746 :
25747 : /* This function returns the calling abi specific va_list type node.
25748 : It returns the FNDECL specific va_list type. */
25749 :
25750 : static tree
25751 47714 : ix86_fn_abi_va_list (tree fndecl)
25752 : {
25753 47714 : if (!TARGET_64BIT)
25754 726 : return va_list_type_node;
25755 46988 : gcc_assert (fndecl != NULL_TREE);
25756 :
25757 46988 : if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
25758 12868 : return ms_va_list_type_node;
25759 : else
25760 34120 : return sysv_va_list_type_node;
25761 : }
25762 :
25763 : /* Returns the canonical va_list type specified by TYPE. If there
25764 : is no valid TYPE provided, it return NULL_TREE. */
25765 :
25766 : static tree
25767 247334 : ix86_canonical_va_list_type (tree type)
25768 : {
25769 247334 : if (TARGET_64BIT)
25770 : {
25771 246832 : if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
25772 5944 : return ms_va_list_type_node;
25773 :
25774 240888 : if ((TREE_CODE (type) == ARRAY_TYPE
25775 50109 : && integer_zerop (array_type_nelts_minus_one (type)))
25776 240888 : || POINTER_TYPE_P (type))
25777 : {
25778 188877 : tree elem_type = TREE_TYPE (type);
25779 188877 : if (TREE_CODE (elem_type) == RECORD_TYPE
25780 340822 : && lookup_attribute ("sysv_abi va_list",
25781 151945 : TYPE_ATTRIBUTES (elem_type)))
25782 151945 : return sysv_va_list_type_node;
25783 : }
25784 :
25785 88943 : return NULL_TREE;
25786 : }
25787 :
25788 502 : return std_canonical_va_list_type (type);
25789 : }
25790 :
25791 : /* Iterate through the target-specific builtin types for va_list.
25792 : IDX denotes the iterator, *PTREE is set to the result type of
25793 : the va_list builtin, and *PNAME to its internal type.
25794 : Returns zero if there is no element for this index, otherwise
25795 : IDX should be increased upon the next call.
25796 : Note, do not iterate a base builtin's name like __builtin_va_list.
25797 : Used from c_common_nodes_and_builtins. */
25798 :
25799 : static int
25800 638177 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25801 : {
25802 638177 : if (TARGET_64BIT)
25803 : {
25804 632805 : switch (idx)
25805 : {
25806 : default:
25807 : break;
25808 :
25809 210935 : case 0:
25810 210935 : *ptree = ms_va_list_type_node;
25811 210935 : *pname = "__builtin_ms_va_list";
25812 210935 : return 1;
25813 :
25814 210935 : case 1:
25815 210935 : *ptree = sysv_va_list_type_node;
25816 210935 : *pname = "__builtin_sysv_va_list";
25817 210935 : return 1;
25818 : }
25819 : }
25820 :
25821 : return 0;
25822 : }
25823 :
25824 : #undef TARGET_SCHED_DISPATCH
25825 : #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
25826 : #undef TARGET_SCHED_DISPATCH_DO
25827 : #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
25828 : #undef TARGET_SCHED_REASSOCIATION_WIDTH
25829 : #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
25830 : #undef TARGET_SCHED_REORDER
25831 : #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
25832 : #undef TARGET_SCHED_ADJUST_PRIORITY
25833 : #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
25834 : #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
25835 : #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
25836 : ix86_dependencies_evaluation_hook
25837 :
25838 :
25839 : /* Implementation of reassociation_width target hook used by
25840 : reassoc phase to identify parallelism level in reassociated
25841 : tree. Statements tree_code is passed in OPC. Arguments type
25842 : is passed in MODE. */
25843 :
25844 : static int
25845 30004 : ix86_reassociation_width (unsigned int op, machine_mode mode)
25846 : {
25847 30004 : int width = 1;
25848 : /* Vector part. */
25849 30004 : if (VECTOR_MODE_P (mode))
25850 : {
25851 8609 : int div = 1;
25852 8609 : if (INTEGRAL_MODE_P (mode))
25853 2692 : width = ix86_cost->reassoc_vec_int;
25854 5917 : else if (FLOAT_MODE_P (mode))
25855 5917 : width = ix86_cost->reassoc_vec_fp;
25856 :
25857 8609 : if (width == 1)
25858 : return 1;
25859 :
25860 : /* Znver1-4 Integer vector instructions execute in FP unit
25861 : and can execute 3 additions and one multiplication per cycle. */
25862 8604 : if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
25863 8604 : || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
25864 8604 : || ix86_tune == PROCESSOR_C86_4G_M4
25865 8604 : || ix86_tune == PROCESSOR_C86_4G_M6
25866 8604 : || ix86_tune == PROCESSOR_C86_4G_M7)
25867 2 : && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25868 : return 1;
25869 : /* Znver5 can do 2 integer multiplications per cycle with latency
25870 : of 3. */
25871 8604 : if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
25872 0 : && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25873 8604 : width = 6;
25874 :
25875 : /* Account for targets that splits wide vectors into multiple parts. */
25876 8606 : if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
25877 0 : div = GET_MODE_BITSIZE (mode) / 256;
25878 8604 : else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
25879 0 : div = GET_MODE_BITSIZE (mode) / 128;
25880 8604 : else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
25881 0 : div = GET_MODE_BITSIZE (mode) / 64;
25882 8604 : width = (width + div - 1) / div;
25883 8604 : }
25884 : /* Scalar part. */
25885 : else if (INTEGRAL_MODE_P (mode))
25886 15539 : width = ix86_cost->reassoc_int;
25887 : else if (FLOAT_MODE_P (mode))
25888 5856 : width = ix86_cost->reassoc_fp;
25889 :
25890 : /* Avoid using too many registers in 32bit mode. */
25891 29999 : if (!TARGET_64BIT && width > 2)
25892 30004 : width = 2;
25893 : return width;
25894 : }
25895 :
25896 : /* ??? No autovectorization into MMX or 3DNOW until we can reliably
25897 : place emms and femms instructions. */
25898 :
25899 : static machine_mode
25900 5152875 : ix86_preferred_simd_mode (scalar_mode mode)
25901 : {
25902 5152875 : if (!TARGET_SSE)
25903 859 : return word_mode;
25904 :
25905 5152016 : switch (mode)
25906 : {
25907 415560 : case E_QImode:
25908 415560 : if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25909 : return V64QImode;
25910 404132 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25911 : return V32QImode;
25912 : else
25913 383966 : return V16QImode;
25914 :
25915 195778 : case E_HImode:
25916 195778 : if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25917 : return V32HImode;
25918 185308 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25919 : return V16HImode;
25920 : else
25921 169225 : return V8HImode;
25922 :
25923 1522228 : case E_SImode:
25924 1522228 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25925 : return V16SImode;
25926 1454618 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25927 : return V8SImode;
25928 : else
25929 1301455 : return V4SImode;
25930 :
25931 1873147 : case E_DImode:
25932 1873147 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25933 : return V8DImode;
25934 1469054 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25935 : return V4DImode;
25936 : else
25937 1407145 : return V2DImode;
25938 :
25939 142653 : case E_HFmode:
25940 142653 : if (TARGET_AVX512FP16)
25941 : {
25942 141902 : if (TARGET_AVX512VL)
25943 : {
25944 69031 : if (TARGET_PREFER_AVX128)
25945 : return V8HFmode;
25946 68809 : else if (TARGET_PREFER_AVX256)
25947 : return V16HFmode;
25948 : }
25949 139559 : return V32HFmode;
25950 : }
25951 751 : return word_mode;
25952 :
25953 62894 : case E_BFmode:
25954 62894 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25955 : return V32BFmode;
25956 26462 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25957 : return V16BFmode;
25958 : else
25959 13459 : return V8BFmode;
25960 :
25961 612226 : case E_SFmode:
25962 612226 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25963 : return V16SFmode;
25964 412011 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25965 : return V8SFmode;
25966 : else
25967 345636 : return V4SFmode;
25968 :
25969 291906 : case E_DFmode:
25970 291906 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25971 : return V8DFmode;
25972 169991 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25973 : return V4DFmode;
25974 116420 : else if (TARGET_SSE2)
25975 : return V2DFmode;
25976 : /* FALLTHRU */
25977 :
25978 35680 : default:
25979 35680 : return word_mode;
25980 : }
25981 : }
25982 :
25983 : /* If AVX is enabled then try vectorizing with both 256bit and 128bit
25984 : vectors. If AVX512F is enabled then try vectorizing with 512bit,
25985 : 256bit and 128bit vectors. */
25986 :
25987 : static unsigned int
25988 2174521 : ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
25989 : {
25990 2174521 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25991 : {
25992 75244 : modes->safe_push (V64QImode);
25993 75244 : modes->safe_push (V32QImode);
25994 75244 : modes->safe_push (V16QImode);
25995 : }
25996 2099277 : else if (TARGET_AVX512F && all)
25997 : {
25998 558 : modes->safe_push (V32QImode);
25999 558 : modes->safe_push (V16QImode);
26000 558 : modes->safe_push (V64QImode);
26001 : }
26002 2098719 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
26003 : {
26004 28784 : modes->safe_push (V32QImode);
26005 28784 : modes->safe_push (V16QImode);
26006 : }
26007 2069935 : else if (TARGET_AVX && all)
26008 : {
26009 24 : modes->safe_push (V16QImode);
26010 24 : modes->safe_push (V32QImode);
26011 : }
26012 2069911 : else if (TARGET_SSE2)
26013 2067645 : modes->safe_push (V16QImode);
26014 :
26015 2174521 : if (TARGET_MMX_WITH_SSE)
26016 1777961 : modes->safe_push (V8QImode);
26017 :
26018 2174521 : if (TARGET_SSE2)
26019 2172255 : modes->safe_push (V4QImode);
26020 :
26021 2174521 : return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
26022 : }
26023 :
26024 : /* Implementation of targetm.vectorize.get_mask_mode. */
26025 :
26026 : static opt_machine_mode
26027 3366788 : ix86_get_mask_mode (machine_mode data_mode)
26028 : {
26029 3366788 : unsigned vector_size = GET_MODE_SIZE (data_mode);
26030 3366788 : unsigned nunits = GET_MODE_NUNITS (data_mode);
26031 3366788 : unsigned elem_size = vector_size / nunits;
26032 :
26033 : /* Scalar mask case. */
26034 478778 : if ((TARGET_AVX512F && vector_size == 64)
26035 3251628 : || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
26036 : /* AVX512FP16 only supports vector comparison
26037 : to kmask for _Float16. */
26038 3077127 : || (TARGET_AVX512VL && TARGET_AVX512FP16
26039 18335 : && GET_MODE_INNER (data_mode) == E_HFmode)
26040 6448587 : || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
26041 : {
26042 292685 : if (elem_size == 4
26043 292685 : || elem_size == 8
26044 135065 : || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
26045 262229 : return smallest_int_mode_for_size (nunits).require ();
26046 : }
26047 :
26048 3104559 : scalar_int_mode elem_mode
26049 3104559 : = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
26050 :
26051 3104559 : gcc_assert (elem_size * nunits == vector_size);
26052 :
26053 3104559 : return mode_for_vector (elem_mode, nunits);
26054 : }
26055 :
26056 :
26057 :
26058 : /* Return class of registers which could be used for pseudo of MODE
26059 : and of class RCLASS for spilling instead of memory. Return NO_REGS
26060 : if it is not possible or non-profitable. */
26061 :
26062 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26063 :
26064 : static reg_class_t
26065 6275391949 : ix86_spill_class (reg_class_t rclass, machine_mode mode)
26066 : {
26067 6275391949 : if (0 && TARGET_GENERAL_REGS_SSE_SPILL
26068 : && TARGET_SSE2
26069 : && TARGET_INTER_UNIT_MOVES_TO_VEC
26070 : && TARGET_INTER_UNIT_MOVES_FROM_VEC
26071 : && (mode == SImode || (TARGET_64BIT && mode == DImode))
26072 : && INTEGER_CLASS_P (rclass))
26073 : return ALL_SSE_REGS;
26074 6275391949 : return NO_REGS;
26075 : }
26076 :
26077 : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
26078 : but returns a lower bound. */
26079 :
26080 : static unsigned int
26081 1826598 : ix86_max_noce_ifcvt_seq_cost (edge e)
26082 : {
26083 1826598 : bool predictable_p = predictable_edge_p (e);
26084 1826598 : if (predictable_p)
26085 : {
26086 144823 : if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
26087 8 : return param_max_rtl_if_conversion_predictable_cost;
26088 : }
26089 : else
26090 : {
26091 1681775 : if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
26092 73 : return param_max_rtl_if_conversion_unpredictable_cost;
26093 : }
26094 :
26095 : /* For modern machines with deeper pipeline, the penalty for branch
26096 : misprediction could be higher than before to reset the pipeline
26097 : slots. Add parameter br_mispredict_scale as a factor to describe
26098 : the impact of resetting the pipeline. */
26099 :
26100 1826517 : return BRANCH_COST (true, predictable_p)
26101 1826517 : * ix86_tune_cost->br_mispredict_scale;
26102 : }
26103 :
26104 : /* Return true if SEQ is a good candidate as a replacement for the
26105 : if-convertible sequence described in IF_INFO. */
26106 :
26107 : static bool
26108 201790 : ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
26109 : {
26110 201790 : if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
26111 : {
26112 : int cmov_cnt = 0;
26113 : /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
26114 : Maybe we should allow even more conditional moves as long as they
26115 : are used far enough not to stall the CPU, or also consider
26116 : IF_INFO->TEST_BB succ edge probabilities. */
26117 238 : for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
26118 : {
26119 196 : rtx set = single_set (insn);
26120 196 : if (!set)
26121 0 : continue;
26122 196 : if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
26123 154 : continue;
26124 42 : rtx src = SET_SRC (set);
26125 42 : machine_mode mode = GET_MODE (src);
26126 42 : if (GET_MODE_CLASS (mode) != MODE_INT
26127 0 : && GET_MODE_CLASS (mode) != MODE_FLOAT)
26128 0 : continue;
26129 42 : if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
26130 41 : || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
26131 1 : continue;
26132 : /* insn is CMOV or FCMOV. */
26133 41 : if (++cmov_cnt > 1)
26134 : return false;
26135 : }
26136 : }
26137 :
26138 : /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
26139 : for movdfcc/movsfcc, and could possibly fail cost comparison.
26140 : Increase branch cost will hurt performance for other modes, so
26141 : specially add some preference for floating point ifcvt. */
26142 201782 : if (!TARGET_SSE4_1 && if_info->x
26143 157824 : && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
26144 34079 : && if_info->speed_p)
26145 : {
26146 27058 : unsigned cost = seq_cost (seq, true);
26147 :
26148 27058 : if (cost <= if_info->original_cost)
26149 : return true;
26150 :
26151 25872 : return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
26152 : }
26153 :
26154 174724 : return default_noce_conversion_profitable_p (seq, if_info);
26155 : }
26156 :
26157 : /* x86-specific vector costs. */
26158 : class ix86_vector_costs : public vector_costs
26159 : {
26160 : public:
26161 : ix86_vector_costs (vec_info *, bool);
26162 :
26163 : unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
26164 : stmt_vec_info stmt_info, slp_tree node,
26165 : tree vectype, int misalign,
26166 : vect_cost_model_location where) override;
26167 : void finish_cost (const vector_costs *) override;
26168 : bool better_main_loop_than_p (const vector_costs *) const override;
26169 : bool better_epilogue_loop_than_p (const vector_costs *other,
26170 : loop_vec_info main_loop) const;
26171 :
26172 : private:
26173 :
26174 : /* Estimate register pressure of the vectorized code. */
26175 : void ix86_vect_estimate_reg_pressure ();
26176 : /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
26177 : estimation of register pressure.
26178 : ??? Currently it's only used by vec_construct/scalar_to_vec
26179 : where we know it's not loaded from memory. */
26180 : unsigned m_num_gpr_needed[3];
26181 : unsigned m_num_sse_needed[3];
26182 : /* Number of 256-bit vector permutation. */
26183 : unsigned m_num_avx256_vec_perm[3];
26184 : /* Number of 512-bit vector permutation. */
26185 : unsigned m_num_avx512_vec_perm[3];
26186 : /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
26187 : unsigned m_num_reduc[X86_REDUC_LAST];
26188 : /* Don't do unroll if m_prefer_unroll is false, default is true. */
26189 : bool m_prefer_unroll;
26190 : };
26191 :
26192 2095344 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
26193 : : vector_costs (vinfo, costing_for_scalar),
26194 2095344 : m_num_gpr_needed (),
26195 2095344 : m_num_sse_needed (),
26196 2095344 : m_num_avx256_vec_perm (),
26197 2095344 : m_num_avx512_vec_perm (),
26198 2095344 : m_num_reduc (),
26199 2095344 : m_prefer_unroll (true)
26200 2095344 : {}
26201 :
26202 : /* Implement targetm.vectorize.create_costs. */
26203 :
26204 : static vector_costs *
26205 2095344 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
26206 : {
26207 2095344 : return new ix86_vector_costs (vinfo, costing_for_scalar);
26208 : }
26209 :
26210 : /* Return true if a vec_perm should be counted as a cross-lane vector
26211 : permutation for a vector with NUNITS elements. */
26212 : static bool
26213 5344 : ix86_count_cross_lane_perm_p (vec_info *vinfo, slp_tree node, unsigned nunits)
26214 : {
26215 : /* TODO: For loop vectorization with no SLP load-permutation
26216 : information, conservatively treat these perms as cross-lane.
26217 : Repeated-index cases such as {0, 0, 0, 0} are emitted as
26218 : separate vec_perm_exprs for each index, so we cannot reliably
26219 : separate false positives from real cross-lane shuffles yet. */
26220 5344 : if (!node
26221 5339 : || !SLP_TREE_LOAD_PERMUTATION (node).exists ()
26222 9624 : || !is_a<bb_vec_info> (vinfo))
26223 : return true;
26224 :
26225 41 : unsigned half = nunits / 2;
26226 41 : bool allsame = true;
26227 41 : unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
26228 41 : bool cross_lane_p = false;
26229 :
26230 217 : for (unsigned i = 0; i != SLP_TREE_LANES (node); i++)
26231 : {
26232 215 : unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
26233 : /* allsame is just a broadcast. */
26234 215 : if (tmp != first)
26235 106 : allsame = false;
26236 :
26237 : /* The load permutation can cover multiple vectors, so compare
26238 : source and destination lanes modulo NUNITS. */
26239 215 : tmp = tmp & (nunits - 1);
26240 215 : unsigned index = i & (nunits - 1);
26241 215 : if ((index < half && tmp >= half) || (index >= half && tmp < half))
26242 67 : cross_lane_p = true;
26243 :
26244 215 : if (!allsame && cross_lane_p)
26245 : return true;
26246 : }
26247 :
26248 : return false;
26249 : }
26250 :
26251 : unsigned
26252 7470412 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
26253 : stmt_vec_info stmt_info, slp_tree node,
26254 : tree vectype, int,
26255 : vect_cost_model_location where)
26256 : {
26257 7470412 : unsigned retval = 0;
26258 7470412 : bool scalar_p
26259 : = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
26260 7470412 : int stmt_cost = - 1;
26261 :
26262 7470412 : bool fp = false;
26263 7470412 : machine_mode mode = scalar_p ? SImode : TImode;
26264 :
26265 7470412 : if (vectype != NULL)
26266 : {
26267 3358614 : fp = FLOAT_TYPE_P (vectype);
26268 3358614 : mode = TYPE_MODE (vectype);
26269 3358614 : if (scalar_p)
26270 283098 : mode = TYPE_MODE (TREE_TYPE (vectype));
26271 : }
26272 : /* When we are costing a scalar stmt use the scalar stmt to get at the
26273 : type of the operation. */
26274 4111798 : else if (scalar_p && stmt_info)
26275 4028581 : if (tree lhs = gimple_get_lhs (stmt_info->stmt))
26276 : {
26277 3853104 : fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
26278 3853104 : mode = TYPE_MODE (TREE_TYPE (lhs));
26279 : }
26280 :
26281 7470412 : if ((kind == vector_stmt || kind == scalar_stmt)
26282 1965246 : && stmt_info
26283 9426429 : && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
26284 : {
26285 1574566 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26286 : /*machine_mode inner_mode = mode;
26287 : if (VECTOR_MODE_P (mode))
26288 : inner_mode = GET_MODE_INNER (mode);*/
26289 :
26290 1574566 : switch (subcode)
26291 : {
26292 614485 : case PLUS_EXPR:
26293 614485 : case POINTER_PLUS_EXPR:
26294 614485 : case MINUS_EXPR:
26295 614485 : if (kind == scalar_stmt)
26296 : {
26297 388732 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26298 96728 : stmt_cost = ix86_cost->addss;
26299 292004 : else if (X87_FLOAT_MODE_P (mode))
26300 132 : stmt_cost = ix86_cost->fadd;
26301 : else
26302 291872 : stmt_cost = ix86_cost->add;
26303 : }
26304 : else
26305 225753 : stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
26306 : : ix86_cost->sse_op);
26307 : break;
26308 :
26309 252196 : case MULT_EXPR:
26310 : /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
26311 : take it as MULT_EXPR. */
26312 252196 : case MULT_HIGHPART_EXPR:
26313 252196 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26314 252196 : break;
26315 : /* There's no direct instruction for WIDEN_MULT_EXPR,
26316 : take emulation into account. */
26317 1074 : case WIDEN_MULT_EXPR:
26318 2148 : stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
26319 1074 : TYPE_UNSIGNED (vectype));
26320 1074 : break;
26321 :
26322 10671 : case NEGATE_EXPR:
26323 10671 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26324 3548 : stmt_cost = ix86_cost->sse_op;
26325 7123 : else if (X87_FLOAT_MODE_P (mode))
26326 0 : stmt_cost = ix86_cost->fchs;
26327 7123 : else if (VECTOR_MODE_P (mode))
26328 3625 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26329 : else
26330 3498 : stmt_cost = ix86_cost->add;
26331 : break;
26332 14091 : case TRUNC_DIV_EXPR:
26333 14091 : case CEIL_DIV_EXPR:
26334 14091 : case FLOOR_DIV_EXPR:
26335 14091 : case ROUND_DIV_EXPR:
26336 14091 : case TRUNC_MOD_EXPR:
26337 14091 : case CEIL_MOD_EXPR:
26338 14091 : case FLOOR_MOD_EXPR:
26339 14091 : case RDIV_EXPR:
26340 14091 : case ROUND_MOD_EXPR:
26341 14091 : case EXACT_DIV_EXPR:
26342 14091 : stmt_cost = ix86_division_cost (ix86_cost, mode);
26343 14091 : break;
26344 :
26345 72348 : case RSHIFT_EXPR:
26346 72348 : case LSHIFT_EXPR:
26347 72348 : case LROTATE_EXPR:
26348 72348 : case RROTATE_EXPR:
26349 72348 : {
26350 72348 : tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
26351 72348 : tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
26352 72348 : stmt_cost = ix86_shift_rotate_cost
26353 72348 : (ix86_cost,
26354 : (subcode == RSHIFT_EXPR
26355 37580 : && !TYPE_UNSIGNED (TREE_TYPE (op1)))
26356 : ? ASHIFTRT : LSHIFTRT, mode,
26357 72348 : TREE_CODE (op2) == INTEGER_CST,
26358 72348 : cst_and_fits_in_hwi (op2)
26359 41289 : ? int_cst_value (op2) : -1,
26360 : false, false, NULL, NULL);
26361 : }
26362 72348 : break;
26363 98399 : case NOP_EXPR:
26364 : /* Only sign-conversions are free. */
26365 98399 : if (tree_nop_conversion_p
26366 98399 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
26367 98399 : TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
26368 : stmt_cost = 0;
26369 98399 : else if (fp)
26370 10240 : stmt_cost = vec_fp_conversion_cost
26371 10240 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26372 : break;
26373 :
26374 23176 : case FLOAT_EXPR:
26375 23176 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26376 17403 : stmt_cost = ix86_cost->cvtsi2ss;
26377 5773 : else if (X87_FLOAT_MODE_P (mode))
26378 : /* TODO: We do not have cost tables for x87. */
26379 50 : stmt_cost = ix86_cost->fadd;
26380 : else
26381 5723 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26382 : break;
26383 :
26384 2203 : case FIX_TRUNC_EXPR:
26385 2203 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26386 0 : stmt_cost = ix86_cost->cvtss2si;
26387 2203 : else if (X87_FLOAT_MODE_P (mode))
26388 : /* TODO: We do not have cost tables for x87. */
26389 0 : stmt_cost = ix86_cost->fadd;
26390 : else
26391 2203 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26392 : break;
26393 :
26394 56133 : case COND_EXPR:
26395 56133 : {
26396 : /* SSE2 conditional move sequence is:
26397 : pcmpgtd %xmm5, %xmm0 (accounted separately)
26398 : pand %xmm0, %xmm2
26399 : pandn %xmm1, %xmm0
26400 : por %xmm2, %xmm0
26401 : while SSE4 uses cmp + blend
26402 : and AVX512 masked moves.
26403 :
26404 : The condition is accounted separately since we usually have
26405 : p = a < b
26406 : c = p ? x : y
26407 : and we will account first statement as setcc. Exception is when
26408 : p is loaded from memory as bool and then we will not account
26409 : the compare, but there is no way to check for this. */
26410 :
26411 56133 : int ninsns = TARGET_SSE4_1 ? 1 : 3;
26412 :
26413 : /* If one of parameters is 0 or -1 the sequence will be simplified:
26414 : (if_true & mask) | (if_false & ~mask) -> if_true & mask */
26415 23466 : if (ninsns > 1
26416 23466 : && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26417 23112 : || zerop (gimple_assign_rhs3 (stmt_info->stmt))
26418 13209 : || integer_minus_onep
26419 13209 : (gimple_assign_rhs2 (stmt_info->stmt))
26420 12770 : || integer_minus_onep
26421 12770 : (gimple_assign_rhs3 (stmt_info->stmt))))
26422 : ninsns = 1;
26423 :
26424 56133 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26425 5044 : stmt_cost = ninsns * ix86_cost->sse_op;
26426 51089 : else if (X87_FLOAT_MODE_P (mode))
26427 : /* x87 requires conditional branch. We don't have cost for
26428 : that. */
26429 : ;
26430 51080 : else if (VECTOR_MODE_P (mode))
26431 21259 : stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
26432 : else
26433 : /* compare (accounted separately) + cmov. */
26434 29821 : stmt_cost = ix86_cost->add;
26435 : }
26436 : break;
26437 :
26438 26729 : case MIN_EXPR:
26439 26729 : case MAX_EXPR:
26440 26729 : if (fp)
26441 : {
26442 1474 : if (X87_FLOAT_MODE_P (mode)
26443 512 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26444 : /* x87 requires conditional branch. We don't have cost for
26445 : that. */
26446 : ;
26447 : else
26448 : /* minss */
26449 1474 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26450 : }
26451 : else
26452 : {
26453 25255 : if (VECTOR_MODE_P (mode))
26454 : {
26455 5151 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26456 : /* vpmin was introduced in SSE3.
26457 : SSE2 needs pcmpgtd + pand + pandn + pxor.
26458 : If one of parameters is 0 or -1 the sequence is simplified
26459 : to pcmpgtd + pand. */
26460 5151 : if (!TARGET_SSSE3)
26461 : {
26462 3194 : if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26463 4617 : || integer_minus_onep
26464 1423 : (gimple_assign_rhs2 (stmt_info->stmt)))
26465 1771 : stmt_cost *= 2;
26466 : else
26467 1423 : stmt_cost *= 4;
26468 : }
26469 : }
26470 : else
26471 : /* cmp + cmov. */
26472 20104 : stmt_cost = ix86_cost->add * 2;
26473 : }
26474 : break;
26475 :
26476 1313 : case ABS_EXPR:
26477 1313 : case ABSU_EXPR:
26478 1313 : if (fp)
26479 : {
26480 471 : if (X87_FLOAT_MODE_P (mode)
26481 171 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26482 : /* fabs. */
26483 0 : stmt_cost = ix86_cost->fabs;
26484 : else
26485 : /* andss of sign bit. */
26486 471 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26487 : }
26488 : else
26489 : {
26490 842 : if (VECTOR_MODE_P (mode))
26491 : {
26492 108 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26493 : /* vabs was introduced in SSSE3.
26494 : Without SSSE3 we use psrat + pxor + psub. */
26495 108 : if (!TARGET_SSSE3)
26496 78 : stmt_cost *= 3;
26497 : }
26498 : else
26499 : /* neg + cmov. */
26500 734 : stmt_cost = ix86_cost->add * 2;
26501 : }
26502 : break;
26503 :
26504 148184 : case BIT_IOR_EXPR:
26505 148184 : case BIT_XOR_EXPR:
26506 148184 : case BIT_AND_EXPR:
26507 148184 : case BIT_NOT_EXPR:
26508 148184 : gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
26509 : && !X87_FLOAT_MODE_P (mode));
26510 148184 : if (VECTOR_MODE_P (mode))
26511 50712 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26512 : else
26513 97472 : stmt_cost = ix86_cost->add;
26514 : break;
26515 :
26516 253564 : default:
26517 253564 : if (truth_value_p (subcode))
26518 : {
26519 99008 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26520 : /* CMPccS? instructions are cheap, so use sse_op. While they
26521 : produce a mask which may need to be turned to 0/1 by and,
26522 : expect that this will be optimized away in a common case. */
26523 0 : stmt_cost = ix86_cost->sse_op;
26524 99008 : else if (X87_FLOAT_MODE_P (mode))
26525 : /* fcmp + setcc. */
26526 0 : stmt_cost = ix86_cost->fadd + ix86_cost->add;
26527 99008 : else if (VECTOR_MODE_P (mode))
26528 20623 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26529 : else
26530 : /* setcc. */
26531 78385 : stmt_cost = ix86_cost->add;
26532 : break;
26533 : }
26534 : break;
26535 : }
26536 : }
26537 :
26538 : /* Record number of load/store/gather/scatter in vectorized body. */
26539 7470412 : if (where == vect_body && !m_costing_for_scalar)
26540 : {
26541 1938505 : int scale = 1;
26542 1938505 : if (vectype
26543 3868540 : && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
26544 59915 : && TARGET_AVX512_SPLIT_REGS)
26545 3859772 : || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26546 118330 : && TARGET_AVX256_SPLIT_REGS)))
26547 : scale = 2;
26548 :
26549 1938505 : switch (kind)
26550 : {
26551 : /* Emulated gather/scatter or any scalarization. */
26552 111519 : case scalar_load:
26553 111519 : case scalar_stmt:
26554 111519 : case scalar_store:
26555 111519 : case vector_gather_load:
26556 111519 : case vector_scatter_store:
26557 111519 : m_prefer_unroll = false;
26558 111519 : break;
26559 :
26560 557814 : case vector_stmt:
26561 557814 : case vec_to_scalar:
26562 : /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
26563 : unroll in the vectorizer will enable partial sum. */
26564 557814 : if (stmt_info
26565 557788 : && vect_is_reduction (stmt_info)
26566 626992 : && stmt_info->stmt)
26567 : {
26568 : /* Handle __builtin_fma. */
26569 69178 : if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
26570 : {
26571 11 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26572 11 : break;
26573 : }
26574 :
26575 69167 : if (!is_gimple_assign (stmt_info->stmt))
26576 : break;
26577 :
26578 66401 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26579 66401 : machine_mode inner_mode = GET_MODE_INNER (mode);
26580 66401 : tree rhs1, rhs2;
26581 66401 : bool native_vnni_p = true;
26582 66401 : gimple* def;
26583 66401 : machine_mode mode_rhs;
26584 66401 : switch (subcode)
26585 : {
26586 49730 : case PLUS_EXPR:
26587 49730 : case MINUS_EXPR:
26588 49730 : if (!fp || !flag_associative_math
26589 26417 : || flag_fp_contract_mode != FP_CONTRACT_FAST)
26590 : break;
26591 :
26592 : /* FMA condition for different modes. */
26593 26417 : if (((inner_mode == DFmode || inner_mode == SFmode)
26594 26387 : && !TARGET_FMA && !TARGET_AVX512VL)
26595 8624 : || (inner_mode == HFmode && !TARGET_AVX512FP16)
26596 8624 : || (inner_mode == BFmode && !TARGET_AVX10_2))
26597 : break;
26598 :
26599 : /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
26600 : to FMA/FNMA after vectorization. */
26601 8624 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26602 8624 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26603 8624 : if (subcode == PLUS_EXPR
26604 6771 : && TREE_CODE (rhs1) == SSA_NAME
26605 6771 : && (def = SSA_NAME_DEF_STMT (rhs1), true)
26606 6771 : && is_gimple_assign (def)
26607 12029 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26608 1992 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26609 6632 : else if (TREE_CODE (rhs2) == SSA_NAME
26610 6632 : && (def = SSA_NAME_DEF_STMT (rhs2), true)
26611 6632 : && is_gimple_assign (def)
26612 13175 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26613 6537 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26614 : break;
26615 :
26616 : /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
26617 : WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
26618 : SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */
26619 610 : case DOT_PROD_EXPR:
26620 610 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26621 610 : mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
26622 610 : if (mode_rhs == QImode)
26623 : {
26624 337 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26625 337 : signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
26626 337 : signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
26627 :
26628 : /* vpdpbusd. */
26629 337 : if (signop1_p != signop2_p)
26630 85 : native_vnni_p
26631 85 : = (GET_MODE_SIZE (mode) == 64
26632 85 : ? TARGET_AVX512VNNI
26633 28 : : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
26634 85 : || TARGET_AVXVNNI));
26635 : else
26636 : /* vpdpbssd. */
26637 252 : native_vnni_p
26638 268 : = (GET_MODE_SIZE (mode) == 64
26639 252 : ? TARGET_AVX10_2
26640 236 : : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
26641 : }
26642 610 : m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
26643 :
26644 : /* Discourage unrolling and partial sums for
26645 : emulated DOT_PROD_EXPR. */
26646 610 : if (!native_vnni_p)
26647 153 : m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
26648 : break;
26649 :
26650 106 : case SAD_EXPR:
26651 106 : m_num_reduc[X86_REDUC_SAD] += count * scale;
26652 106 : break;
26653 :
26654 : default:
26655 : break;
26656 : }
26657 : }
26658 :
26659 : default:
26660 : break;
26661 : }
26662 : }
26663 :
26664 :
26665 7470412 : combined_fn cfn;
26666 7470412 : if ((kind == vector_stmt || kind == scalar_stmt)
26667 1965246 : && stmt_info
26668 1956017 : && stmt_info->stmt
26669 9426429 : && is_gimple_call (stmt_info->stmt))
26670 : {
26671 26426 : tree fndecl = gimple_call_fndecl (stmt_info->stmt);
26672 26426 : cgraph_node *node;
26673 26426 : if ((fndecl
26674 5376 : && (node = cgraph_node::get (fndecl))
26675 5343 : && node->simd_clones)
26676 30785 : || gimple_call_internal_p (stmt_info->stmt, IFN_MASK_CALL))
26677 2484 : stmt_cost = 10 * ix86_vec_cost (mode,
26678 1242 : mode == SFmode ? ix86_cost->fmass
26679 : : ix86_cost->fmasd);
26680 25184 : else if ((cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
26681 23814 : switch (cfn)
26682 : {
26683 107 : case CFN_FMA:
26684 107 : stmt_cost = ix86_vec_cost (mode,
26685 107 : mode == SFmode ? ix86_cost->fmass
26686 : : ix86_cost->fmasd);
26687 107 : break;
26688 62 : case CFN_MULH:
26689 62 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26690 62 : break;
26691 : default:
26692 : break;
26693 : }
26694 : }
26695 :
26696 7470412 : if (kind == vec_promote_demote)
26697 : {
26698 61462 : int outer_size
26699 : = tree_to_uhwi
26700 61462 : (TYPE_SIZE
26701 61462 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
26702 61462 : int inner_size
26703 : = tree_to_uhwi
26704 61462 : (TYPE_SIZE
26705 61462 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
26706 61462 : bool inner_fp = FLOAT_TYPE_P
26707 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
26708 :
26709 5582 : if (fp && inner_fp)
26710 5077 : stmt_cost = vec_fp_conversion_cost
26711 5077 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26712 56385 : else if (fp && !inner_fp)
26713 6125 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26714 50260 : else if (!fp && inner_fp)
26715 505 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26716 : else
26717 49755 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26718 : /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
26719 : greater than inner size we will end up doing two conversions and
26720 : packing them. We always pack pairs; if the size difference is greater
26721 : it is split into multiple demote operations. */
26722 61462 : if (inner_size > outer_size)
26723 23313 : stmt_cost = stmt_cost * 2
26724 23313 : + ix86_vec_cost (mode, ix86_cost->sse_op);
26725 : }
26726 :
26727 : /* If we do elementwise loads into a vector then we are bound by
26728 : latency and execution resources for the many scalar loads
26729 : (AGU and load ports). Try to account for this by scaling the
26730 : construction cost by the number of elements involved. */
26731 7470412 : if ((kind == vec_construct || kind == vec_deconstruct)
26732 7470412 : && ((node
26733 306407 : && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
26734 317628 : || SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP)
26735 42502 : && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
26736 : (SLP_TREE_REPRESENTATIVE (node))))
26737 : != INTEGER_CST))
26738 22728 : || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
26739 : {
26740 32466 : auto lsdata = static_cast<vect_load_store_data *> (node->data);
26741 32466 : tree ls_type = lsdata->ls_type ? lsdata->ls_type : vectype;
26742 32466 : tree ls_eltype
26743 32466 : = lsdata->ls_eltype ? lsdata->ls_eltype : TREE_TYPE (ls_type);
26744 32466 : stmt_cost = ix86_vector_cd_cost (TYPE_MODE (ls_type),
26745 32466 : TYPE_MODE (ls_eltype));
26746 32466 : stmt_cost *= (GET_MODE_BITSIZE (TYPE_MODE (ls_type))
26747 64932 : / GET_MODE_BITSIZE (TYPE_MODE (ls_eltype)) + 1);
26748 : }
26749 7437946 : else if ((kind == vec_construct || kind == scalar_to_vec)
26750 481652 : && node
26751 450295 : && SLP_TREE_DEF_TYPE (node) == vect_external_def)
26752 : {
26753 307651 : stmt_cost = ix86_default_vector_cost (kind, mode);
26754 307651 : unsigned i;
26755 307651 : tree op;
26756 1311420 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26757 696118 : if (TREE_CODE (op) == SSA_NAME)
26758 473305 : TREE_VISITED (op) = 0;
26759 1003769 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26760 : {
26761 696118 : if (TREE_CODE (op) != SSA_NAME
26762 473305 : || TREE_VISITED (op))
26763 256915 : continue;
26764 439203 : TREE_VISITED (op) = 1;
26765 439203 : gimple *def = SSA_NAME_DEF_STMT (op);
26766 439203 : tree tem;
26767 : /* Look through a conversion. */
26768 439203 : if (is_gimple_assign (def)
26769 250382 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
26770 28508 : && ((tem = gimple_assign_rhs1 (def)), true)
26771 467711 : && TREE_CODE (tem) == SSA_NAME)
26772 28297 : def = SSA_NAME_DEF_STMT (tem);
26773 : /* When the component is loaded from memory without sign-
26774 : or zero-extension we can move it to a vector register and/or
26775 : insert it via vpinsr with a memory operand. */
26776 439203 : if (gimple_assign_load_p (def)
26777 131682 : && tree_nop_conversion_p (TREE_TYPE (op),
26778 131682 : TREE_TYPE (gimple_assign_lhs (def)))
26779 696287 : && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
26780 5584 : || TARGET_SSE4_1))
26781 : ;
26782 : /* When the component is extracted from a vector it is already
26783 : in a vector register. */
26784 315224 : else if (is_gimple_assign (def)
26785 121518 : && gimple_assign_rhs_code (def) == BIT_FIELD_REF
26786 317874 : && VECTOR_TYPE_P (TREE_TYPE
26787 : (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
26788 : ;
26789 : else
26790 : {
26791 312997 : if (fp)
26792 : {
26793 : /* Scalar FP values residing in x87 registers need to be
26794 : spilled and reloaded. */
26795 13822 : auto mode2 = TYPE_MODE (TREE_TYPE (op));
26796 13822 : if (IS_STACK_MODE (mode2))
26797 : {
26798 967 : int cost
26799 : = (ix86_cost->hard_register.fp_store[mode2 == SFmode
26800 967 : ? 0 : 1]
26801 967 : + ix86_cost->sse_load[sse_store_index (mode2)]);
26802 967 : stmt_cost += COSTS_N_INSNS (cost) / 2;
26803 : }
26804 13822 : m_num_sse_needed[where]++;
26805 : }
26806 : else
26807 : {
26808 299175 : m_num_gpr_needed[where]++;
26809 :
26810 299175 : stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
26811 : }
26812 : }
26813 : }
26814 1003769 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26815 696118 : if (TREE_CODE (op) == SSA_NAME)
26816 473305 : TREE_VISITED (op) = 0;
26817 : }
26818 7470412 : if (stmt_cost == -1)
26819 5735580 : stmt_cost = ix86_default_vector_cost (kind, mode);
26820 :
26821 : /* BIT_FIELD_REF <vect_**, 64, 0> with count 0 costs 0 in body. */
26822 7470412 : if (kind == vec_perm && vectype && count != 0)
26823 : {
26824 99274 : unsigned vec_size = GET_MODE_SIZE (TYPE_MODE (vectype));
26825 99274 : unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
26826 99274 : unsigned *num_vec_perm = NULL;
26827 :
26828 99274 : if (vec_size == 32)
26829 4148 : num_vec_perm = m_num_avx256_vec_perm;
26830 95126 : else if (vec_size == 64)
26831 1196 : num_vec_perm = m_num_avx512_vec_perm;
26832 :
26833 5344 : if (num_vec_perm && ix86_count_cross_lane_perm_p (m_vinfo, node, nunits))
26834 : {
26835 5342 : num_vec_perm[where] += count;
26836 5342 : if (dump_file && (dump_flags & TDF_DETAILS))
26837 : {
26838 358 : fprintf (dump_file,
26839 : "Detected avx%u cross-lane permutation: ", vec_size * 8);
26840 358 : if (stmt_info)
26841 355 : print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
26842 358 : fprintf (dump_file, " \n");
26843 : }
26844 : }
26845 : }
26846 :
26847 : /* Penalize DFmode vector operations for Bonnell. */
26848 7470412 : if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
26849 7470495 : && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
26850 12 : stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
26851 :
26852 : /* Statements in an inner loop relative to the loop being
26853 : vectorized are weighted more heavily. The value here is
26854 : arbitrary and could potentially be improved with analysis. */
26855 7470412 : retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
26856 :
26857 : /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
26858 : for Silvermont as it has out of order integer pipeline and can execute
26859 : 2 scalar instruction per tick, but has in order SIMD pipeline. */
26860 7470412 : if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
26861 7470412 : || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
26862 2458 : && stmt_info && stmt_info->stmt)
26863 : {
26864 2114 : tree lhs_op = gimple_get_lhs (stmt_info->stmt);
26865 2114 : if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
26866 1585 : retval = (retval * 17) / 10;
26867 : }
26868 :
26869 7470412 : m_costs[where] += retval;
26870 :
26871 7470412 : return retval;
26872 : }
26873 : /* Add a spill penalty to each of the three m_costs[] entries whose recorded GPR or SSE register demand exceeds the number of registers assumed available. The per-register penalty is half of COSTS_N_INSNS of int_store[2] for GPRs and of sse_store[0] for SSE registers. */
26874 : void
26875 1807703 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26876 : {
26877 1807703 : unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26878 1807703 : unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26879 :
26880 : /* ??? Is there a better way to get the number of FP registers available on the target? For now use the SSE register count: 32 with AVX512F and 16 without it in 64-bit mode, 8 otherwise. */
26881 1807703 : unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26882 7230812 : for (unsigned i = 0; i != 3; i++)
26883 : {
26884 5423109 : if (m_num_gpr_needed[i] > target_avail_regs)
26885 694 : m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26886 : /* Only measure SSE register pressure, and only when SSE is enabled. */
26887 5423109 : if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26888 94 : m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26889 : }
26890 1807703 : }
26891 : /* Finish costing. For loop vectorization (when not costing the scalar loop) reject oversized partial-vector attempts and suggest an unroll factor for counted reductions; then add register-pressure penalties, make avoided cross-lane permutes prohibitive, suggest epilogue modes, and finally call the generic vector_costs::finish_cost with SCALAR_COSTS. */
26892 : void
26893 1807703 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
26894 : {
26895 1807703 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
26896 488301 : if (loop_vinfo && !m_costing_for_scalar)
26897 : {
26898 : /* We are currently not asking the vectorizer to compare costs
26899 : between different vector mode sizes. When using predication
26900 : that will end up always choosing the preferred mode size even
26901 : if there's a smaller mode covering all lanes. Test for this
26902 : situation and artificially reject the larger mode attempt.
26903 : ??? We currently lack masked ops for sub-SSE sized modes,
26904 : so we could restrict this rejection to AVX and AVX512 modes
26905 : but err on the safe side for now. */
26906 124685 : if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
26907 26 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26908 16 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26909 124695 : && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
26910 20 : > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
26911 8 : m_costs[vect_body] = INT_MAX;
26912 :
26913 : /* We'd like to avoid using masking if there's an in-order reduction
26914 : to vectorize because that will also perform in-order adds of
26915 : masked elements (as neutral value, of course) here, but there
26916 : is currently no way to indicate to try un-masked with the same
26917 : mode. */
26918 : /* Check whether any reduction was counted in m_num_reduc. */
26919 124685 : bool any_reduc_p = false;
26920 495285 : for (int i = 0; i != X86_REDUC_LAST; i++)
26921 371895 : if (m_num_reduc[i])
26922 : {
26923 : any_reduc_p = true;
26924 : break;
26925 : }
26926 :
26927 124685 : if (any_reduc_p
26928 : /* Not much gain for loop with gather and scatter. */
26929 1295 : && m_prefer_unroll
26930 1138 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
26931 : {
26932 1814 : unsigned unroll_factor
26933 907 : = OPTION_SET_P (ix86_vect_unroll_limit)
26934 907 : ? ix86_vect_unroll_limit
26935 907 : : ix86_cost->vect_unroll_limit;
26936 :
26937 907 : if (unroll_factor > 1)
26938 : {
26939 3628 : for (int i = 0 ; i != X86_REDUC_LAST; i++)
26940 : {
26941 2721 : if (m_num_reduc[i])
26942 : {
26943 907 : unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
26944 : m_num_reduc[i]);
26945 2721 : unroll_factor = MIN (unroll_factor, tmp);
26946 : }
26947 : }
26948 : /* Round the resulting factor up to a power of two. */
26949 1814 : m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
26950 : }
26951 : }
26952 :
26953 : }
26954 :
26955 1807703 : ix86_vect_estimate_reg_pressure ();
26956 : /* Make an m_costs[] entry prohibitive when 256-bit or 512-bit cross-lane permutes were counted for it and the tuning asks to avoid them. */
26957 7230812 : for (int i = 0; i != 3; i++)
26958 5423109 : if (m_num_avx256_vec_perm[i]
26959 521 : && TARGET_AVX256_AVOID_VEC_PERM)
26960 7 : m_costs[i] = INT_MAX;
26961 :
26962 7230812 : for (int i = 0; i != 3; i++)
26963 5423109 : if (m_num_avx512_vec_perm[i] && TARGET_AVX512_AVOID_VEC_PERM)
26964 5 : m_costs[i] = INT_MAX;
26965 :
26966 : /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
26967 : an AVX2 and an SSE epilogue for AVX512 vectorized loops. */
26968 1807703 : if (loop_vinfo
26969 488301 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26970 43348 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
26971 1808459 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26972 23 : m_suggested_epilogue_mode = V16QImode;
26973 : /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
26974 : enable a 64bit SSE epilogue. */
26975 1807703 : if (loop_vinfo
26976 488301 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26977 43348 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
26978 1810225 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
26979 96 : m_suggested_epilogue_mode = V8QImode;
26980 :
26981 : /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
26982 : a masked epilogue if that doesn't seem detrimental. */
26983 1807703 : if (loop_vinfo
26984 488301 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26985 466627 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
26986 : /* Avoid a masked epilog if cascaded epilogues eventually get us
26987 : to one with VF 1 as that means no scalar epilog at all. */
26988 75940 : && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
26989 75940 : / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
26990 34 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26991 75939 : && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
26992 1807882 : && !OPTION_SET_P (param_vect_partial_vector_usage))
26993 : {
26994 161 : bool avoid = false;
26995 161 : if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26996 129 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
26997 : {
26998 129 : unsigned int peel_niter
26999 : = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
27000 129 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
27001 0 : peel_niter += 1;
27002 : /* When we know the number of scalar iterations of the epilogue,
27003 : avoid masking when a single vector epilog iteration handles
27004 : it in full. */
27005 129 : if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
27006 129 : % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
27007 : avoid = true;
27008 : }
27009 159 : if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
27010 14 : for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
27011 : {
27012 4 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
27013 : ;
27014 4 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
27015 : ;
27016 : else
27017 : {
27018 2 : int loop_depth
27019 4 : = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
27020 2 : DDR_LOOP_NEST (ddr));
27021 4 : if (DDR_NUM_DIST_VECTS (ddr) == 1
27022 2 : && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
27023 : {
27024 : /* Avoid the case when there's an outer loop that might
27025 : traverse a multi-dimensional array with the inner
27026 : loop just executing the masked epilogue with a
27027 : read-write where the next outer iteration might
27028 : read from the masked part of the previous write,
27029 : 'n' filling half a vector.
27030 : for (j = 0; j < m; ++j)
27031 : for (i = 0; i < n; ++i)
27032 : a[j][i] = c * a[j][i]; */
27033 : avoid = true;
27034 : break;
27035 : }
27036 : }
27037 : }
27038 : /* Avoid using masking if there's an in-order reduction
27039 : to vectorize because that will also perform in-order adds of
27040 : masked elements (as neutral value, of course). */
27041 161 : if (!avoid)
27042 : {
27043 632 : for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
27044 165 : if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
27045 165 : && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
27046 : == FOLD_LEFT_REDUCTION))
27047 : {
27048 : avoid = true;
27049 : break;
27050 : }
27051 : }
27052 157 : if (!avoid)
27053 : {
27054 153 : m_suggested_epilogue_mode = loop_vinfo->vector_mode;
27055 153 : m_masked_epilogue = 1;
27056 : }
27057 : }
27058 :
27059 1807703 : vector_costs::finish_cost (scalar_costs);
27060 1807703 : }
27061 :
27062 : /* Return true if THIS should be preferred over OTHER as main vector loop. Returns false outright when THIS has a known iteration count, OTHER uses partial vectors with a VF greater than that count, and the count with bit 0 cleared has more set bits than (param_vect_epilogues_nomask != 0); otherwise defers to vector_costs::better_main_loop_than_p. */
27063 :
27064 : bool
27065 30979 : ix86_vector_costs::better_main_loop_than_p (const vector_costs *other) const
27066 : {
27067 30979 : loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->vinfo ());
27068 30979 : loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->vinfo ());
27069 :
27070 : /* If the other loop is masked it does not need an epilog. Prefer that
27071 : if the current loop cannot be vectorized fully with vector
27072 : epilogues leaving at most one scalar iteration. */
27073 21072 : if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
27074 21072 : && LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
27075 4 : && known_gt (LOOP_VINFO_VECT_FACTOR (other_loop_vinfo),
27076 : LOOP_VINFO_INT_NITERS (this_loop_vinfo))
27077 30983 : && (popcount_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo) & ~1)
27078 4 : > (param_vect_epilogues_nomask != 0)))
27079 : return false;
27080 :
27081 30975 : return vector_costs::better_main_loop_than_p (other);
27082 : }
27083 :
27084 : /* Return true if THIS should be preferred over OTHER as epilog vector
27085 : loop when vectorizing MAIN_LOOP. Returns false outright when THIS has a VF greater than one and its vector mode has the same size as OTHER's suggested epilogue mode; otherwise defers to vector_costs::better_epilogue_loop_than_p. */
27086 :
27087 : bool
27088 1482 : ix86_vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
27089 : loop_vec_info main_loop) const
27090 : {
27091 1482 : loop_vec_info this_loop_info = as_a <loop_vec_info> (this->vinfo ());
27092 : /* The x86 target allows for multiple vector epilogues, if THIS is
27093 : the suggested epilog mode of OTHER then keep the latter unless
27094 : THIS has a VF of one which means no further epilog needed. TEM only receives the second result of suggested_epilogue_mode and is not used afterwards. */
27095 1482 : int tem;
27096 1482 : if (known_gt (LOOP_VINFO_VECT_FACTOR (this_loop_info), 1U)
27097 1482 : && (GET_MODE_SIZE (other->suggested_epilogue_mode (tem))
27098 2942 : == GET_MODE_SIZE (this_loop_info->vector_mode)))
27099 : return false;
27100 1409 : return vector_costs::better_epilogue_loop_than_p (other, main_loop);
27101 : }
27102 :
27103 : /* Validate target specific memory model bits in VAL. Return VAL unchanged when it is valid. Otherwise warn under OPT_Winvalid_memory_model and return MEMMODEL_SEQ_CST, with the HLE bit kept when the only problem is that the base model is too weak for that bit. */
27104 :
27105 : static unsigned HOST_WIDE_INT
27106 410325 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
27107 : {
27108 410325 : enum memmodel model = memmodel_from_int (val);
27109 410325 : bool strong;
27110 : /* Reject bits outside the HLE flags and MEMMODEL_MASK, and HLE_ACQUIRE combined with HLE_RELEASE. */
27111 410325 : if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
27112 : |MEMMODEL_MASK)
27113 410321 : || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
27114 : {
27115 4 : warning (OPT_Winvalid_memory_model,
27116 : "unknown architecture specific memory model");
27117 4 : return MEMMODEL_SEQ_CST;
27118 : }
27119 410321 : strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
27120 410321 : if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
27121 : {
27122 0 : warning (OPT_Winvalid_memory_model,
27123 : "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
27124 : "memory model");
27125 0 : return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
27126 : }
27127 410321 : if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
27128 : {
27129 0 : warning (OPT_Winvalid_memory_model,
27130 : "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
27131 : "memory model");
27132 0 : return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
27133 : }
27134 : return val;
27135 : }
27136 :
27137 : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
27138 : CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
27139 : CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
27140 : or number of vecsize_mangle variants that should be emitted. */
27141 :
27142 : static int
27143 7593 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
27144 : struct cgraph_simd_clone *clonei,
27145 : tree base_type, int num,
27146 : bool explicit_p)
27147 : {
27148 7593 : int ret = 1;
27149 :
27150 7593 : if (clonei->simdlen
27151 7593 : && (clonei->simdlen < 2
27152 1321 : || clonei->simdlen > 1024
27153 1321 : || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
27154 : {
27155 0 : if (explicit_p)
27156 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27157 : "unsupported simdlen %wd", clonei->simdlen.to_constant ());
27158 0 : return 0;
27159 : }
27160 :
27161 7593 : tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
27162 7593 : if (TREE_CODE (ret_type) != VOID_TYPE)
27163 6801 : switch (TYPE_MODE (ret_type))
27164 : {
27165 6801 : case E_QImode:
27166 6801 : case E_HImode:
27167 6801 : case E_SImode:
27168 6801 : case E_DImode:
27169 6801 : case E_SFmode:
27170 6801 : case E_DFmode:
27171 : /* case E_SCmode: */
27172 : /* case E_DCmode: */
27173 6801 : if (!AGGREGATE_TYPE_P (ret_type))
27174 : break;
27175 : /* FALLTHRU */
27176 2 : default:
27177 2 : if (explicit_p)
27178 2 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27179 : "unsupported return type %qT for simd", ret_type);
27180 2 : return 0;
27181 : }
27182 :
27183 7591 : tree t;
27184 7591 : int i;
27185 7591 : tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
27186 7591 : bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
27187 :
27188 7591 : for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
27189 20438 : t && t != void_list_node; t = TREE_CHAIN (t), i++)
27190 : {
27191 16678 : tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
27192 12852 : switch (TYPE_MODE (arg_type))
27193 : {
27194 12833 : case E_QImode:
27195 12833 : case E_HImode:
27196 12833 : case E_SImode:
27197 12833 : case E_DImode:
27198 12833 : case E_SFmode:
27199 12833 : case E_DFmode:
27200 : /* case E_SCmode: */
27201 : /* case E_DCmode: */
27202 12833 : if (!AGGREGATE_TYPE_P (arg_type))
27203 : break;
27204 : /* FALLTHRU */
27205 41 : default:
27206 41 : if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
27207 : break;
27208 5 : if (explicit_p)
27209 5 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27210 : "unsupported argument type %qT for simd", arg_type);
27211 : return 0;
27212 : }
27213 : }
27214 :
27215 7586 : if (!TREE_PUBLIC (node->decl) || !explicit_p)
27216 : {
27217 : /* If the function isn't exported, we can pick up just one ISA
27218 : for the clones. */
27219 114 : if (TARGET_AVX512F)
27220 0 : clonei->vecsize_mangle = 'e';
27221 114 : else if (TARGET_AVX2)
27222 1 : clonei->vecsize_mangle = 'd';
27223 113 : else if (TARGET_AVX)
27224 88 : clonei->vecsize_mangle = 'c';
27225 : else
27226 25 : clonei->vecsize_mangle = 'b';
27227 : ret = 1;
27228 : }
27229 : else
27230 : {
27231 7472 : clonei->vecsize_mangle = "bcde"[num];
27232 7472 : ret = 4;
27233 : }
27234 7586 : clonei->mask_mode = VOIDmode;
27235 7586 : switch (clonei->vecsize_mangle)
27236 : {
27237 1893 : case 'b':
27238 1893 : clonei->vecsize_int = 128;
27239 1893 : clonei->vecsize_float = 128;
27240 1893 : break;
27241 1956 : case 'c':
27242 1956 : clonei->vecsize_int = 128;
27243 1956 : clonei->vecsize_float = 256;
27244 1956 : break;
27245 1869 : case 'd':
27246 1869 : clonei->vecsize_int = 256;
27247 1869 : clonei->vecsize_float = 256;
27248 1869 : break;
27249 1868 : case 'e':
27250 1868 : clonei->vecsize_int = 512;
27251 1868 : clonei->vecsize_float = 512;
27252 1868 : if (TYPE_MODE (base_type) == QImode)
27253 19 : clonei->mask_mode = DImode;
27254 : else
27255 1849 : clonei->mask_mode = SImode;
27256 : break;
27257 : }
27258 7586 : if (clonei->simdlen == 0)
27259 : {
27260 6265 : if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
27261 3297 : clonei->simdlen = clonei->vecsize_int;
27262 : else
27263 2968 : clonei->simdlen = clonei->vecsize_float;
27264 6265 : clonei->simdlen = clonei->simdlen
27265 12530 : / GET_MODE_BITSIZE (TYPE_MODE (base_type));
27266 : }
27267 1321 : else if (clonei->simdlen > 16)
27268 : {
27269 : /* For compatibility with ICC, use the same upper bounds
27270 : for simdlen. In particular, for CTYPE below, use the return type,
27271 : unless the function returns void, in that case use the characteristic
27272 : type. If it is possible for given SIMDLEN to pass CTYPE value
27273 : in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
27274 : for 64-bit code), accept that SIMDLEN, otherwise warn and don't
27275 : emit corresponding clone. */
27276 12 : tree ctype = ret_type;
27277 12 : if (VOID_TYPE_P (ret_type))
27278 0 : ctype = base_type;
27279 24 : int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
27280 12 : if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
27281 8 : cnt /= clonei->vecsize_int;
27282 : else
27283 4 : cnt /= clonei->vecsize_float;
27284 12 : if (cnt > (TARGET_64BIT ? 16 : 8))
27285 : {
27286 0 : if (explicit_p)
27287 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27288 : "unsupported simdlen %wd",
27289 : clonei->simdlen.to_constant ());
27290 0 : return 0;
27291 : }
27292 : }
27293 : return ret;
27294 : }
27295 :
27296 : /* If SIMD clone NODE can't be used in a vectorized loop
27297 : in current function, return -1, otherwise return a badness of using it
27298 : (0 if it is most desirable from vecsize_mangle point of view, 1
27299 : slightly less desirable, etc.). */
27300 :
27301 : static int
27302 1790 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
27303 : {
27304 1790 : switch (node->simdclone->vecsize_mangle)
27305 : {
27306 638 : case 'b':
27307 638 : if (!TARGET_SSE2)
27308 : return -1;
27309 638 : if (!TARGET_AVX)
27310 : return 0;
27311 537 : return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
27312 630 : case 'c':
27313 630 : if (!TARGET_AVX)
27314 : return -1;
27315 585 : return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
27316 334 : case 'd':
27317 334 : if (!TARGET_AVX2)
27318 : return -1;
27319 141 : return TARGET_AVX512F ? 1 : 0;
27320 188 : case 'e':
27321 188 : if (!TARGET_AVX512F)
27322 130 : return -1;
27323 : return 0;
27324 0 : default:
27325 0 : gcc_unreachable ();
27326 : }
27327 : }
27328 :
27329 : /* This function adjusts the unroll factor based on
27330 : the hardware capabilities. For ex, bdver3 has
27331 : a loop buffer which makes unrolling of smaller
27332 : loops less important. This function decides the
27333 : unroll factor using number of memory references
27334 : (value 32 is used) as a heuristic. */
27335 :
27336 : static unsigned
27337 799763 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
27338 : {
27339 799763 : basic_block *bbs;
27340 799763 : rtx_insn *insn;
27341 799763 : unsigned i;
27342 799763 : unsigned mem_count = 0;
27343 :
27344 : /* Unroll small size loop when unroll factor is not explicitly
27345 : specified. */
27346 799763 : if (ix86_unroll_only_small_loops && !loop->unroll)
27347 : {
27348 754587 : if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
27349 72135 : return MIN (nunroll, ix86_cost->small_unroll_factor);
27350 : else
27351 : return 1;
27352 : }
27353 :
27354 45176 : if (!TARGET_ADJUST_UNROLL)
27355 : return nunroll;
27356 :
27357 : /* Count the number of memory references within the loop body.
27358 : This value determines the unrolling factor for bdver3 and bdver4
27359 : architectures. */
27360 8 : subrtx_iterator::array_type array;
27361 8 : bbs = get_loop_body (loop);
27362 24 : for (i = 0; i < loop->num_nodes; i++)
27363 120 : FOR_BB_INSNS (bbs[i], insn)
27364 104 : if (NONDEBUG_INSN_P (insn))
27365 588 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
27366 516 : if (const_rtx x = *iter)
27367 516 : if (MEM_P (x))
27368 : {
27369 28 : machine_mode mode = GET_MODE (x);
27370 56 : unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27371 28 : if (n_words > 4)
27372 0 : mem_count += 2;
27373 : else
27374 28 : mem_count += 1;
27375 : }
27376 8 : free (bbs);
27377 :
27378 8 : if (mem_count && mem_count <=32)
27379 8 : return MIN (nunroll, 32 / mem_count);
27380 :
27381 : return nunroll;
27382 8 : }
27383 :
27384 :
27385 : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
27386 :
27387 : static bool
27388 431538 : ix86_float_exceptions_rounding_supported_p (void)
27389 : {
27390 : /* For x87 floating point with standard excess precision handling,
27391 : there is no adddf3 pattern (since x87 floating point only has
27392 : XFmode operations) so the default hook implementation gets this
27393 : wrong. */
27394 431538 : return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
27395 : }
27396 :
27397 : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
27398 :
27399 : static void
27400 7054 : ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27401 : {
27402 7054 : if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
27403 : return;
27404 7054 : tree exceptions_var = create_tmp_var_raw (integer_type_node);
27405 7054 : if (TARGET_80387)
27406 : {
27407 7054 : tree fenv_index_type = build_index_type (size_int (6));
27408 7054 : tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
27409 7054 : tree fenv_var = create_tmp_var_raw (fenv_type);
27410 7054 : TREE_ADDRESSABLE (fenv_var) = 1;
27411 7054 : tree fenv_ptr = build_pointer_type (fenv_type);
27412 7054 : tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
27413 7054 : fenv_addr = fold_convert (ptr_type_node, fenv_addr);
27414 7054 : tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
27415 7054 : tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
27416 7054 : tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
27417 7054 : tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
27418 7054 : tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
27419 7054 : tree hold_fnclex = build_call_expr (fnclex, 0);
27420 7054 : fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
27421 : NULL_TREE, NULL_TREE);
27422 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
27423 : hold_fnclex);
27424 7054 : *clear = build_call_expr (fnclex, 0);
27425 7054 : tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
27426 7054 : tree fnstsw_call = build_call_expr (fnstsw, 0);
27427 7054 : tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
27428 : fnstsw_call, NULL_TREE, NULL_TREE);
27429 7054 : tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
27430 7054 : tree update_mod = build4 (TARGET_EXPR, integer_type_node,
27431 : exceptions_var, exceptions_x87,
27432 : NULL_TREE, NULL_TREE);
27433 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node,
27434 : sw_mod, update_mod);
27435 7054 : tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
27436 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
27437 : }
27438 7054 : if (TARGET_SSE && TARGET_SSE_MATH)
27439 : {
27440 7054 : tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
27441 7054 : tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
27442 7054 : tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
27443 7054 : tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
27444 7054 : tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
27445 7054 : tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
27446 : mxcsr_orig_var, stmxcsr_hold_call,
27447 : NULL_TREE, NULL_TREE);
27448 7054 : tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
27449 : mxcsr_orig_var,
27450 : build_int_cst (unsigned_type_node, 0x1f80));
27451 7054 : hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
27452 : build_int_cst (unsigned_type_node, 0xffffffc0));
27453 7054 : tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
27454 : mxcsr_mod_var, hold_mod_val,
27455 : NULL_TREE, NULL_TREE);
27456 7054 : tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27457 7054 : tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
27458 : hold_assign_orig, hold_assign_mod);
27459 7054 : hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
27460 : ldmxcsr_hold_call);
27461 7054 : if (*hold)
27462 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
27463 : else
27464 0 : *hold = hold_all;
27465 7054 : tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27466 7054 : if (*clear)
27467 7054 : *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
27468 : ldmxcsr_clear_call);
27469 : else
27470 0 : *clear = ldmxcsr_clear_call;
27471 7054 : tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
27472 7054 : tree exceptions_sse = fold_convert (integer_type_node,
27473 : stxmcsr_update_call);
27474 7054 : if (*update)
27475 : {
27476 7054 : tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
27477 : exceptions_var, exceptions_sse);
27478 7054 : tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
27479 : exceptions_var, exceptions_mod);
27480 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
27481 : exceptions_assign);
27482 : }
27483 : else
27484 0 : *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
27485 : exceptions_sse, NULL_TREE, NULL_TREE);
27486 7054 : tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
27487 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update,
27488 : ldmxcsr_update_call);
27489 : }
27490 7054 : tree atomic_feraiseexcept
27491 7054 : = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
27492 7054 : tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
27493 : 1, exceptions_var);
27494 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update,
27495 : atomic_feraiseexcept_call);
27496 : }
27497 :
27498 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
27499 : /* For i386, common symbol is local only for non-PIE binaries. For
27500 : x86-64, common symbol is local only for non-PIE binaries or linker
27501 : supports copy reloc in PIE binaries. */
27502 :
27503 : static bool
27504 769403497 : ix86_binds_local_p (const_tree exp)
27505 : {
27506 769403497 : bool direct_extern_access
27507 769403497 : = (ix86_direct_extern_access
27508 1535291295 : && !(VAR_OR_FUNCTION_DECL_P (exp)
27509 765887798 : && lookup_attribute ("nodirect_extern_access",
27510 765887798 : DECL_ATTRIBUTES (exp))));
27511 769403497 : if (!direct_extern_access)
27512 1225 : ix86_has_no_direct_extern_access = true;
27513 769403497 : return default_binds_local_p_3 (exp, flag_shlib != 0, true,
27514 : direct_extern_access,
27515 : (direct_extern_access
27516 769402272 : && (!flag_pic
27517 132147006 : || (TARGET_64BIT
27518 769403497 : && HAVE_LD_PIE_COPYRELOC != 0))));
27519 : }
27520 :
27521 : /* If flag_pic or ix86_direct_extern_access is false, then neither
27522 : local nor global relocs should be placed in readonly memory. */
27523 :
27524 : static int
27525 5147305 : ix86_reloc_rw_mask (void)
27526 : {
27527 5147305 : return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
27528 : }
27529 : #endif
27530 :
27531 : /* Return true iff ADDR can be used as a symbolic base address. */
27532 :
27533 : static bool
27534 3026 : symbolic_base_address_p (rtx addr)
27535 : {
27536 0 : if (SYMBOL_REF_P (addr))
27537 : return true;
27538 :
27539 3002 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
27540 0 : return true;
27541 :
27542 : return false;
27543 : }
27544 :
27545 : /* Return true iff ADDR can be used as a base address. */
27546 :
27547 : static bool
27548 4581 : base_address_p (rtx addr)
27549 : {
27550 0 : if (REG_P (addr))
27551 : return true;
27552 :
27553 2841 : if (symbolic_base_address_p (addr))
27554 0 : return true;
27555 :
27556 : return false;
27557 : }
27558 :
27559 : /* If MEM is in the form of [(base+symbase)+offset], extract the three
27560 : parts of address and set to BASE, SYMBASE and OFFSET, otherwise
27561 : return false. */
27562 :
27563 : static bool
27564 2948 : extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
27565 : {
27566 2948 : rtx addr;
27567 :
27568 2948 : gcc_assert (MEM_P (mem));
27569 :
27570 2948 : addr = XEXP (mem, 0);
27571 :
27572 2948 : if (GET_CODE (addr) == CONST)
27573 10 : addr = XEXP (addr, 0);
27574 :
27575 2948 : if (base_address_p (addr))
27576 : {
27577 1315 : *base = addr;
27578 1315 : *symbase = const0_rtx;
27579 1315 : *offset = const0_rtx;
27580 1315 : return true;
27581 : }
27582 :
27583 1633 : if (GET_CODE (addr) == PLUS
27584 1633 : && base_address_p (XEXP (addr, 0)))
27585 : {
27586 449 : rtx addend = XEXP (addr, 1);
27587 :
27588 449 : if (GET_CODE (addend) == CONST)
27589 0 : addend = XEXP (addend, 0);
27590 :
27591 449 : if (CONST_INT_P (addend))
27592 : {
27593 264 : *base = XEXP (addr, 0);
27594 264 : *symbase = const0_rtx;
27595 264 : *offset = addend;
27596 264 : return true;
27597 : }
27598 :
27599 : /* Also accept REG + symbolic ref, with or without a CONST_INT
27600 : offset. */
27601 185 : if (REG_P (XEXP (addr, 0)))
27602 : {
27603 185 : if (symbolic_base_address_p (addend))
27604 : {
27605 0 : *base = XEXP (addr, 0);
27606 0 : *symbase = addend;
27607 0 : *offset = const0_rtx;
27608 0 : return true;
27609 : }
27610 :
27611 185 : if (GET_CODE (addend) == PLUS
27612 0 : && symbolic_base_address_p (XEXP (addend, 0))
27613 185 : && CONST_INT_P (XEXP (addend, 1)))
27614 : {
27615 0 : *base = XEXP (addr, 0);
27616 0 : *symbase = XEXP (addend, 0);
27617 0 : *offset = XEXP (addend, 1);
27618 0 : return true;
27619 : }
27620 : }
27621 : }
27622 :
27623 : return false;
27624 : }
27625 :
27626 : /* Given OPERANDS of consecutive load/store, check if we can merge
27627 : them into move multiple. LOAD is true if they are load instructions.
27628 : MODE is the mode of memory operands. */
27629 :
27630 : bool
27631 1629 : ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
27632 : machine_mode mode)
27633 : {
27634 1629 : HOST_WIDE_INT offval_1, offval_2, msize;
27635 1629 : rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
27636 : symbase_1, symbase_2, offset_1, offset_2;
27637 :
27638 1629 : if (load)
27639 : {
27640 1317 : mem_1 = operands[1];
27641 1317 : mem_2 = operands[3];
27642 1317 : reg_1 = operands[0];
27643 1317 : reg_2 = operands[2];
27644 : }
27645 : else
27646 : {
27647 312 : mem_1 = operands[0];
27648 312 : mem_2 = operands[2];
27649 312 : reg_1 = operands[1];
27650 312 : reg_2 = operands[3];
27651 : }
27652 :
27653 1629 : gcc_assert (REG_P (reg_1) && REG_P (reg_2));
27654 :
27655 1629 : if (REGNO (reg_1) != REGNO (reg_2))
27656 : return false;
27657 :
27658 : /* Check if the addresses are in the form of [base+offset]. */
27659 1627 : if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
27660 : return false;
27661 1321 : if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
27662 : return false;
27663 :
27664 : /* Check if the bases are the same. */
27665 258 : if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
27666 115 : return false;
27667 :
27668 143 : offval_1 = INTVAL (offset_1);
27669 143 : offval_2 = INTVAL (offset_2);
27670 143 : msize = GET_MODE_SIZE (mode);
27671 : /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
27672 143 : if (offval_1 + msize != offval_2)
27673 : return false;
27674 :
27675 : return true;
27676 : }
27677 :
27678 : /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27679 :
27680 : static bool
27681 367668 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
27682 : optimization_type opt_type)
27683 : {
27684 367668 : switch (op)
27685 : {
27686 231 : case asin_optab:
27687 231 : case acos_optab:
27688 231 : case log1p_optab:
27689 231 : case exp_optab:
27690 231 : case exp10_optab:
27691 231 : case exp2_optab:
27692 231 : case expm1_optab:
27693 231 : case ldexp_optab:
27694 231 : case scalb_optab:
27695 231 : case round_optab:
27696 231 : case lround_optab:
27697 231 : return opt_type == OPTIMIZE_FOR_SPEED;
27698 :
27699 286 : case rint_optab:
27700 286 : if (SSE_FLOAT_MODE_P (mode1)
27701 139 : && TARGET_SSE_MATH
27702 127 : && !flag_trapping_math
27703 21 : && !TARGET_SSE4_1
27704 : && mode1 != HFmode)
27705 21 : return opt_type == OPTIMIZE_FOR_SPEED;
27706 : return true;
27707 :
27708 1971 : case floor_optab:
27709 1971 : case ceil_optab:
27710 1971 : case btrunc_optab:
27711 1971 : if ((SSE_FLOAT_MODE_P (mode1)
27712 1581 : && TARGET_SSE_MATH
27713 1514 : && TARGET_SSE4_1)
27714 1904 : || mode1 == HFmode)
27715 : return true;
27716 1835 : return opt_type == OPTIMIZE_FOR_SPEED;
27717 :
27718 66 : case rsqrt_optab:
27719 66 : return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
27720 :
27721 : default:
27722 : return true;
27723 : }
27724 : }
27725 :
27726 : /* Address space support.
27727 :
27728 : This is not "far pointers" in the 16-bit sense, but an easy way
27729 : to use %fs and %gs segment prefixes. Therefore:
27730 :
27731 : (a) All address spaces have the same modes,
27732 : (b) All address spaces have the same address forms,
27733 : (c) While %fs and %gs are technically subsets of the generic
27734 : address space, they are probably not subsets of each other.
27735 : (d) Since we have no access to the segment base register values
27736 : without resorting to a system call, we cannot convert a
27737 : non-default address space to a default address space.
27738 : Therefore we do not claim %fs or %gs are subsets of generic.
27739 :
27740 : Therefore we can (mostly) use the default hooks. */
27741 :
27742 : /* All use of segmentation is assumed to make address 0 valid. */
27743 :
27744 : static bool
27745 67378083 : ix86_addr_space_zero_address_valid (addr_space_t as)
27746 : {
27747 67378083 : return as != ADDR_SPACE_GENERIC;
27748 : }
27749 :
27750 : static void
27751 789623 : ix86_init_libfuncs (void)
27752 : {
27753 789623 : if (TARGET_64BIT)
27754 : {
27755 774673 : set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
27756 774673 : set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
27757 : }
27758 : else
27759 : {
27760 14950 : set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
27761 14950 : set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
27762 : }
27763 :
27764 : #if TARGET_MACHO
27765 : darwin_rename_builtins ();
27766 : #endif
27767 789623 : }
27768 :
27769 : /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
27770 : FPU, assume that the fpcw is set to extended precision; when using
27771 : only SSE, rounding is correct; when using both SSE and the FPU,
27772 : the rounding precision is indeterminate, since either may be chosen
27773 : apparently at random. */
27774 :
27775 : static enum flt_eval_method
27776 89476100 : ix86_get_excess_precision (enum excess_precision_type type)
27777 : {
27778 89476100 : switch (type)
27779 : {
27780 85382905 : case EXCESS_PRECISION_TYPE_FAST:
27781 : /* The fastest type to promote to will always be the native type,
27782 : whether that occurs with implicit excess precision or
27783 : otherwise. */
27784 85382905 : return TARGET_AVX512FP16
27785 85382905 : ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
27786 85382905 : : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27787 4093114 : case EXCESS_PRECISION_TYPE_STANDARD:
27788 4093114 : case EXCESS_PRECISION_TYPE_IMPLICIT:
27789 : /* Otherwise, the excess precision we want when we are
27790 : in a standards compliant mode, and the implicit precision we
27791 : provide would be identical were it not for the unpredictable
27792 : cases. */
27793 4093114 : if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
27794 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
27795 4087134 : else if (!TARGET_80387)
27796 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27797 4081012 : else if (!TARGET_MIX_SSE_I387)
27798 : {
27799 4080840 : if (!(TARGET_SSE && TARGET_SSE_MATH))
27800 : return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
27801 3092222 : else if (TARGET_SSE2)
27802 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27803 : }
27804 :
27805 : /* If we are in standards compliant mode, but we know we will
27806 : calculate in unpredictable precision, return
27807 : FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
27808 : excess precision if the target can't guarantee it will honor
27809 : it. */
27810 320 : return (type == EXCESS_PRECISION_TYPE_STANDARD
27811 320 : ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
27812 : : FLT_EVAL_METHOD_UNPREDICTABLE);
27813 81 : case EXCESS_PRECISION_TYPE_FLOAT16:
27814 81 : if (TARGET_80387
27815 75 : && !(TARGET_SSE_MATH && TARGET_SSE))
27816 4 : error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
27817 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
27818 0 : default:
27819 0 : gcc_unreachable ();
27820 : }
27821 :
27822 : return FLT_EVAL_METHOD_UNPREDICTABLE;
27823 : }
27824 :
27825 : /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
27826 : bool
27827 361991 : ix86_bitint_type_info (int n, struct bitint_info *info)
27828 : {
27829 361991 : if (n <= 8)
27830 9116 : info->limb_mode = QImode;
27831 352875 : else if (n <= 16)
27832 1893 : info->limb_mode = HImode;
27833 350982 : else if (n <= 32 || (!TARGET_64BIT && n > 64))
27834 45753 : info->limb_mode = SImode;
27835 : else
27836 305229 : info->limb_mode = DImode;
27837 361991 : info->abi_limb_mode = info->limb_mode;
27838 361991 : info->big_endian = false;
27839 361991 : info->extended = bitint_ext_undef;
27840 361991 : return true;
27841 : }
27842 :
27843 : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
27844 : or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
27845 : based on long double bits, go with the default one for the others. */
27846 :
27847 : static machine_mode
27848 3781238 : ix86_c_mode_for_floating_type (enum tree_index ti)
27849 : {
27850 3781238 : if (ti == TI_LONG_DOUBLE_TYPE)
27851 630690 : return (TARGET_LONG_DOUBLE_64 ? DFmode
27852 630658 : : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
27853 3150548 : return default_mode_for_floating_type (ti);
27854 : }
27855 :
27856 : /* Returns modified FUNCTION_TYPE for cdtor callabi. */
27857 : tree
27858 14292 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
27859 : {
27860 14292 : if (TARGET_64BIT
27861 71 : || TARGET_RTD
27862 14363 : || ix86_function_type_abi (fntype) != MS_ABI)
27863 14292 : return fntype;
27864 : /* For 32-bit MS ABI add thiscall attribute. */
27865 0 : tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
27866 0 : TYPE_ATTRIBUTES (fntype));
27867 0 : return build_type_attribute_variant (fntype, attribs);
27868 : }
27869 :
27870 : /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
27871 : decrements by exactly 2 no matter what the position was, there is no pushb.
27872 :
27873 : But as CIE data alignment factor on this arch is -4 for 32bit targets
27874 : and -8 for 64bit targets, we need to make sure all stack pointer adjustments
27875 : are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
27876 :
27877 : poly_int64
27878 273319416 : ix86_push_rounding (poly_int64 bytes)
27879 : {
27880 353034264 : return ROUND_UP (bytes, UNITS_PER_WORD);
27881 : }
27882 :
/* Position and width of the HWASAN tag inside a pointer, selected by
   ix86_lam_type: LAM_U48 uses 8 bits of metadata starting at bit 48,
   LAM_U57 uses 6 bits of metadata starting at bit 57.  Both macros
   evaluate to 0 for any other LAM type.  */
#define IX86_HWASAN_SHIFT \
  (ix86_lam_type == lam_u48 ? 48 : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE \
  (ix86_lam_type == lam_u48 ? 8 : (ix86_lam_type == lam_u57 ? 6 : 0))
27891 :
27892 : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
27893 : bool
27894 6214346 : ix86_memtag_can_tag_addresses ()
27895 : {
27896 6214346 : return ix86_lam_type != lam_none && TARGET_LP64;
27897 : }
27898 :
27899 : /* Implement TARGET_MEMTAG_TAG_BITSIZE. */
27900 : unsigned char
27901 450 : ix86_memtag_tag_bitsize ()
27902 : {
27903 450 : return IX86_HWASAN_TAG_SIZE;
27904 : }
27905 :
27906 : /* Implement TARGET_MEMTAG_SET_TAG. */
27907 : rtx
27908 106 : ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
27909 : {
27910 : /* default_memtag_insert_random_tag may
27911 : generate tag with value more than 6 bits. */
27912 106 : if (ix86_lam_type == lam_u57)
27913 : {
27914 106 : unsigned HOST_WIDE_INT and_imm
27915 : = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27916 :
27917 106 : emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
27918 : }
27919 106 : tag = expand_simple_binop (Pmode, ASHIFT, tag,
27920 106 : GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
27921 : /* unsignedp = */1, OPTAB_WIDEN);
27922 106 : rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
27923 : /* unsignedp = */1, OPTAB_DIRECT);
27924 106 : return ret;
27925 : }
27926 :
27927 : /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
27928 : rtx
27929 180 : ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
27930 : {
27931 180 : rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
27932 180 : GEN_INT (IX86_HWASAN_SHIFT), target,
27933 : /* unsignedp = */0,
27934 : OPTAB_DIRECT);
27935 180 : rtx ret = gen_reg_rtx (QImode);
27936 : /* Mask off bit63 when LAM_U57. */
27937 180 : if (ix86_lam_type == lam_u57)
27938 : {
27939 180 : unsigned HOST_WIDE_INT and_imm
27940 : = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27941 180 : emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
27942 180 : gen_int_mode (and_imm, QImode)));
27943 : }
27944 : else
27945 0 : emit_move_insn (ret, gen_lowpart (QImode, tag));
27946 180 : return ret;
27947 : }
27948 :
27949 : /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
27950 : rtx
27951 114 : ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
27952 : {
27953 : /* Leave bit63 alone. */
27954 114 : rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
27955 114 : + (HOST_WIDE_INT_1U << 63) - 1),
27956 114 : Pmode);
27957 114 : rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
27958 : tag_mask, target, true,
27959 : OPTAB_DIRECT);
27960 114 : gcc_assert (untagged_base);
27961 114 : return untagged_base;
27962 : }
27963 :
27964 : /* Implement TARGET_MEMTAG_ADD_TAG. */
27965 : rtx
27966 90 : ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
27967 : {
27968 90 : rtx base_tag = gen_reg_rtx (QImode);
27969 90 : rtx base_addr = gen_reg_rtx (Pmode);
27970 90 : rtx tagged_addr = gen_reg_rtx (Pmode);
27971 90 : rtx new_tag = gen_reg_rtx (QImode);
27972 180 : unsigned HOST_WIDE_INT and_imm
27973 90 : = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
27974 :
27975 : /* When there's "overflow" in tag adding,
27976 : need to mask the most significant bit off. */
27977 90 : emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
27978 90 : emit_move_insn (base_addr,
27979 : ix86_memtag_untagged_pointer (base, NULL_RTX));
27980 90 : emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
27981 90 : emit_move_insn (new_tag, base_tag);
27982 90 : emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
27983 90 : emit_move_insn (tagged_addr,
27984 : ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
27985 90 : return plus_constant (Pmode, tagged_addr, offset);
27986 : }
27987 :
27988 : /* Implement TARGET_HAVE_CCMP: return TARGET_APX_CCMP converted to bool. */
27989 : static bool
27990 8065547 : ix86_have_ccmp ()
27991 : {
27992 8065547 : return (bool) TARGET_APX_CCMP;
27993 : }
27994 :
27995 : /* Implement TARGET_MODE_CAN_TRANSFER_BITS: return false only when MODE is a float or complex-float mode whose inner mode is SFmode or DFmode and ix86_fpmath includes FPMATH_387; return true for every other mode. */
27996 : static bool
27997 4578281 : ix86_mode_can_transfer_bits (machine_mode mode)
27998 : {
27999 4578281 : if (GET_MODE_CLASS (mode) == MODE_FLOAT
28000 4531148 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
28001 112800 : switch (GET_MODE_INNER (mode))
28002 : {
28003 54192 : case E_SFmode:
28004 54192 : case E_DFmode:
28005 : /* These suffer from normalization upon load when not using SSE, so they qualify only when 387 math is not selected. */
28006 54192 : return !(ix86_fpmath & FPMATH_387);
28007 : default:
28008 : return true;
28009 : }
28010 :
28011 : return true;
28012 : }
28013 :
28014 : /* Implement TARGET_REDZONE_CLOBBER: set cfun->machine->asm_redzone_clobber_seen and, when ix86_using_red_zone () holds, return a BLKmode MEM of RED_ZONE_SIZE bytes starting RED_ZONE_SIZE below the stack pointer; otherwise return NULL_RTX. */
28015 : static rtx
28016 2 : ix86_redzone_clobber ()
28017 : {
28018 2 : cfun->machine->asm_redzone_clobber_seen = true;
28019 2 : if (ix86_using_red_zone ())
28020 : {
28021 2 : rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
28022 2 : rtx mem = gen_rtx_MEM (BLKmode, base);
28023 2 : set_mem_size (mem, RED_ZONE_SIZE);
28024 2 : return mem;
28025 : }
28026 : return NULL_RTX;
28027 : }
28028 :
28029 : /* Target-specific selftests. */
28030 :
28031 : #if CHECKING_P
28032 :
28033 : namespace selftest {
28034 :
28035 : /* Verify that hard regs are dumped as expected (in compact mode): SImode regno 0 must dump as "ax" and regno 1 as "dx". */
28036 :
28037 : static void
28038 4 : ix86_test_dumping_hard_regs ()
28039 : {
28040 4 : ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
28041 4 : ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
28042 4 : }
28043 :
28044 : /* Test dumping an insn with repeated references to the same SCRATCH,
28045 : to verify the rtx_reuse code. The pattern comes from gen_memory_blockage. */
28046 :
28047 : static void
28048 4 : ix86_test_dumping_memory_blockage ()
28049 : {
28050 4 : set_new_first_and_last_insn (NULL, NULL);
28051 :
28052 4 : rtx pat = gen_memory_blockage ();
28053 4 : rtx_reuse_manager r;
28054 4 : r.preprocess (pat);
28055 :
28056 : /* Verify that the repeated references to the SCRATCH use
28057 : reuse IDs. The first should be prefixed with a reuse ID,
28058 : and the second should be dumped as a "reuse_rtx" of that ID.
28059 : The expected string assumes Pmode == DImode, so the check is skipped otherwise. */
28060 4 : if (Pmode == DImode)
28061 4 : ASSERT_RTL_DUMP_EQ_WITH_REUSE
28062 : ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
28063 : " (unspec:BLK [\n"
28064 : " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
28065 : " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
28066 4 : }
28067 :
28068 : /* Verify loading an RTL dump; specifically a dump of copying
28069 : a param on x86_64 from a hard reg into the frame.
28070 : This test is target-specific since the dump contains target-specific
28071 : hard reg names. The dump is read from "x86_64/copy-hard-reg-into-frame.rtl". */
28072 :
28073 : static void
28074 4 : ix86_test_loading_dump_fragment_1 ()
28075 : {
28076 4 : rtl_dump_test t (SELFTEST_LOCATION,
28077 4 : locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
28078 :
28079 4 : rtx_insn *insn = get_insn_by_uid (1);
28080 :
28081 : /* The block structure and indentation here is purely for
28082 : readability; it mirrors the structure of the rtx. */
28083 4 : tree mem_expr;
28084 4 : {
28085 4 : rtx pat = PATTERN (insn);
28086 4 : ASSERT_EQ (SET, GET_CODE (pat));
28087 4 : {
28088 4 : rtx dest = SET_DEST (pat);
28089 4 : ASSERT_EQ (MEM, GET_CODE (dest));
28090 : /* Verify the "/c" was parsed. */
28091 4 : ASSERT_TRUE (RTX_FLAG (dest, call));
28092 4 : ASSERT_EQ (SImode, GET_MODE (dest));
28093 4 : {
28094 4 : rtx addr = XEXP (dest, 0);
28095 4 : ASSERT_EQ (PLUS, GET_CODE (addr));
28096 4 : ASSERT_EQ (DImode, GET_MODE (addr));
28097 4 : {
28098 4 : rtx lhs = XEXP (addr, 0);
28099 : /* Verify that the "frame" REG was consolidated. */
28100 4 : ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
28101 : }
28102 4 : {
28103 4 : rtx rhs = XEXP (addr, 1);
28104 4 : ASSERT_EQ (CONST_INT, GET_CODE (rhs));
28105 4 : ASSERT_EQ (-4, INTVAL (rhs));
28106 : }
28107 : }
28108 : /* Verify the "[1 i+0 S4 A32]" was parsed. */
28109 4 : ASSERT_EQ (1, MEM_ALIAS_SET (dest));
28110 : /* "i" should have been handled by synthesizing a global int
28111 : variable named "i". */
28112 4 : mem_expr = MEM_EXPR (dest);
28113 4 : ASSERT_NE (mem_expr, NULL);
28114 4 : ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
28115 4 : ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
28116 4 : ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
28117 4 : ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
28118 : /* "+0". */
28119 4 : ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
28120 4 : ASSERT_EQ (0, MEM_OFFSET (dest));
28121 : /* "S4". */
28122 4 : ASSERT_EQ (4, MEM_SIZE (dest));
28123 : /* "A32". */
28124 4 : ASSERT_EQ (32, MEM_ALIGN (dest));
28125 : }
28126 4 : {
28127 4 : rtx src = SET_SRC (pat);
28128 4 : ASSERT_EQ (REG, GET_CODE (src));
28129 4 : ASSERT_EQ (SImode, GET_MODE (src));
28130 4 : ASSERT_EQ (5, REGNO (src));
28131 4 : tree reg_expr = REG_EXPR (src);
28132 : /* "i" here should point to the same var as for the MEM_EXPR. */
28133 4 : ASSERT_EQ (reg_expr, mem_expr);
28134 : }
28135 : }
28136 4 : }
28137 :
28138 : /* Verify that the RTL loader copes with a call_insn dump.
28139 : This test is target-specific since the dump contains a target-specific
28140 : hard reg name. The dump is read from "x86_64/call-insn.rtl". */
28141 :
28142 : static void
28143 4 : ix86_test_loading_call_insn ()
28144 : {
28145 : /* The test dump includes register "xmm0", which requires TARGET_SSE
28146 : to exist, so skip the test without it. */
28147 4 : if (!TARGET_SSE)
28148 0 : return;
28149 :
28150 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
28151 :
28152 4 : rtx_insn *insn = get_insns ();
28153 4 : ASSERT_EQ (CALL_INSN, GET_CODE (insn));
28154 :
28155 : /* "/j". */
28156 4 : ASSERT_TRUE (RTX_FLAG (insn, jump));
28157 :
28158 4 : rtx pat = PATTERN (insn);
28159 4 : ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
28160 :
28161 : /* Verify REG_NOTES: exactly two notes, REG_CALL_DECL then REG_EH_REGION. */
28162 4 : {
28163 : /* "(expr_list:REG_CALL_DECL". */
28164 4 : ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
28165 4 : rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
28166 4 : ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
28167 :
28168 : /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
28169 4 : rtx_expr_list *note1 = note0->next ();
28170 4 : ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
28171 :
28172 4 : ASSERT_EQ (NULL, note1->next ());
28173 : }
28174 :
28175 : /* Verify CALL_INSN_FUNCTION_USAGE: a single DFmode USE entry. */
28176 4 : {
28177 : /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
28178 4 : rtx_expr_list *usage
28179 4 : = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
28180 4 : ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
28181 4 : ASSERT_EQ (DFmode, GET_MODE (usage));
28182 4 : ASSERT_EQ (USE, GET_CODE (usage->element ()));
28183 4 : ASSERT_EQ (NULL, usage->next ());
28184 : }
28185 4 : }
28186 :
28187 : /* Verify that the RTL loader copes with a dump from print_rtx_function.
28188 : This test is target-specific since the dump contains target-specific
28189 : hard reg names. The dump is read from "x86_64/times-two.rtl". */
28190 :
28191 : static void
28192 4 : ix86_test_loading_full_dump ()
28193 : {
28194 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
28195 :
28196 4 : ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
28197 :
28198 4 : rtx_insn *insn_1 = get_insn_by_uid (1);
28199 4 : ASSERT_EQ (NOTE, GET_CODE (insn_1));
28200 :
28201 4 : rtx_insn *insn_7 = get_insn_by_uid (7);
28202 4 : ASSERT_EQ (INSN, GET_CODE (insn_7));
28203 4 : ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
28204 :
28205 4 : rtx_insn *insn_15 = get_insn_by_uid (15);
28206 4 : ASSERT_EQ (INSN, GET_CODE (insn_15));
28207 4 : ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
28208 :
28209 : /* Verify crtl->return_rtx: it must be an SImode REG with regno 0. */
28210 4 : ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
28211 4 : ASSERT_EQ (0, REGNO (crtl->return_rtx));
28212 4 : ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
28213 4 : }
28214 :
28215 : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
28216 : In particular, verify that it correctly loads the 2nd operand.
28217 : This test is target-specific since these are machine-specific
28218 : operands (and enums). The dump is read from "x86_64/unspec.rtl". */
28219 :
28220 : static void
28221 4 : ix86_test_loading_unspec ()
28222 : {
28223 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
28224 : /* Check cfun before it is dereferenced below. */
28225 4 : ASSERT_TRUE (cfun);
28226 :
28227 4 : ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
28228 :
28229 : /* Test of an UNSPEC. */
28230 4 : rtx_insn *insn = get_insns ();
28231 4 : ASSERT_EQ (INSN, GET_CODE (insn));
28232 4 : rtx set = single_set (insn);
28233 4 : ASSERT_NE (NULL, set);
28234 4 : rtx dst = SET_DEST (set);
28235 4 : ASSERT_EQ (MEM, GET_CODE (dst));
28236 4 : rtx src = SET_SRC (set);
28237 4 : ASSERT_EQ (UNSPEC, GET_CODE (src));
28238 4 : ASSERT_EQ (BLKmode, GET_MODE (src));
28239 4 : ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
28240 :
28241 4 : rtx v0 = XVECEXP (src, 0, 0);
28242 :
28243 : /* Verify that the two uses of the first SCRATCH have pointer
28244 : equality. */
28245 4 : rtx scratch_a = XEXP (dst, 0);
28246 4 : ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
28247 :
28248 4 : rtx scratch_b = XEXP (v0, 0);
28249 4 : ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
28250 :
28251 4 : ASSERT_EQ (scratch_a, scratch_b);
28252 :
28253 : /* Verify that the two mems are thus treated as equal. */
28254 4 : ASSERT_TRUE (rtx_equal_p (dst, v0));
28255 :
28256 : /* Verify that the insn is recognized. */
28257 4 : ASSERT_NE (-1, recog_memoized (insn));
28258 :
28259 : /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
28260 4 : insn = NEXT_INSN (insn);
28261 4 : ASSERT_EQ (INSN, GET_CODE (insn));
28262 :
28263 4 : set = single_set (insn);
28264 4 : ASSERT_NE (NULL, set);
28265 :
28266 4 : src = SET_SRC (set);
28267 4 : ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
28268 4 : ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
28269 4 : }
28270 :
28271 : /* Run all target-specific selftests: the two dumping tests first, then the RTL-dump loading tests. */
28272 :
28273 : static void
28274 4 : ix86_run_selftests (void)
28275 : {
28276 4 : ix86_test_dumping_hard_regs ();
28277 4 : ix86_test_dumping_memory_blockage ();
28278 :
28279 : /* Various tests of loading RTL dumps, here because they contain
28280 : ix86-isms (e.g. names of hard regs). */
28281 4 : ix86_test_loading_dump_fragment_1 ();
28282 4 : ix86_test_loading_call_insn ();
28283 4 : ix86_test_loading_full_dump ();
28284 4 : ix86_test_loading_unspec ();
28285 4 : }
28286 :
28287 : } // namespace selftest
28288 :
28289 : #endif /* CHECKING_P */
28290 : /* Attribute tables for this target; installed as TARGET_ATTRIBUTE_TABLE below. Currently holds only ix86_gnu_attribute_table. */
28291 : static const scoped_attribute_specs *const ix86_attribute_table[] =
28292 : {
28293 : &ix86_gnu_attribute_table
28294 : };
28295 :
28296 : /* Initialize the GCC target structure. */
28297 : #undef TARGET_RETURN_IN_MEMORY
28298 : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28299 :
28300 : #undef TARGET_LEGITIMIZE_ADDRESS
28301 : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
28302 :
28303 : #undef TARGET_ATTRIBUTE_TABLE
28304 : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28305 : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
28306 : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
28307 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28308 : # undef TARGET_MERGE_DECL_ATTRIBUTES
28309 : # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28310 : #endif
28311 :
28312 : #undef TARGET_INVALID_CONVERSION
28313 : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
28314 :
28315 : #undef TARGET_INVALID_UNARY_OP
28316 : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
28317 :
28318 : #undef TARGET_INVALID_BINARY_OP
28319 : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
28320 :
28321 : #undef TARGET_COMP_TYPE_ATTRIBUTES
28322 : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28323 :
28324 : #undef TARGET_INIT_BUILTINS
28325 : #define TARGET_INIT_BUILTINS ix86_init_builtins
28326 : #undef TARGET_BUILTIN_DECL
28327 : #define TARGET_BUILTIN_DECL ix86_builtin_decl
28328 : #undef TARGET_EXPAND_BUILTIN
28329 : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28330 :
28331 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28332 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
28333 : ix86_builtin_vectorized_function
28334 :
28335 : #undef TARGET_VECTORIZE_BUILTIN_GATHER
28336 : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
28337 :
28338 : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
28339 : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
28340 :
28341 : #undef TARGET_BUILTIN_RECIPROCAL
28342 : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
28343 :
28344 : #undef TARGET_ASM_FUNCTION_EPILOGUE
28345 : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
28346 :
28347 : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
28348 : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
28349 : ix86_print_patchable_function_entry
28350 :
28351 : #undef TARGET_ENCODE_SECTION_INFO
28352 : #ifndef SUBTARGET_ENCODE_SECTION_INFO
28353 : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28354 : #else
28355 : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28356 : #endif
28357 :
28358 : #undef TARGET_ASM_OPEN_PAREN
28359 : #define TARGET_ASM_OPEN_PAREN ""
28360 : #undef TARGET_ASM_CLOSE_PAREN
28361 : #define TARGET_ASM_CLOSE_PAREN ""
28362 :
28363 : #undef TARGET_ASM_BYTE_OP
28364 : #define TARGET_ASM_BYTE_OP ASM_BYTE
28365 :
28366 : #undef TARGET_ASM_ALIGNED_HI_OP
28367 : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28368 : #undef TARGET_ASM_ALIGNED_SI_OP
28369 : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28370 : #ifdef ASM_QUAD
28371 : #undef TARGET_ASM_ALIGNED_DI_OP
28372 : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28373 : #endif
28374 :
28375 : #undef TARGET_PROFILE_BEFORE_PROLOGUE
28376 : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
28377 :
28378 : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
28379 : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
28380 :
28381 : #undef TARGET_ASM_UNALIGNED_HI_OP
28382 : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28383 : #undef TARGET_ASM_UNALIGNED_SI_OP
28384 : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28385 : #undef TARGET_ASM_UNALIGNED_DI_OP
28386 : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28387 :
28388 : #undef TARGET_PRINT_OPERAND
28389 : #define TARGET_PRINT_OPERAND ix86_print_operand
28390 : #undef TARGET_PRINT_OPERAND_ADDRESS
28391 : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
28392 : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
28393 : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
28394 : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
28395 : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
28396 :
28397 : #undef TARGET_SCHED_INIT_GLOBAL
28398 : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
28399 : #undef TARGET_SCHED_ADJUST_COST
28400 : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28401 : #undef TARGET_SCHED_ISSUE_RATE
28402 : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28403 : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28404 : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28405 : ia32_multipass_dfa_lookahead
28406 : #undef TARGET_SCHED_MACRO_FUSION_P
28407 : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
28408 : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
28409 : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
28410 :
28411 : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28412 : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28413 :
28414 : #undef TARGET_MEMMODEL_CHECK
28415 : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
28416 :
28417 : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
28418 : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
28419 :
28420 : #ifdef HAVE_AS_TLS
28421 : #undef TARGET_HAVE_TLS
28422 : #define TARGET_HAVE_TLS true
28423 : #endif
28424 : #undef TARGET_CANNOT_FORCE_CONST_MEM
28425 : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28426 : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28427 : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28428 :
28429 : #undef TARGET_DELEGITIMIZE_ADDRESS
28430 : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28431 :
28432 : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
28433 : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
28434 :
28435 : #undef TARGET_MS_BITFIELD_LAYOUT_P
28436 : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28437 :
28438 : #if TARGET_MACHO
28439 : #undef TARGET_BINDS_LOCAL_P
28440 : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28441 : #else
28442 : #undef TARGET_BINDS_LOCAL_P
28443 : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
28444 : #endif
28445 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28446 : #undef TARGET_BINDS_LOCAL_P
28447 : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28448 : #endif
28449 :
28450 : #undef TARGET_ASM_OUTPUT_MI_THUNK
28451 : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28452 : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28453 : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28454 :
28455 : #undef TARGET_ASM_FILE_START
28456 : #define TARGET_ASM_FILE_START x86_file_start
28457 :
28458 : #undef TARGET_OPTION_OVERRIDE
28459 : #define TARGET_OPTION_OVERRIDE ix86_option_override
28460 :
28461 : #undef TARGET_REGISTER_MOVE_COST
28462 : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
28463 : #undef TARGET_MEMORY_MOVE_COST
28464 : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
28465 : #undef TARGET_RTX_COSTS
28466 : #define TARGET_RTX_COSTS ix86_rtx_costs
28467 : #undef TARGET_INSN_COST
28468 : #define TARGET_INSN_COST ix86_insn_cost
28469 : #undef TARGET_ADDRESS_COST
28470 : #define TARGET_ADDRESS_COST ix86_address_cost
28471 :
28472 : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
28473 : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
28474 : ix86_use_by_pieces_infrastructure_p
28475 :
28476 : #undef TARGET_OVERLAP_OP_BY_PIECES_P
28477 : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
28478 :
28479 : #undef TARGET_FLAGS_REGNUM
28480 : #define TARGET_FLAGS_REGNUM FLAGS_REG
28481 : #undef TARGET_FIXED_CONDITION_CODE_REGS
28482 : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28483 : #undef TARGET_CC_MODES_COMPATIBLE
28484 : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28485 :
28486 : #undef TARGET_MACHINE_DEPENDENT_REORG
28487 : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28488 :
28489 : #undef TARGET_BUILD_BUILTIN_VA_LIST
28490 : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28491 :
28492 : #undef TARGET_FOLD_BUILTIN
28493 : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
28494 :
28495 : #undef TARGET_GIMPLE_FOLD_BUILTIN
28496 : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
28497 :
28498 : #undef TARGET_COMPARE_VERSION_PRIORITY
28499 : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
28500 :
28501 : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
28502 : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
28503 : ix86_generate_version_dispatcher_body
28504 :
28505 : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
28506 : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
28507 : ix86_get_function_versions_dispatcher
28508 :
28509 : #undef TARGET_ENUM_VA_LIST_P
28510 : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
28511 :
28512 : #undef TARGET_FN_ABI_VA_LIST
28513 : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28514 :
28515 : #undef TARGET_CANONICAL_VA_LIST_TYPE
28516 : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28517 :
28518 : #undef TARGET_EXPAND_BUILTIN_VA_START
28519 : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28520 :
28521 : #undef TARGET_MD_ASM_ADJUST
28522 : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
28523 :
28524 : #undef TARGET_C_EXCESS_PRECISION
28525 : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
28526 : #undef TARGET_C_BITINT_TYPE_INFO
28527 : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
28528 : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
28529 : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
28530 : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
28531 : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
28532 : #undef TARGET_PROMOTE_PROTOTYPES
28533 : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28534 : #undef TARGET_PUSH_ARGUMENT
28535 : #define TARGET_PUSH_ARGUMENT ix86_push_argument
28536 : #undef TARGET_SETUP_INCOMING_VARARGS
28537 : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28538 : #undef TARGET_MUST_PASS_IN_STACK
28539 : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28540 : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
28541 : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
28542 : #undef TARGET_FUNCTION_ARG_ADVANCE
28543 : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
28544 : #undef TARGET_FUNCTION_ARG
28545 : #define TARGET_FUNCTION_ARG ix86_function_arg
28546 : #undef TARGET_INIT_PIC_REG
28547 : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
28548 : #undef TARGET_USE_PSEUDO_PIC_REG
28549 : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
28550 : #undef TARGET_FUNCTION_ARG_BOUNDARY
28551 : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
28552 : #undef TARGET_PASS_BY_REFERENCE
28553 : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28554 : #undef TARGET_INTERNAL_ARG_POINTER
28555 : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28556 : #undef TARGET_UPDATE_STACK_BOUNDARY
28557 : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28558 : #undef TARGET_GET_DRAP_RTX
28559 : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28560 : #undef TARGET_STRICT_ARGUMENT_NAMING
28561 : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28562 : #undef TARGET_STATIC_CHAIN
28563 : #define TARGET_STATIC_CHAIN ix86_static_chain
28564 : #undef TARGET_TRAMPOLINE_INIT
28565 : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
28566 : #undef TARGET_RETURN_POPS_ARGS
28567 : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
28568 :
28569 : #undef TARGET_WARN_FUNC_RETURN
28570 : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
28571 :
28572 : #undef TARGET_LEGITIMATE_COMBINED_INSN
28573 : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
28574 :
28575 : #undef TARGET_ASAN_SHADOW_OFFSET
28576 : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
28577 :
28578 : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
28579 : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28580 :
28581 : #undef TARGET_SCALAR_MODE_SUPPORTED_P
28582 : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28583 :
28584 : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
28585 : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
28586 : ix86_libgcc_floating_mode_supported_p
28587 :
28588 : #undef TARGET_VECTOR_MODE_SUPPORTED_P
28589 : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28590 :
28591 : #undef TARGET_C_MODE_FOR_SUFFIX
28592 : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
28593 :
28594 : #ifdef HAVE_AS_TLS
28595 : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
28596 : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
28597 : #endif
28598 :
28599 : #ifdef SUBTARGET_INSERT_ATTRIBUTES
28600 : #undef TARGET_INSERT_ATTRIBUTES
28601 : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
28602 : #endif
28603 :
28604 : #undef TARGET_MANGLE_TYPE
28605 : #define TARGET_MANGLE_TYPE ix86_mangle_type
28606 :
28607 : #undef TARGET_EMIT_SUPPORT_TINFOS
28608 : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
28609 :
28610 : #undef TARGET_STACK_PROTECT_GUARD
28611 : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
28612 :
28613 : #undef TARGET_STACK_PROTECT_GUARD_SYMBOL_P
28614 : #define TARGET_STACK_PROTECT_GUARD_SYMBOL_P \
28615 : ix86_stack_protect_guard_symbol_p
28616 :
28617 : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
28618 : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
28619 : ix86_stack_protect_runtime_enabled_p
28620 :
28621 : #if !TARGET_MACHO
28622 : #undef TARGET_STACK_PROTECT_FAIL
28623 : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
28624 : #endif
28625 :
28626 : #undef TARGET_FUNCTION_VALUE
28627 : #define TARGET_FUNCTION_VALUE ix86_function_value
28628 :
28629 : #undef TARGET_FUNCTION_VALUE_REGNO_P
28630 : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
28631 :
28632 : #undef TARGET_ZERO_CALL_USED_REGS
28633 : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
28634 :
28635 : #undef TARGET_PROMOTE_FUNCTION_MODE
28636 : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
28637 :
28638 : #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
28639 : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
28640 :
28641 : #undef TARGET_MEMBER_TYPE_FORCES_BLK
28642 : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
28643 :
28644 : #undef TARGET_INSTANTIATE_DECLS
28645 : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
28646 :
28647 : #undef TARGET_SECONDARY_RELOAD
28648 : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
28649 : #undef TARGET_SECONDARY_MEMORY_NEEDED
28650 : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
28651 : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
28652 : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
28653 :
28654 : #undef TARGET_CLASS_MAX_NREGS
28655 : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
28656 :
28657 : #undef TARGET_PREFERRED_RELOAD_CLASS
28658 : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
28659 : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
28660 : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
28661 : /* When this hook returns true for MODE, the compiler allows
28662 : registers explicitly used in the rtl to be used as spill registers
28663 : but prevents the compiler from extending the lifetime of these
28664 : registers. */
28665 : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
28666 : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
28667 : #undef TARGET_CLASS_LIKELY_SPILLED_P
28668 : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
28669 : #undef TARGET_CALLEE_SAVE_COST
28670 : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
28671 :
28672 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
28673 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
28674 : ix86_builtin_vectorization_cost
28675 : #undef TARGET_VECTORIZE_VEC_PERM_CONST
28676 : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
28677 : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
28678 : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
28679 : ix86_preferred_simd_mode
28680 : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
28681 : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
28682 : ix86_split_reduction
28683 : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
28684 : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
28685 : ix86_autovectorize_vector_modes
28686 : #undef TARGET_VECTORIZE_GET_MASK_MODE
28687 : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
28688 : #undef TARGET_VECTORIZE_CREATE_COSTS
28689 : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
28690 :
28691 : #undef TARGET_SET_CURRENT_FUNCTION
28692 : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
28693 :
28694 : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
28695 : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
28696 :
28697 : #undef TARGET_OPTION_SAVE
28698 : #define TARGET_OPTION_SAVE ix86_function_specific_save
28699 :
28700 : #undef TARGET_OPTION_RESTORE
28701 : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
28702 :
28703 : #undef TARGET_OPTION_POST_STREAM_IN
28704 : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
28705 :
28706 : #undef TARGET_OPTION_PRINT
28707 : #define TARGET_OPTION_PRINT ix86_function_specific_print
28708 :
28709 : #undef TARGET_CAN_INLINE_P
28710 : #define TARGET_CAN_INLINE_P ix86_can_inline_p
28711 :
28712 : #undef TARGET_LEGITIMATE_ADDRESS_P
28713 : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
28714 :
28715 : #undef TARGET_REGISTER_PRIORITY
28716 : #define TARGET_REGISTER_PRIORITY ix86_register_priority
28717 :
28718 : #undef TARGET_REGISTER_USAGE_LEVELING_P
28719 : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
28720 :
28721 : #undef TARGET_LEGITIMATE_CONSTANT_P
28722 : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
28723 :
28724 : #undef TARGET_COMPUTE_FRAME_LAYOUT
28725 : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
28726 :
28727 : #undef TARGET_FRAME_POINTER_REQUIRED
28728 : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
28729 :
28730 : #undef TARGET_CAN_ELIMINATE
28731 : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
28732 :
28733 : #undef TARGET_EXTRA_LIVE_ON_ENTRY
28734 : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
28735 :
28736 : #undef TARGET_ASM_CODE_END
28737 : #define TARGET_ASM_CODE_END ix86_code_end
28738 :
28739 : #undef TARGET_CONDITIONAL_REGISTER_USAGE
28740 : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
28741 :
28742 : #undef TARGET_CANONICALIZE_COMPARISON
28743 : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
28744 :
28745 : #undef TARGET_LOOP_UNROLL_ADJUST
28746 : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
28747 :
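28748 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. NOTE(review): the hook is still defined below, so the disabling presumably happens inside ix86_spill_class itself -- confirm. */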
28749 : #undef TARGET_SPILL_CLASS
28750 : #define TARGET_SPILL_CLASS ix86_spill_class
28751 :
28752 : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
28753 : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
28754 : ix86_simd_clone_compute_vecsize_and_simdlen
28755 :
28756 : #undef TARGET_SIMD_CLONE_ADJUST
28757 : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
28758 :
28759 : #undef TARGET_SIMD_CLONE_USABLE
28760 : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
28761 :
28762 : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
28763 : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
28764 :
28765 : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
28766 : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
28767 : ix86_float_exceptions_rounding_supported_p
28768 :
28769 : #undef TARGET_MODE_EMIT
28770 : #define TARGET_MODE_EMIT ix86_emit_mode_set
28771 :
28772 : #undef TARGET_MODE_NEEDED
28773 : #define TARGET_MODE_NEEDED ix86_mode_needed
28774 :
28775 : #undef TARGET_MODE_AFTER
28776 : #define TARGET_MODE_AFTER ix86_mode_after
28777 :
28778 : #undef TARGET_MODE_ENTRY
28779 : #define TARGET_MODE_ENTRY ix86_mode_entry
28780 :
28781 : #undef TARGET_MODE_EXIT
28782 : #define TARGET_MODE_EXIT ix86_mode_exit
28783 :
28784 : #undef TARGET_MODE_PRIORITY
28785 : #define TARGET_MODE_PRIORITY ix86_mode_priority
28786 :
/* Hooks in this group that are plain constants rather than functions.  */
#undef  TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
#undef  TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
#undef  TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST

/* Hooks in this group that are routed to ix86_* functions.  */
#undef  TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS ix86_offload_options
#undef  TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
#undef  TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
#undef  TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
#undef  TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
#undef  TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
#undef  TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
28820 :
/* Hard-register and machine-mode hooks.  */
#undef  TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef  TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
#undef  TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
#undef  TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered
#undef  TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

/* Alignment hooks.  */
#undef  TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
#undef  TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef  TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

/* Remaining hooks of this group.  */
#undef  TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record
#undef  TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
#undef  TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME ix86_get_multilib_abi_name
#undef  TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok

/* TARGET_ASM_RELOC_RW_MASK is only overridden when neither TARGET_MACHO
   nor TARGET_DLLIMPORT_DECL_ATTRIBUTES is in effect.  */
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef  TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif
28861 :
/* Route the TARGET_MEMTAG_* hooks to their ix86_memtag_* implementations.  */
#undef  TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
#undef  TARGET_MEMTAG_ADD_TAG
#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
#undef  TARGET_MEMTAG_SET_TAG
#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
#undef  TARGET_MEMTAG_EXTRACT_TAG
#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
#undef  TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
#undef  TARGET_MEMTAG_TAG_BITSIZE
#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
28879 :
/* Route the TARGET_*CCMP* hooks to their ix86 implementations.  */
#undef  TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
#undef  TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
#undef  TARGET_HAVE_CCMP
#define TARGET_HAVE_CCMP ix86_have_ccmp

#undef  TARGET_MODE_CAN_TRANSFER_BITS
#define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits

#undef  TARGET_REDZONE_CLOBBER
#define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
28894 :
28895 : static bool
28896 92996 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
28897 : {
28898 : #ifdef OPTION_GLIBC
28899 92996 : if (OPTION_GLIBC)
28900 92996 : return (built_in_function)fcode == BUILT_IN_MEMPCPY;
28901 : else
28902 : return false;
28903 : #else
28904 : return false;
28905 : #endif
28906 : }
28907 :
28908 : #undef TARGET_LIBC_HAS_FAST_FUNCTION
28909 : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
28910 :
28911 : static unsigned
28912 78204 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
28913 : bool boundary_p)
28914 : {
28915 : #ifdef OPTION_GLIBC
28916 78204 : bool glibc_p = OPTION_GLIBC;
28917 : #else
28918 : bool glibc_p = false;
28919 : #endif
28920 78204 : if (glibc_p)
28921 : {
28922 : /* If __FAST_MATH__ is defined, glibc provides libmvec. */
28923 78204 : unsigned int libmvec_ret = 0;
28924 78204 : if (!flag_trapping_math
28925 8300 : && flag_unsafe_math_optimizations
28926 3378 : && flag_finite_math_only
28927 3352 : && !flag_signed_zeros
28928 3352 : && !flag_errno_math)
28929 3352 : switch (cfn)
28930 : {
28931 1396 : CASE_CFN_COS:
28932 1396 : CASE_CFN_COS_FN:
28933 1396 : CASE_CFN_SIN:
28934 1396 : CASE_CFN_SIN_FN:
28935 1396 : if (!boundary_p)
28936 : {
28937 : /* With non-default rounding modes, libmvec provides
28938 : complete garbage in results. E.g.
28939 : _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
28940 : returns 0.00333309174f rather than 1.40129846e-45f. */
28941 587 : if (flag_rounding_math)
28942 : return ~0U;
28943 : /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
28944 : claims libmvec maximum error is 4ulps.
28945 : My own random testing indicates 2ulps for SFmode and
28946 : 0.5ulps for DFmode, but let's go with the 4ulps. */
28947 : libmvec_ret = 4;
28948 : }
28949 : break;
28950 : default:
28951 : break;
28952 : }
28953 78204 : unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
28954 : boundary_p);
28955 78204 : return MAX (ret, libmvec_ret);
28956 : }
28957 0 : return default_libm_function_max_error (cfn, mode, boundary_p);
28958 : }
28959 :
28960 : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
28961 : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
28962 :
#if TARGET_MACHO
/* Implement TARGET_CANNOT_COPY_INSN_P (TARGET_MACHO only).

   Return true when INSN is a single set whose source is an UNSPEC with
   code UNSPEC_SET_GOT and the target is not 64-bit; false otherwise.  */
static bool
ix86_cannot_copy_insn_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return false;

  rtx pat = single_set (insn);
  if (!pat)
    return false;

  rtx value = SET_SRC (pat);
  return (GET_CODE (value) == UNSPEC
          && XINT (value, 1) == UNSPEC_SET_GOT);
}

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p

#endif /* TARGET_MACHO */
28985 :
/* The target self-test entry point is only registered when CHECKING_P
   is nonzero.  */
#if CHECKING_P
# undef  TARGET_RUN_TARGET_SELFTESTS
# define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* CHECKING_P */

#undef  TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
28993 :
28994 : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28995 : sbitmap
28996 733358 : ix86_get_separate_components (void)
28997 : {
28998 733358 : HOST_WIDE_INT offset, to_allocate;
28999 733358 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
29000 733358 : bitmap_clear (components);
29001 733358 : struct machine_function *m = cfun->machine;
29002 :
29003 733358 : offset = m->frame.stack_pointer_offset;
29004 733358 : to_allocate = offset - m->frame.sse_reg_save_offset;
29005 :
29006 : /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
29007 : Experiments show that APX PPX can speed up the prologue. If the function
29008 : does not exit early during actual execution, then using APX PPX is faster.
29009 : If the function always exits early during actual execution, then shrink
29010 : wrap separate reduces the number of MOV (PUSH/POP) instructions actually
29011 : executed, thus speeding up execution.
29012 : foo:
29013 : movl $1, %eax
29014 : testq %rdi, %rdi
29015 : jne.L60
29016 : ret ---> early return.
29017 : .L60:
29018 : subq $88, %rsp ---> belong to prologue.
29019 : xorl %eax, %eax
29020 : movq %rbx, 40 (%rsp) ---> belong to prologue.
29021 : movq 8 (%rdi), %rbx
29022 : movq %rbp, 48 (%rsp) ---> belong to prologue.
29023 : movq %rdi, %rbp
29024 : testq %rbx, %rbx
29025 : jne.L61
29026 : movq 40 (%rsp), %rbx
29027 : movq 48 (%rsp), %rbp
29028 : addq $88, %rsp
29029 : ret
29030 : .L61:
29031 : movq %r12, 56 (%rsp) ---> belong to prologue.
29032 : movq %r13, 64 (%rsp) ---> belong to prologue.
29033 : movq %r14, 72 (%rsp) ---> belong to prologue.
29034 : ... ...
29035 :
29036 : Disable shrink wrap separate when PPX is enabled. */
29037 733358 : if ((TARGET_APX_PPX && !crtl->calls_eh_return)
29038 732890 : || cfun->machine->func_type != TYPE_NORMAL
29039 : || TARGET_SEH
29040 732792 : || crtl->stack_realign_needed
29041 723195 : || m->call_ms2sysv)
29042 : return components;
29043 :
29044 : /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
29045 : Disable shrink wrap separate when MOV is prohibited. */
29046 721273 : if (save_regs_using_push_pop (to_allocate))
29047 : return components;
29048 :
29049 32485458 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29050 32136152 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29051 : {
29052 : /* Skip registers with large offsets, where a pseudo may be needed. */
29053 601224 : if (IN_RANGE (offset, -0x8000, 0x7fff))
29054 600157 : bitmap_set_bit (components, regno);
29055 647210 : offset += UNITS_PER_WORD;
29056 : }
29057 :
29058 : /* Don't mess with the following registers. */
29059 349306 : if (frame_pointer_needed)
29060 6349 : bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29061 :
29062 349306 : if (crtl->drap_reg)
29063 129 : bitmap_clear_bit (components, REGNO (crtl->drap_reg));
29064 :
29065 349306 : if (pic_offset_table_rtx)
29066 29898 : bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
29067 :
29068 : return components;
29069 : }
29070 :
29071 : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29072 : sbitmap
29073 9254040 : ix86_components_for_bb (basic_block bb)
29074 : {
29075 9254040 : bitmap in = DF_LIVE_IN (bb);
29076 9254040 : bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29077 9254040 : bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29078 :
29079 9254040 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
29080 9254040 : bitmap_clear (components);
29081 :
29082 9254040 : function_abi_aggregator callee_abis;
29083 9254040 : rtx_insn *insn;
29084 107864257 : FOR_BB_INSNS (bb, insn)
29085 98610217 : if (CALL_P (insn))
29086 3032706 : callee_abis.note_callee_abi (insn_callee_abi (insn));
29087 9254040 : HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
29088 :
29089 : /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
29090 860625720 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29091 851371680 : if (!fixed_regs[regno]
29092 851371680 : && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
29093 432682042 : || bitmap_bit_p (in, regno)
29094 407561131 : || bitmap_bit_p (gen, regno)
29095 395189744 : || bitmap_bit_p (kill, regno)))
29096 37757647 : bitmap_set_bit (components, regno);
29097 :
29098 9254040 : return components;
29099 : }
29100 :
29101 : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29102 : void
29103 468615 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
29104 : {
29105 : /* Nothing to do for x86. */
29106 468615 : }
29107 :
29108 : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29109 : void
29110 160913 : ix86_emit_prologue_components (sbitmap components)
29111 : {
29112 160913 : HOST_WIDE_INT cfa_offset;
29113 160913 : struct machine_function *m = cfun->machine;
29114 :
29115 160913 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
29116 160913 : - m->frame.stack_pointer_offset;
29117 14964909 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29118 14803996 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29119 : {
29120 732460 : if (bitmap_bit_p (components, regno))
29121 189407 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
29122 781903 : cfa_offset -= UNITS_PER_WORD;
29123 : }
29124 160913 : }
29125 :
29126 : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29127 : void
29128 143689 : ix86_emit_epilogue_components (sbitmap components)
29129 : {
29130 143689 : HOST_WIDE_INT cfa_offset;
29131 143689 : struct machine_function *m = cfun->machine;
29132 143689 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
29133 143689 : - m->frame.stack_pointer_offset;
29134 :
29135 13363077 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29136 13219388 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29137 : {
29138 655773 : if (bitmap_bit_p (components, regno))
29139 : {
29140 254041 : rtx reg = gen_rtx_REG (word_mode, regno);
29141 254041 : rtx mem;
29142 254041 : rtx_insn *insn;
29143 :
29144 254041 : mem = choose_baseaddr (cfa_offset, NULL);
29145 254041 : mem = gen_frame_mem (word_mode, mem);
29146 254041 : insn = emit_move_insn (reg, mem);
29147 :
29148 254041 : RTX_FRAME_RELATED_P (insn) = 1;
29149 254041 : add_reg_note (insn, REG_CFA_RESTORE, reg);
29150 : }
29151 711108 : cfa_offset -= UNITS_PER_WORD;
29152 : }
29153 143689 : }
29154 :
29155 : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29156 : void
29157 44085 : ix86_set_handled_components (sbitmap components)
29158 : {
29159 4099905 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29160 4055820 : if (bitmap_bit_p (components, regno))
29161 : {
29162 104539 : cfun->machine->reg_is_wrapped_separately[regno] = true;
29163 104539 : cfun->machine->use_fast_prologue_epilogue = true;
29164 104539 : cfun->machine->frame.save_regs_using_mov = true;
29165 : }
29166 44085 : }
29167 :
/* Route the TARGET_SHRINK_WRAP_* hooks to their ix86 implementations.  */
#undef  TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components

#undef  TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb

#undef  TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components

#undef  TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  ix86_emit_prologue_components

#undef  TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  ix86_emit_epilogue_components

#undef  TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
29182 :
/* Define the global target structure `targetm', initialized from
   TARGET_INITIALIZER.  NOTE(review): TARGET_INITIALIZER is not defined
   in this part of the file; presumably it expands using the TARGET_*
   macros overridden in this file, which is why this definition has to
   come after all of them -- confirm against target-def.h.  */
struct gcc_target targetm = TARGET_INITIALIZER;
29184 :
29185 : #include "gt-i386.h"
|