Line data Source code
1 : /* Subroutines used for code generation on IA-32.
2 : Copyright (C) 1988-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #define INCLUDE_STRING
21 : #define IN_TARGET_CODE 1
22 :
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "memmodel.h"
30 : #include "gimple.h"
31 : #include "cfghooks.h"
32 : #include "cfgloop.h"
33 : #include "df.h"
34 : #include "tm_p.h"
35 : #include "stringpool.h"
36 : #include "expmed.h"
37 : #include "optabs.h"
38 : #include "regs.h"
39 : #include "emit-rtl.h"
40 : #include "recog.h"
41 : #include "cgraph.h"
42 : #include "diagnostic.h"
43 : #include "cfgbuild.h"
44 : #include "alias.h"
45 : #include "fold-const.h"
46 : #include "attribs.h"
47 : #include "calls.h"
48 : #include "stor-layout.h"
49 : #include "varasm.h"
50 : #include "output.h"
51 : #include "insn-attr.h"
52 : #include "flags.h"
53 : #include "except.h"
54 : #include "explow.h"
55 : #include "expr.h"
56 : #include "cfgrtl.h"
57 : #include "common/common-target.h"
58 : #include "langhooks.h"
59 : #include "reload.h"
60 : #include "gimplify.h"
61 : #include "dwarf2.h"
62 : #include "tm-constrs.h"
63 : #include "cselib.h"
64 : #include "sched-int.h"
65 : #include "opts.h"
66 : #include "tree-pass.h"
67 : #include "context.h"
68 : #include "pass_manager.h"
69 : #include "target-globals.h"
70 : #include "gimple-iterator.h"
71 : #include "gimple-fold.h"
72 : #include "tree-vectorizer.h"
73 : #include "shrink-wrap.h"
74 : #include "builtins.h"
75 : #include "rtl-iter.h"
76 : #include "tree-iterator.h"
77 : #include "dbgcnt.h"
78 : #include "case-cfn-macros.h"
79 : #include "dojump.h"
80 : #include "fold-const-call.h"
81 : #include "tree-vrp.h"
82 : #include "tree-ssanames.h"
83 : #include "selftest.h"
84 : #include "selftest-rtl.h"
85 : #include "print-rtl.h"
86 : #include "intl.h"
87 : #include "ifcvt.h"
88 : #include "symbol-summary.h"
89 : #include "sreal.h"
90 : #include "ipa-cp.h"
91 : #include "ipa-prop.h"
92 : #include "ipa-fnsummary.h"
93 : #include "wide-int-bitmask.h"
94 : #include "tree-vector-builder.h"
95 : #include "debug.h"
96 : #include "dwarf2out.h"
97 : #include "i386-options.h"
98 : #include "i386-builtins.h"
99 : #include "i386-expand.h"
100 : #include "i386-features.h"
101 : #include "function-abi.h"
102 : #include "rtl-error.h"
103 : #include "gimple-pretty-print.h"
104 :
105 : /* This file should be included last. */
106 : #include "target-def.h"
107 :
/* Forward declarations for static helpers defined later in this file.
   Note the defaulted second argument of ix86_emit_restore_reg_using_pop.  */
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
110 :
111 :
/* Fallback stack-limit value; a target header may override this.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Index 4 covers every mode other than QI/HI/SI/DImode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
123 :
124 :
/* Cost table selected by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Active cost table: set by -mtune, or by -Os (size costs).  */
const struct processor_costs *ix86_cost = NULL;
130 :
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively,
   taken from the target-header name macros; indexed by hard regno.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
140 :
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  The entries must follow
   the backend's hard-register numbering, mirrored by the group comments
   below.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
181 :
/* The "default" register map used in 32bit mode.  Maps each GCC hard
   regno to a debugger register number; INVALID_REGNUM marks registers
   that do not exist in 32-bit mode.  */

unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
212 :
/* The "default" register map used in 64bit mode.  Maps each GCC hard
   regno to a debugger register number (x86-64 numbering).  */

unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
242 :
243 : /* Define the register numbers to be used in Dwarf debugging information.
244 : The SVR4 reference port C compiler uses the following register numbers
245 : in its Dwarf output code:
246 : 0 for %eax (gcc regno = 0)
247 : 1 for %ecx (gcc regno = 2)
248 : 2 for %edx (gcc regno = 1)
249 : 3 for %ebx (gcc regno = 3)
250 : 4 for %esp (gcc regno = 7)
251 : 5 for %ebp (gcc regno = 6)
252 : 6 for %esi (gcc regno = 4)
253 : 7 for %edi (gcc regno = 5)
254 : The following three DWARF register numbers are never generated by
255 : the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 : believed these numbers have these meanings.
257 : 8 for %eip (no gcc equivalent)
258 : 9 for %eflags (gcc regno = 17)
259 : 10 for %trapno (no gcc equivalent)
260 : It is not at all clear how we should number the FP stack registers
261 : for the x86 architecture. If the version of SDB on x86/svr4 were
262 : a bit less brain dead with respect to floating-point then we would
263 : have a precedent to follow with respect to DWARF register numbers
264 : for x86 FP registers, but the SDB on x86/svr4 was so completely
265 : broken with respect to FP registers that it is hardly worth thinking
266 : of it as something to strive for compatibility with.
267 : The version of x86/svr4 SDB I had does (partially)
268 : seem to believe that DWARF register number 11 is associated with
269 : the x86 register %st(0), but that's about all. Higher DWARF
270 : register numbers don't seem to be associated with anything in
271 : particular, and even for DWARF regno 11, SDB only seemed to under-
272 : stand that it should say that a variable lives in %st(0) (when
273 : asked via an `=' command) if we said it was in DWARF regno 11,
274 : but SDB still printed garbage when asked for the value of the
275 : variable in question (via a `/' command).
276 : (Also note that the labels SDB printed for various FP stack regs
277 : when doing an `x' command were all wrong.)
278 : Note that these problems generally don't affect the native SVR4
279 : C compiler because it doesn't allow the use of -O with -g and
280 : because when it is *not* optimizing, it allocates a memory
281 : location for each floating-point variable, and the memory
282 : location is what gets described in the DWARF AT_location
283 : attribute for the variable in question.
284 : Regardless of the severe mental illness of the x86/svr4 SDB, we
285 : do something sensible here and we use the following DWARF
286 : register numbers. Note that these are all stack-top-relative
287 : numbers.
288 : 11 for %st(0) (gcc regno = 8)
289 : 12 for %st(1) (gcc regno = 9)
290 : 13 for %st(2) (gcc regno = 10)
291 : 14 for %st(3) (gcc regno = 11)
292 : 15 for %st(4) (gcc regno = 12)
293 : 16 for %st(5) (gcc regno = 13)
294 : 17 for %st(6) (gcc regno = 14)
295 : 18 for %st(7) (gcc regno = 15)
296 : */
/* SVR4/DWARF register numbering for 32-bit mode; see the long comment
   above for the history and rationale of these assignments.  */
unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
325 :
/* Define parameter passing and return registers.  */

/* Integer argument registers for the SysV x86-64 ABI, in passing order.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers for the Microsoft x64 ABI, in passing order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Similar as Clang's preserve_none function parameter passing.
   NB: Use DI_REG and SI_REG, see ix86_function_arg_regno_p.  */

static int const x86_64_preserve_none_int_parameter_registers[6] =
{
  R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
};

/* Integer registers used for returning values.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
350 :
/* Define the structure for the machine field in struct function.
   One entry per requested stack-local slot; see assign_386_stack_local
   for usage — TODO(review): confirm against that function, which is
   outside this chunk.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;	/* machine mode of the slot, narrowed to short */
  unsigned short n;	/* slot number distinguishing same-mode slots */
  rtx rtl;		/* the RTL for the slot itself */
  struct stack_local_entry *next;	/* singly-linked chain */
};
359 :
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, cached with its length
   so operand printing can recognize internal labels cheaply.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
398 :
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes one argument may span when being
   classified — NOTE(review): confirm against the classification code,
   which is outside this chunk.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
   EXT_80387_CONSTANTS_INIT records whether that has happened.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;
427 :
428 :
/* Forward declarations for static functions defined later in this file.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
442 :
/* x86 condition codes, listed in their 4-bit encoding order starting
   at 0 (O, NO, B, NB, ...).  */
typedef enum ix86_flags_cc
{
  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
} ix86_cc;

/* Default-flag-value (dfv) assembly suffixes, indexed by ix86_flags_cc —
   presumably consumed when printing APX CCMP/CTEST operands; verify
   against the operand-printing code outside this chunk.  */
static const char *ix86_ccmp_dfv_mapping[] =
{
  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
};


/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
463 :
464 : /* Return true if a red-zone is in use. We can't use red-zone when
465 : there are local indirect jumps, like "indirect_jump" or "tablejump",
466 : which jumps to another place in the function, since "call" in the
467 : indirect thunk pushes the return address onto stack, destroying
468 : red-zone.
469 :
470 : NB: Don't use red-zone for functions with no_caller_saved_registers
471 : and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
472 : for 31 GPRs or 15 GPRs + 16 XMM registers.
473 :
474 : TODO: If we can reserve the first 2 WORDs, for PUSH and, another
475 : for CALL, in red-zone, we can allow local indirect jumps with
476 : indirect thunk. */
477 :
478 : bool
479 9889340 : ix86_using_red_zone (void)
480 : {
481 9889340 : return (TARGET_RED_ZONE
482 8947516 : && !TARGET_64BIT_MS_ABI
483 8645038 : && ((!TARGET_APX_EGPR && !TARGET_SSE)
484 8622039 : || (cfun->machine->call_saved_registers
485 8622039 : != TYPE_NO_CALLER_SAVED_REGISTERS))
486 18534317 : && (!cfun->machine->has_local_indirect_jump
487 47544 : || cfun->machine->indirect_branch_type == indirect_branch_keep));
488 : }
489 :
490 : /* Return true, if profiling code should be emitted before
491 : prologue. Otherwise it returns false.
492 : Note: For x86 with "hotfix" it is sorried. */
493 : static bool
494 4492288 : ix86_profile_before_prologue (void)
495 : {
496 4492288 : return flag_fentry != 0;
497 : }
498 :
/* Update register usage after having seen the compiler flags.  Adjusts
   the global fixed/call-used register data and the ACCESSIBLE_REG_SET /
   REG_CLASS_CONTENTS tables to match the selected ISA and ABI.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers.
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
	  == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  Values > 1 encode an
	 ABI-conditional entry resolved against C_MASK here.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers (zmm16-31 and the
     mask registers).  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers (r16-r31).  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }
}
575 :
/* Canonicalize a comparison from one we don't have to one we do have.

   CODE points at the comparison code and OP0/OP1 at its operands; all
   three may be rewritten in place.  OP0_PRESERVE_VALUE is true when the
   caller still needs *OP0's value, which rules out every transformation
   below (each one swaps or replaces the operands).  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	  return;
	}
    }

  /* SUB (a, b) underflows precisely when a < b.  Convert
     (compare (minus (a b)) a) to (compare (a b))
     to match *sub<mode>_3 pattern.  */
  if (!op0_preserve_value
      && (*code == GTU || *code == LEU)
      && GET_CODE (*op0) == MINUS
      && rtx_equal_p (XEXP (*op0, 0), *op1))
    {
      *op1 = XEXP (*op0, 1);
      *op0 = XEXP (*op0, 0);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }

  /* Swap operands of GTU comparison to canonicalize
     addcarry/subborrow comparison.  */
  if (!op0_preserve_value
      && *code == GTU
      && GET_CODE (*op0) == PLUS
      && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
      && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
      && GET_CODE (*op1) == ZERO_EXTEND)
    {
      std::swap (*op0, *op1);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }
}
629 :
/* Hook to determine if one function can safely inline another.
   CALLER and CALLEE are FUNCTION_DECLs; returns true when CALLEE's
   target options are compatible with CALLER's.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Lets hope
     user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A function with no explicit target attribute uses the defaults.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not.  So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
	   && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
722 :
723 : /* Return true if this goes in large data/bss. */
724 :
725 : static bool
726 80664491 : ix86_in_large_data_p (tree exp)
727 : {
728 80664491 : if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
729 80664253 : && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
730 : return false;
731 :
732 1147 : if (exp == NULL_TREE)
733 : return false;
734 :
735 : /* Functions are never large data. */
736 1147 : if (TREE_CODE (exp) == FUNCTION_DECL)
737 : return false;
738 :
739 : /* Automatic variables are never large data. */
740 279 : if (VAR_P (exp) && !is_global_var (exp))
741 : return false;
742 :
743 279 : if (VAR_P (exp) && DECL_SECTION_NAME (exp))
744 : {
745 51 : const char *section = DECL_SECTION_NAME (exp);
746 51 : if (strcmp (section, ".ldata") == 0
747 51 : || startswith (section, ".ldata.")
748 51 : || strcmp (section, ".lbss") == 0
749 51 : || startswith (section, ".lbss.")
750 99 : || startswith (section, ".gnu.linkonce.lb."))
751 : return true;
752 : return false;
753 : }
754 : else
755 : {
756 228 : HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
757 :
758 : /* If this is an incomplete type with size 0, then we can't put it
759 : in data because it might be too big when completed. Also,
760 : int_size_in_bytes returns -1 if size can vary or is larger than
761 : an integer in which case also it is safer to assume that it goes in
762 : large data. */
763 228 : if (size <= 0 || size > ix86_section_threshold)
764 : return true;
765 : }
766 :
767 : return false;
768 : }
769 :
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  ALIGN is the required alignment, forwarded
   to the default hook when DECL is not large data.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      /* Map the generic section category to the corresponding .l*
	 large-data section name; SNAME stays NULL for categories that
	 keep their default section.  */
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories cannot occur on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
837 :
/* Select a set of attributes (SECTION_* flags) for section NAME based
   on the properties of DECL and whether or not RELOC indicates that
   DECL's initializer might contain runtime relocations.  Extends
   default_section_type_flags with the large-model ".l*" sections.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Large-model data additionally carries SECTION_LARGE.  */
  if (ix86_in_large_data_p (decl))
    flags |= SECTION_LARGE;

  /* With no DECL to categorize, recognize the relro large-data section
     names explicitly.  */
  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || startswith (name, ".lbss.")
      || startswith (name, ".gnu.linkonce.lb."))
    {
      flags |= SECTION_BSS;
      /* Clear SECTION_NOTYPE so .lbss etc. are marked @nobits in
	 default_elf_asm_named_section.  */
      flags &= ~SECTION_NOTYPE;
    }

  return flags;
}
867 :
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  Large-model DECLs get ".l"-prefixed
   per-symbol sections; everything else uses default_unique_section.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  /* Short prefix for linkonce sections, long one otherwise.  */
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories are asserted never to occur here.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  /* Section name is "<linkonce><prefix>.<symbol>".  */
	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
931 :
932 : /* Return true if TYPE has no_callee_saved_registers or preserve_none
933 : attribute. */
934 :
935 : bool
936 7532982 : ix86_type_no_callee_saved_registers_p (const_tree type)
937 : {
938 15065964 : return (lookup_attribute ("no_callee_saved_registers",
939 7532982 : TYPE_ATTRIBUTES (type)) != NULL
940 15065833 : || lookup_attribute ("preserve_none",
941 7532851 : TYPE_ATTRIBUTES (type)) != NULL);
942 : }
943 :
944 : #ifdef COMMON_ASM_OP
945 :
946 : #ifndef LARGECOMM_SECTION_ASM_OP
947 : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
948 : #endif
949 :
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			const char *name, unsigned HOST_WIDE_INT size,
			unsigned align)
{
  /* In the medium/large code models, objects bigger than
     -mlarge-data-threshold go into the large .lbss section and are
     declared with .largecomm instead of the ordinary common op.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  /* Emit "<name>,<size>,<byte alignment>".  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
973 : #endif
974 :
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  Switches to the appropriate BSS-style
   section (large-model objects go to .lbss), aligns, declares the
   object and reserves SIZE bytes.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  /* Large objects under the medium/large code models live in .lbss.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so zero-sized objects get an address.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
998 :
999 : /* Decide whether we must probe the stack before any space allocation
1000 : on this target. It's essentially TARGET_STACK_PROBE except when
1001 : -fstack-check causes the stack to be already probed differently. */
1002 :
1003 : bool
1004 869089 : ix86_target_stack_probe (void)
1005 : {
1006 : /* Do not probe the stack twice if static stack checking is enabled. */
1007 869089 : if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
1008 : return false;
1009 :
1010 869089 : return TARGET_STACK_PROBE;
1011 : }
1012 :
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  DECL may be NULL for an indirect
   call.  Return true iff a sibcall (tail call) is permitted.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  /* True for a direct call that does not bind locally (e.g. a call to
     a preemptible global symbol).  */
  bool bind_global = decl && !targetm.binds_local_p (decl);

  /* Naked functions manage their own prologue/epilogue; no sibcalls.  */
  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Sibling call isn't OK if callee has no callee-saved registers
     and the calling function has callee-saved registers.  */
  if ((cfun->machine->call_saved_registers
       != TYPE_NO_CALLEE_SAVED_REGISTERS)
      && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
      && ix86_type_no_callee_saved_registers_p (type))
    return false;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      /* x87 stack returns must match exactly on both sides.  */
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    /* Caller returns void: callee's register return value is simply
       ignored, so any location is fine.  */
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute ("indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1161 :
/* This function determines from TYPE the calling-convention.  The
   result is a mask of IX86_CALLCVT_* bits: a base convention (cdecl,
   stdcall, fastcall or thiscall) possibly combined with the regparm /
   sseregparm modifier bits.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  /* All 64-bit code uses a single (cdecl-like) convention.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      /* Explicit base-convention attributes are mutually exclusive.  */
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      /* An explicit base convention settles the matter.  */
      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  /* No explicit base convention: -mrtd makes stdcall the default for
     non-variadic functions.  */
  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  /* Otherwise default to cdecl, except for MS-ABI methods which
     default to thiscall.  */
  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
1211 :
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  /* Only function/method types carry calling-convention attributes.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  /* Calling conventions must agree.  */
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  /* So must the number of register-passed arguments.  */
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  /* Both must agree on whether callee-saved registers are clobbered.  */
  if (ix86_type_no_callee_saved_registers_p (type1)
      != ix86_type_no_callee_saved_registers_p (type2))
    return 0;

  /* The preserve_none attribute changes the calling convention, but
     only for 64-bit targets.  */
  if (TARGET_64BIT
      && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
	  != lookup_attribute ("preserve_none",
			       TYPE_ATTRIBUTES (type2))))
    return 0;

  return 1;
}
1247 :
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  The result is the number of integer
   registers available for passing arguments.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* 64-bit: the register count is fixed by the ABI.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  /* An explicit regparm attribute overrides everything.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  /* fastcall and thiscall imply fixed register counts.  */
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  /* Only safe when the callee is local and its signature may
	     be changed freely.  */
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overriden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
1340 :
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in the situation where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* Only meaningful for 32-bit code.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  /* sseregparm without SSE cannot work; diagnose when asked.  */
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* Without a DECL we cannot prove the callee is local.  */
  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.
	     See PR66047  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
1409 :
1410 : /* Return true if EAX is live at the start of the function. Used by
1411 : ix86_expand_prologue to determine if we need special help before
1412 : calling allocate_stack_worker. */
1413 :
1414 : static bool
1415 7090 : ix86_eax_live_at_start_p (void)
1416 : {
1417 : /* Cheat. Don't bother working forward from ix86_function_regparm
1418 : to the function type to whether an actual argument is located in
1419 : eax. Instead just look at cfg info, which is still close enough
1420 : to correct at this point. This gives false positives for broken
1421 : functions that might use uninitialized data that happens to be
1422 : allocated in eax, but who cares? */
1423 7090 : return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1424 : }
1425 :
1426 : static bool
1427 159954 : ix86_keep_aggregate_return_pointer (tree fntype)
1428 : {
1429 159954 : tree attr;
1430 :
1431 159954 : if (!TARGET_64BIT)
1432 : {
1433 159954 : attr = lookup_attribute ("callee_pop_aggregate_return",
1434 159954 : TYPE_ATTRIBUTES (fntype));
1435 159954 : if (attr)
1436 0 : return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1437 :
1438 : /* For 32-bit MS-ABI the default is to keep aggregate
1439 : return pointer. */
1440 159954 : if (ix86_function_type_abi (fntype) == MS_ABI)
1441 : return true;
1442 : }
1443 : return KEEP_AGGREGATE_RETURN_POINTER != 0;
1444 : }
1445 :
1446 : /* Value is the number of bytes of arguments automatically
1447 : popped when returning from a subroutine call.
1448 : FUNDECL is the declaration node of the function (as a tree),
1449 : FUNTYPE is the data type of the function (as a tree),
1450 : or for a library call it is an identifier node for the subroutine name.
1451 : SIZE is the number of bytes of arguments passed on the stack.
1452 :
1453 : On the 80386, the RTD insn may be used to pop them if the number
1454 : of args is fixed, but if the number is variable then the caller
1455 : must pop them all. RTD can't be used for library calls now
1456 : because the library is compiled with the Unix compiler.
1457 : Use of RTD is a selectable option, since it is incompatible with
1458 : standard Unix calling sequences. If the option is not selected,
1459 : the caller must always pop the args.
1460 :
1461 : The attribute stdcall is equivalent to RTD on a per module basis. */
1462 :
1463 : static poly_int64
1464 7582995 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1465 : {
1466 7582995 : unsigned int ccvt;
1467 :
1468 : /* None of the 64-bit ABIs pop arguments. */
1469 7582995 : if (TARGET_64BIT)
1470 6709772 : return 0;
1471 :
1472 873223 : ccvt = ix86_get_callcvt (funtype);
1473 :
1474 873223 : if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1475 : | IX86_CALLCVT_THISCALL)) != 0
1476 873223 : && ! stdarg_p (funtype))
1477 3 : return size;
1478 :
1479 : /* Lose any fake structure return argument if it is passed on the stack. */
1480 873220 : if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1481 873220 : && !ix86_keep_aggregate_return_pointer (funtype))
1482 : {
1483 159954 : int nregs = ix86_function_regparm (funtype, fundecl);
1484 159954 : if (nregs == 0)
1485 458907 : return GET_MODE_SIZE (Pmode);
1486 : }
1487 :
1488 720251 : return 0;
1489 : }
1490 :
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Return false if
   INSN, as produced by combine, has a hard-register operand that no
   enabled alternative's constraints can accept.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      /* Look through subregs of hard registers, tracking the register
	 offset the subreg implies.  */
      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard-register operands need checking.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* Accept the operand if any preferred alternative can take it:
	 either the alternative allows anything, or this operand matches
	 another operand, or the register fits the constraint class.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		  (recog_data.operand[i],
		   recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
1567 :
1568 : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1569 :
1570 : static unsigned HOST_WIDE_INT
1571 4616 : ix86_asan_shadow_offset (void)
1572 : {
1573 4616 : return SUBTARGET_SHADOW_OFFSET;
1574 : }
1575 :
1576 : /* Argument support functions. */
1577 :
/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  /* SSE argument registers are usable on all targets with SSE.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  /* 32-bit: the low integer registers plus, with MMX, the MMX
     argument registers.  */
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  /* Select the integer parameter register table for the ABI in effect;
     preserve_none functions use their own register order.  */
  if (cfun
      && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
1619 : /* Return if we do not know how to pass ARG solely in registers. */
1620 :
1621 : static bool
1622 404046318 : ix86_must_pass_in_stack (const function_arg_info &arg)
1623 : {
1624 404046318 : if (must_pass_in_stack_var_size_or_pad (arg))
1625 : return true;
1626 :
1627 : /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1628 : The layout_type routine is crafty and tries to trick us into passing
1629 : currently unsupported vector types on the stack by using TImode. */
1630 1771411 : return (!TARGET_64BIT && arg.mode == TImode
1631 404046281 : && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1632 : }
1633 :
1634 : /* It returns the size, in bytes, of the area reserved for arguments passed
1635 : in registers for the function represented by fndecl dependent to the used
1636 : abi format. */
1637 : int
1638 10691520 : ix86_reg_parm_stack_space (const_tree fndecl)
1639 : {
1640 10691520 : enum calling_abi call_abi = SYSV_ABI;
1641 10691520 : if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1642 10380011 : call_abi = ix86_function_abi (fndecl);
1643 : else
1644 311509 : call_abi = ix86_function_type_abi (fndecl);
1645 10691520 : if (TARGET_64BIT && call_abi == MS_ABI)
1646 119238 : return 32;
1647 : return 0;
1648 : }
1649 :
1650 : /* We add this as a workaround in order to use libc_has_function
1651 : hook in i386.md. */
1652 : bool
1653 0 : ix86_libc_has_function (enum function_class fn_class)
1654 : {
1655 0 : return targetm.libc_has_function (fn_class, NULL_TREE);
1656 : }
1657 :
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  The default ABI may be overridden per
   type via the ms_abi / sysv_abi attributes.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  /* No type or no attributes: the default ABI applies.  */
  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      /* Diagnose ms_abi on X32 only once per compilation.  */
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}
1687 : enum calling_abi
1688 217245494 : ix86_function_abi (const_tree fndecl)
1689 : {
1690 217245494 : return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1691 : }
1692 :
1693 : /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1694 : specifying the call abi used. */
1695 : enum calling_abi
1696 2080594938 : ix86_cfun_abi (void)
1697 : {
1698 2080594938 : return cfun ? cfun->machine->call_abi : ix86_abi;
1699 : }
1700 :
1701 : bool
1702 5028361 : ix86_function_ms_hook_prologue (const_tree fn)
1703 : {
1704 5028361 : if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1705 : {
1706 8 : if (decl_function_context (fn) != NULL_TREE)
1707 0 : error_at (DECL_SOURCE_LOCATION (fn),
1708 : "%<ms_hook_prologue%> attribute is not compatible "
1709 : "with nested function");
1710 : else
1711 : return true;
1712 : }
1713 : return false;
1714 : }
1715 :
1716 : bool
1717 115107867 : ix86_function_naked (const_tree fn)
1718 : {
1719 115107867 : if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1720 : return true;
1721 :
1722 : return false;
1723 : }
1724 :
/* Write the extra assembler code needed to declare a function properly.
   Emits FNAME as a label into OUT_FILE; for functions with the
   ms_hook_prologue attribute it additionally surrounds the label with
   the hot-patch filler and marker bytes.  */

void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  /* Record that the label was emitted so later code can rely on it.  */
  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      /* Pad the space before the label with 0xCC bytes: 32 bytes for
	 64-bit, 16 bytes for 32-bit, emitted 4 bytes per ASM_LONG.  */
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  assemble_function_label_raw (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  The raw
     bytes form a no-op prologue that a runtime patcher can overwrite.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push %ebp
	     movl.s %esp, %ebp */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
	}
    }
}
1769 :
1770 : /* Output a user-defined label. In AT&T syntax, registers are prefixed
1771 : with %, so labels require no punctuation. In Intel syntax, registers
1772 : are unprefixed, so labels may clash with registers or other operators,
1773 : and require quoting. */
1774 : void
1775 35036178 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1776 : {
1777 35036178 : if (ASSEMBLER_DIALECT == ASM_ATT)
1778 35035078 : fprintf (file, "%s%s", prefix, label);
1779 : else
1780 1100 : fprintf (file, "\"%s%s\"", prefix, label);
1781 35036178 : }
1782 :
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  /* Cache the ABI derived from FNDECL (or the default when it is NULL)
     on the current function's machine-specific data.  */
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1791 :
1792 : /* Return 1 if pseudo register should be created and used to hold
1793 : GOT address for PIC code. */
1794 : bool
1795 169930737 : ix86_use_pseudo_pic_reg (void)
1796 : {
1797 169930737 : if ((TARGET_64BIT
1798 158882977 : && (ix86_cmodel == CM_SMALL_PIC
1799 : || TARGET_PECOFF))
1800 164000143 : || !flag_pic)
1801 165131920 : return false;
1802 : return true;
1803 : }
1804 :
/* Initialize large model PIC register.  Emits, at the current insn
   position, code that materializes the GOT address into
   pic_offset_table_rtx: a local label, a set of the label's RIP value,
   the GOT offset loaded into scratch register TMP_REGNO, and an add of
   the two.  The label is then demoted to a deleted-label note so it
   survives only for assembly output.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  /* This sequence is only valid for 64-bit pointers.  */
  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  /* The scratch register must not alias the PIC register being built.  */
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Turn the label into a NOTE_INSN_DELETED_LABEL, keeping its name for
     the assembler output.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
1828 :
/* Create and initialize PIC register if required.  Builds the GOT-load
   sequence and inserts it on the edge from the entry block, so it runs
   once before the function body.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  /* Nothing to do unless a pseudo PIC register is in use.  */
  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /*  If there is future mcount call in the function it is more profitable
	  to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      /* Flush queued CFI adjustments at this point.  */
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  /* Place the whole sequence on the entry edge and commit it into the
     CFG immediately.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
1868 :
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.

   FNDECL is the callee's decl when known, LIBNAME the SYMBOL_REF for a
   library call, and CALLER nonzero when initializing for the caller side
   of a call.  Fills in CUM's register counts, ABI, stdarg/vararg flags
   and per-ISA warning latches.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (cum, 0, sizeof (*cum));

  /* Determine the call ABI and the type to inspect for the
     preserve_none attribute, preferring the cgraph's resolved target
     (which sees through aliases via function_symbol) over the raw
     decl.  */
  tree preserve_none_type;
  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (target->decl);
	  cum->call_abi = ix86_function_abi (target->decl);
	  preserve_none_type = TREE_TYPE (target->decl);
	}
      else
	{
	  cum->call_abi = ix86_function_abi (fndecl);
	  preserve_none_type = TREE_TYPE (fndecl);
	}
    }
  else
    {
      cum->call_abi = ix86_function_type_abi (fntype);
      preserve_none_type = fntype;
    }
  cum->preserve_none_abi
    = (preserve_none_type
       && (lookup_attribute ("preserve_none",
			     TYPE_ATTRIBUTES (preserve_none_type))
	   != nullptr));

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      /* 64-bit register counts depend on the SYSV vs MS call ABI.  */
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Arm the one-shot -Wpsabi warning latches for each vector ISA.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesytem is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  /* maybe_vaarg: unprototyped or stdarg function types may take variable
     arguments; for library calls (no type) assume so only when there is
     no libname.  */
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->decl = fndecl;

  /* -Wabi empty-class warning: only needed when a non-empty argument
     follows an empty one in the parameter list.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode. */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
2026 :
2027 : /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2028 : But in the case of vector types, it is some vector mode.
2029 :
2030 : When we have only some of our vector isa extensions enabled, then there
2031 : are some modes for which vector_mode_supported_p is false. For these
2032 : modes, the generic vector support in gcc will choose some non-vector mode
2033 : in order to implement the type. By computing the natural mode, we'll
2034 : select the proper ABI location for the operand and not depend on whatever
2035 : the middle-end decides to do with these vector types.
2036 :
2037 : The midde-end can't deal with the vector types > 16 bytes. In this
2038 : case, we return the original mode and warn ABI change if CUM isn't
2039 : NULL.
2040 :
2041 : If INT_RETURN is true, warn ABI change if the vector mode isn't
2042 : available for function return value. */
2043 :
static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only intervene when the middle end picked a non-vector mode for a
     vector type (i.e. the required vector ISA is not enabled).  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  /* Start the mode search from the first vector mode of the
	     matching (float vs integer) class.  */
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		/* For 64- and 32-byte vectors without the matching ISA,
		   warn once (per argument/return flavor) and keep the
		   middle end's choice; each latch is a function-local
		   static so the -Wpsabi note fires at most once per
		   compilation.  */
		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		/* For the SSE/MMX-sized cases below only the warning is
		   emitted; the natural vector mode is still returned.  */
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		return mode;
	      }

	  /* A matching vector mode must exist for the supported sizes.  */
	  gcc_unreachable ();
	}
    }

  return mode;
}
2168 :
2169 : /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2170 : this may not agree with the mode that the type system has chosen for the
2171 : register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2172 : go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2173 :
2174 : static rtx
2175 36445491 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2176 : unsigned int regno)
2177 : {
2178 36445491 : rtx tmp;
2179 :
2180 36445491 : if (orig_mode != BLKmode)
2181 36445463 : tmp = gen_rtx_REG (orig_mode, regno);
2182 : else
2183 : {
2184 28 : tmp = gen_rtx_REG (mode, regno);
2185 28 : tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2186 28 : tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2187 : }
2188 :
2189 36445491 : return tmp;
2190 : }
2191 :
2192 : /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2193 : of this code is to classify each 8bytes of incoming argument by the register
2194 : class and assign registers accordingly. */
2195 :
/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.

   Applies the psABI merge rules in order; the rule order is
   significant, so later tests may assume earlier ones failed.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
     The INTEGERSI + 32-bit-SSE combinations stay INTEGERSI to keep the
     "fits in 32 bits" information; everything else integral widens to
     INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS
       && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
      || (class2 == X86_64_INTEGERSI_CLASS
	  && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
2240 :
2241 : /* Classify the argument of type TYPE and mode MODE.
2242 : CLASSES will be filled by the register class used to pass each word
2243 : of the operand. The number of words is returned. In case the parameter
2244 : should be passed in memory, 0 is returned. As a special case for zero
2245 : sized containers, classes[0] will be NO_CLASS and 1 is returned.
2246 :
2247 : BIT_OFFSET is used internally for handling records and specifies offset
2248 : of the offset in bits modulo 512 to avoid overflow cases.
2249 :
2250 : See the x86-64 PS ABI for details.
2251 : */
2252 :
2253 : static int
2254 389944759 : classify_argument (machine_mode mode, const_tree type,
2255 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2256 : int &zero_width_bitfields)
2257 : {
2258 389944759 : HOST_WIDE_INT bytes
2259 773771951 : = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2260 389944759 : int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2261 :
2262 : /* Variable sized entities are always passed/returned in memory. */
2263 389944759 : if (bytes < 0)
2264 : return 0;
2265 :
2266 389943596 : if (mode != VOIDmode)
2267 : {
2268 : /* The value of "named" doesn't matter. */
2269 388863941 : function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2270 388863941 : if (targetm.calls.must_pass_in_stack (arg))
2271 37 : return 0;
2272 : }
2273 :
2274 389943559 : if (type && (AGGREGATE_TYPE_P (type)
2275 354934079 : || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2276 : {
2277 36108717 : int i;
2278 36108717 : tree field;
2279 36108717 : enum x86_64_reg_class subclasses[MAX_CLASSES];
2280 :
2281 : /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2282 36108717 : if (bytes > 64)
2283 : return 0;
2284 :
2285 90113897 : for (i = 0; i < words; i++)
2286 54827433 : classes[i] = X86_64_NO_CLASS;
2287 :
2288 : /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2289 : signalize memory class, so handle it as special case. */
2290 35286464 : if (!words)
2291 : {
2292 83094 : classes[0] = X86_64_NO_CLASS;
2293 83094 : return 1;
2294 : }
2295 :
2296 : /* Classify each field of record and merge classes. */
2297 35203370 : switch (TREE_CODE (type))
2298 : {
2299 33178692 : case RECORD_TYPE:
2300 : /* And now merge the fields of structure. */
2301 907547220 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2302 : {
2303 874880636 : if (TREE_CODE (field) == FIELD_DECL)
2304 : {
2305 48420898 : int num;
2306 :
2307 48420898 : if (TREE_TYPE (field) == error_mark_node)
2308 4 : continue;
2309 :
2310 : /* Bitfields are always classified as integer. Handle them
2311 : early, since later code would consider them to be
2312 : misaligned integers. */
2313 48420894 : if (DECL_BIT_FIELD (field))
2314 : {
2315 1249272 : if (integer_zerop (DECL_SIZE (field)))
2316 : {
2317 12902 : if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2318 8048 : continue;
2319 4854 : if (zero_width_bitfields != 2)
2320 : {
2321 4320 : zero_width_bitfields = 1;
2322 4320 : continue;
2323 : }
2324 : }
2325 1236904 : for (i = (int_bit_position (field)
2326 1236904 : + (bit_offset % 64)) / 8 / 8;
2327 2476915 : i < ((int_bit_position (field) + (bit_offset % 64))
2328 2476915 : + tree_to_shwi (DECL_SIZE (field))
2329 2476915 : + 63) / 8 / 8; i++)
2330 1240011 : classes[i]
2331 2480022 : = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2332 : }
2333 : else
2334 : {
2335 47171622 : int pos;
2336 :
2337 47171622 : type = TREE_TYPE (field);
2338 :
2339 : /* Flexible array member is ignored. */
2340 47171622 : if (TYPE_MODE (type) == BLKmode
2341 647933 : && TREE_CODE (type) == ARRAY_TYPE
2342 170640 : && TYPE_SIZE (type) == NULL_TREE
2343 2013 : && TYPE_DOMAIN (type) != NULL_TREE
2344 47172870 : && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2345 : == NULL_TREE))
2346 : {
2347 1248 : static bool warned;
2348 :
2349 1248 : if (!warned && warn_psabi)
2350 : {
2351 3 : warned = true;
2352 3 : inform (input_location,
2353 : "the ABI of passing struct with"
2354 : " a flexible array member has"
2355 : " changed in GCC 4.4");
2356 : }
2357 1248 : continue;
2358 1248 : }
2359 47170374 : num = classify_argument (TYPE_MODE (type), type,
2360 : subclasses,
2361 47170374 : (int_bit_position (field)
2362 47170374 : + bit_offset) % 512,
2363 : zero_width_bitfields);
2364 47170374 : if (!num)
2365 : return 0;
2366 46658266 : pos = (int_bit_position (field)
2367 46658266 : + (bit_offset % 64)) / 8 / 8;
2368 96682082 : for (i = 0; i < num && (i + pos) < words; i++)
2369 50023816 : classes[i + pos]
2370 50023816 : = merge_classes (subclasses[i], classes[i + pos]);
2371 : }
2372 : }
2373 : }
2374 : break;
2375 :
2376 448631 : case ARRAY_TYPE:
2377 : /* Arrays are handled as small records. */
2378 448631 : {
2379 448631 : int num;
2380 448631 : num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2381 448631 : TREE_TYPE (type), subclasses, bit_offset,
2382 : zero_width_bitfields);
2383 448631 : if (!num)
2384 : return 0;
2385 :
2386 : /* The partial classes are now full classes. */
2387 433165 : if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2388 13863 : subclasses[0] = X86_64_SSE_CLASS;
2389 433165 : if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2390 5126 : subclasses[0] = X86_64_SSE_CLASS;
2391 433165 : if (subclasses[0] == X86_64_INTEGERSI_CLASS
2392 165804 : && !((bit_offset % 64) == 0 && bytes == 4))
2393 133652 : subclasses[0] = X86_64_INTEGER_CLASS;
2394 :
2395 1335417 : for (i = 0; i < words; i++)
2396 902252 : classes[i] = subclasses[i % num];
2397 :
2398 : break;
2399 : }
2400 274201 : case UNION_TYPE:
2401 274201 : case QUAL_UNION_TYPE:
2402 : /* Unions are similar to RECORD_TYPE but offset is always 0.
2403 : */
2404 3042251 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2405 : {
2406 2803315 : if (TREE_CODE (field) == FIELD_DECL)
2407 : {
2408 1232903 : int num;
2409 :
2410 1232903 : if (TREE_TYPE (field) == error_mark_node)
2411 10 : continue;
2412 :
2413 1232893 : num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2414 1232893 : TREE_TYPE (field), subclasses,
2415 : bit_offset, zero_width_bitfields);
2416 1232893 : if (!num)
2417 : return 0;
2418 3155496 : for (i = 0; i < num && i < words; i++)
2419 1957868 : classes[i] = merge_classes (subclasses[i], classes[i]);
2420 : }
2421 : }
2422 : break;
2423 :
2424 1301846 : case BITINT_TYPE:
2425 : /* _BitInt(N) for N > 64 is passed as structure containing
2426 : (N + 63) / 64 64-bit elements. */
2427 1301846 : if (words > 2)
2428 : return 0;
2429 74994 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2430 74994 : return 2;
2431 :
2432 0 : default:
2433 0 : gcc_unreachable ();
2434 : }
2435 :
2436 33338685 : if (words > 2)
2437 : {
2438 : /* When size > 16 bytes, if the first one isn't
2439 : X86_64_SSE_CLASS or any other ones aren't
2440 : X86_64_SSEUP_CLASS, everything should be passed in
2441 : memory. */
2442 1653388 : if (classes[0] != X86_64_SSE_CLASS)
2443 : return 0;
2444 :
2445 197324 : for (i = 1; i < words; i++)
2446 179135 : if (classes[i] != X86_64_SSEUP_CLASS)
2447 : return 0;
2448 : }
2449 :
2450 : /* Final merger cleanup. */
2451 73759682 : for (i = 0; i < words; i++)
2452 : {
2453 : /* If one class is MEMORY, everything should be passed in
2454 : memory. */
2455 42089026 : if (classes[i] == X86_64_MEMORY_CLASS)
2456 : return 0;
2457 :
2458 : /* The X86_64_SSEUP_CLASS should be always preceded by
2459 : X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2460 42058566 : if (classes[i] == X86_64_SSEUP_CLASS
2461 207097 : && classes[i - 1] != X86_64_SSE_CLASS
2462 76550 : && classes[i - 1] != X86_64_SSEUP_CLASS)
2463 : {
2464 : /* The first one should never be X86_64_SSEUP_CLASS. */
2465 1916 : gcc_assert (i != 0);
2466 1916 : classes[i] = X86_64_SSE_CLASS;
2467 : }
2468 :
2469 : /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2470 : everything should be passed in memory. */
2471 42058566 : if (classes[i] == X86_64_X87UP_CLASS
2472 180307 : && (classes[i - 1] != X86_64_X87_CLASS))
2473 : {
2474 2370 : static bool warned;
2475 :
2476 : /* The first one should never be X86_64_X87UP_CLASS. */
2477 2370 : gcc_assert (i != 0);
2478 2370 : if (!warned && warn_psabi)
2479 : {
2480 1 : warned = true;
2481 1 : inform (input_location,
2482 : "the ABI of passing union with %<long double%>"
2483 : " has changed in GCC 4.4");
2484 : }
2485 2370 : return 0;
2486 : }
2487 : }
2488 : return words;
2489 : }
2490 :
2491 : /* Compute alignment needed. We align all types to natural boundaries with
2492 : exception of XFmode that is aligned to 64bits. */
2493 353834842 : if (mode != VOIDmode && mode != BLKmode)
2494 : {
2495 352291174 : int mode_alignment = GET_MODE_BITSIZE (mode);
2496 :
2497 352291174 : if (mode == XFmode)
2498 : mode_alignment = 128;
2499 345346774 : else if (mode == XCmode)
2500 554977 : mode_alignment = 256;
2501 352291174 : if (COMPLEX_MODE_P (mode))
2502 2317754 : mode_alignment /= 2;
2503 : /* Misaligned fields are always returned in memory. */
2504 352291174 : if (bit_offset % mode_alignment)
2505 : return 0;
2506 : }
2507 :
2508 : /* for V1xx modes, just use the base mode */
2509 353827209 : if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2510 447294695 : && GET_MODE_UNIT_SIZE (mode) == bytes)
2511 6469 : mode = GET_MODE_INNER (mode);
2512 :
2513 : /* Classification of atomic types. */
2514 353827209 : switch (mode)
2515 : {
2516 208054 : case E_SDmode:
2517 208054 : case E_DDmode:
2518 208054 : classes[0] = X86_64_SSE_CLASS;
2519 208054 : return 1;
2520 99208 : case E_TDmode:
2521 99208 : classes[0] = X86_64_SSE_CLASS;
2522 99208 : classes[1] = X86_64_SSEUP_CLASS;
2523 99208 : return 2;
2524 230809608 : case E_DImode:
2525 230809608 : case E_SImode:
2526 230809608 : case E_HImode:
2527 230809608 : case E_QImode:
2528 230809608 : case E_CSImode:
2529 230809608 : case E_CHImode:
2530 230809608 : case E_CQImode:
2531 230809608 : {
2532 230809608 : int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2533 :
2534 : /* Analyze last 128 bits only. */
2535 230809608 : size = (size - 1) & 0x7f;
2536 :
2537 230809608 : if (size < 32)
2538 : {
2539 102742471 : classes[0] = X86_64_INTEGERSI_CLASS;
2540 102742471 : return 1;
2541 : }
2542 128067137 : else if (size < 64)
2543 : {
2544 118425610 : classes[0] = X86_64_INTEGER_CLASS;
2545 118425610 : return 1;
2546 : }
2547 9641527 : else if (size < 64+32)
2548 : {
2549 3860700 : classes[0] = X86_64_INTEGER_CLASS;
2550 3860700 : classes[1] = X86_64_INTEGERSI_CLASS;
2551 3860700 : return 2;
2552 : }
2553 5780827 : else if (size < 64+64)
2554 : {
2555 5780827 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2556 5780827 : return 2;
2557 : }
2558 : else
2559 : gcc_unreachable ();
2560 : }
2561 2437946 : case E_CDImode:
2562 2437946 : case E_TImode:
2563 2437946 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2564 2437946 : return 2;
2565 0 : case E_COImode:
2566 0 : case E_OImode:
2567 : /* OImode shouldn't be used directly. */
2568 0 : gcc_unreachable ();
2569 : case E_CTImode:
2570 : return 0;
2571 822787 : case E_HFmode:
2572 822787 : case E_BFmode:
2573 822787 : if (!(bit_offset % 64))
2574 820237 : classes[0] = X86_64_SSEHF_CLASS;
2575 : else
2576 2550 : classes[0] = X86_64_SSE_CLASS;
2577 : return 1;
2578 9758741 : case E_SFmode:
2579 9758741 : if (!(bit_offset % 64))
2580 9705281 : classes[0] = X86_64_SSESF_CLASS;
2581 : else
2582 53460 : classes[0] = X86_64_SSE_CLASS;
2583 : return 1;
2584 4329993 : case E_DFmode:
2585 4329993 : classes[0] = X86_64_SSEDF_CLASS;
2586 4329993 : return 1;
2587 6943684 : case E_XFmode:
2588 6943684 : classes[0] = X86_64_X87_CLASS;
2589 6943684 : classes[1] = X86_64_X87UP_CLASS;
2590 6943684 : return 2;
2591 1282644 : case E_TFmode:
2592 1282644 : classes[0] = X86_64_SSE_CLASS;
2593 1282644 : classes[1] = X86_64_SSEUP_CLASS;
2594 1282644 : return 2;
2595 75166 : case E_HCmode:
2596 75166 : case E_BCmode:
2597 75166 : classes[0] = X86_64_SSE_CLASS;
2598 75166 : if (!(bit_offset % 64))
2599 : return 1;
2600 : else
2601 : {
2602 98 : classes[1] = X86_64_SSEHF_CLASS;
2603 98 : return 2;
2604 : }
2605 692191 : case E_SCmode:
2606 692191 : classes[0] = X86_64_SSE_CLASS;
2607 692191 : if (!(bit_offset % 64))
2608 : return 1;
2609 : else
2610 : {
2611 1119 : static bool warned;
2612 :
2613 1119 : if (!warned && warn_psabi)
2614 : {
2615 2 : warned = true;
2616 2 : inform (input_location,
2617 : "the ABI of passing structure with %<complex float%>"
2618 : " member has changed in GCC 4.4");
2619 : }
2620 1119 : classes[1] = X86_64_SSESF_CLASS;
2621 1119 : return 2;
2622 : }
2623 701894 : case E_DCmode:
2624 701894 : classes[0] = X86_64_SSEDF_CLASS;
2625 701894 : classes[1] = X86_64_SSEDF_CLASS;
2626 701894 : return 2;
2627 554977 : case E_XCmode:
2628 554977 : classes[0] = X86_64_COMPLEX_X87_CLASS;
2629 554977 : return 1;
2630 : case E_TCmode:
2631 : /* This modes is larger than 16 bytes. */
2632 : return 0;
2633 25341644 : case E_V8SFmode:
2634 25341644 : case E_V8SImode:
2635 25341644 : case E_V32QImode:
2636 25341644 : case E_V16HFmode:
2637 25341644 : case E_V16BFmode:
2638 25341644 : case E_V16HImode:
2639 25341644 : case E_V4DFmode:
2640 25341644 : case E_V4DImode:
2641 25341644 : classes[0] = X86_64_SSE_CLASS;
2642 25341644 : classes[1] = X86_64_SSEUP_CLASS;
2643 25341644 : classes[2] = X86_64_SSEUP_CLASS;
2644 25341644 : classes[3] = X86_64_SSEUP_CLASS;
2645 25341644 : return 4;
2646 27476715 : case E_V8DFmode:
2647 27476715 : case E_V16SFmode:
2648 27476715 : case E_V32HFmode:
2649 27476715 : case E_V32BFmode:
2650 27476715 : case E_V8DImode:
2651 27476715 : case E_V16SImode:
2652 27476715 : case E_V32HImode:
2653 27476715 : case E_V64QImode:
2654 27476715 : classes[0] = X86_64_SSE_CLASS;
2655 27476715 : classes[1] = X86_64_SSEUP_CLASS;
2656 27476715 : classes[2] = X86_64_SSEUP_CLASS;
2657 27476715 : classes[3] = X86_64_SSEUP_CLASS;
2658 27476715 : classes[4] = X86_64_SSEUP_CLASS;
2659 27476715 : classes[5] = X86_64_SSEUP_CLASS;
2660 27476715 : classes[6] = X86_64_SSEUP_CLASS;
2661 27476715 : classes[7] = X86_64_SSEUP_CLASS;
2662 27476715 : return 8;
2663 37352466 : case E_V4SFmode:
2664 37352466 : case E_V4SImode:
2665 37352466 : case E_V16QImode:
2666 37352466 : case E_V8HImode:
2667 37352466 : case E_V8HFmode:
2668 37352466 : case E_V8BFmode:
2669 37352466 : case E_V2DFmode:
2670 37352466 : case E_V2DImode:
2671 37352466 : classes[0] = X86_64_SSE_CLASS;
2672 37352466 : classes[1] = X86_64_SSEUP_CLASS;
2673 37352466 : return 2;
2674 3265076 : case E_V1TImode:
2675 3265076 : case E_V1DImode:
2676 3265076 : case E_V2SFmode:
2677 3265076 : case E_V2SImode:
2678 3265076 : case E_V4HImode:
2679 3265076 : case E_V4HFmode:
2680 3265076 : case E_V4BFmode:
2681 3265076 : case E_V2HFmode:
2682 3265076 : case E_V2BFmode:
2683 3265076 : case E_V8QImode:
2684 3265076 : classes[0] = X86_64_SSE_CLASS;
2685 3265076 : return 1;
2686 : case E_BLKmode:
2687 : case E_VOIDmode:
2688 : return 0;
2689 44993 : default:
2690 44993 : gcc_assert (VECTOR_MODE_P (mode));
2691 :
2692 44993 : if (bytes > 16)
2693 : return 0;
2694 :
2695 60258 : gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2696 :
2697 60258 : if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2698 29689 : classes[0] = X86_64_INTEGERSI_CLASS;
2699 : else
2700 440 : classes[0] = X86_64_INTEGER_CLASS;
2701 30129 : classes[1] = X86_64_INTEGER_CLASS;
2702 30129 : return 1 + (bytes > 8);
2703 : }
2704 : }
2705 :
2706 : /* Wrapper around classify_argument with the extra zero_width_bitfields
2707 : argument, to diagnose GCC 12.1 ABI differences for C. */
2708 :
2709 : static int
2710 341092327 : classify_argument (machine_mode mode, const_tree type,
2711 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2712 : {
2713 341092327 : int zero_width_bitfields = 0;
2714 341092327 : static bool warned = false;
2715 341092327 : int n = classify_argument (mode, type, classes, bit_offset,
2716 : zero_width_bitfields);
2717 341092327 : if (!zero_width_bitfields || warned || !warn_psabi)
2718 : return n;
2719 534 : enum x86_64_reg_class alt_classes[MAX_CLASSES];
2720 534 : zero_width_bitfields = 2;
2721 534 : if (classify_argument (mode, type, alt_classes, bit_offset,
2722 : zero_width_bitfields) != n)
2723 0 : zero_width_bitfields = 3;
2724 : else
2725 1286 : for (int i = 0; i < n; i++)
2726 760 : if (classes[i] != alt_classes[i])
2727 : {
2728 8 : zero_width_bitfields = 3;
2729 8 : break;
2730 : }
2731 534 : if (zero_width_bitfields == 3)
2732 : {
2733 8 : warned = true;
2734 8 : const char *url
2735 : = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2736 :
2737 8 : inform (input_location,
2738 : "the ABI of passing C structures with zero-width bit-fields"
2739 : " has changed in GCC %{12.1%}", url);
2740 : }
2741 : return n;
2742 : }
2743 :
2744 : /* Examine the argument and return set number of register required in each
2745 : class. Return true iff parameter should be passed in memory. */
2746 :
2747 : static bool
2748 229531410 : examine_argument (machine_mode mode, const_tree type, int in_return,
2749 : int *int_nregs, int *sse_nregs)
2750 : {
2751 229531410 : enum x86_64_reg_class regclass[MAX_CLASSES];
2752 229531410 : int n = classify_argument (mode, type, regclass, 0);
2753 :
2754 229531410 : *int_nregs = 0;
2755 229531410 : *sse_nregs = 0;
2756 :
2757 229531410 : if (!n)
2758 : return true;
2759 665215238 : for (n--; n >= 0; n--)
2760 440691553 : switch (regclass[n])
2761 : {
2762 152282519 : case X86_64_INTEGER_CLASS:
2763 152282519 : case X86_64_INTEGERSI_CLASS:
2764 152282519 : (*int_nregs)++;
2765 152282519 : break;
2766 74397332 : case X86_64_SSE_CLASS:
2767 74397332 : case X86_64_SSEHF_CLASS:
2768 74397332 : case X86_64_SSESF_CLASS:
2769 74397332 : case X86_64_SSEDF_CLASS:
2770 74397332 : (*sse_nregs)++;
2771 74397332 : break;
2772 : case X86_64_NO_CLASS:
2773 : case X86_64_SSEUP_CLASS:
2774 : break;
2775 9421846 : case X86_64_X87_CLASS:
2776 9421846 : case X86_64_X87UP_CLASS:
2777 9421846 : case X86_64_COMPLEX_X87_CLASS:
2778 9421846 : if (!in_return)
2779 : return true;
2780 : break;
2781 0 : case X86_64_MEMORY_CLASS:
2782 0 : gcc_unreachable ();
2783 : }
2784 :
2785 : return false;
2786 : }
2787 :
2788 : /* Construct container for the argument used by GCC interface. See
2789 : FUNCTION_ARG for the detailed description. */
2790 :
static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* NULL below means "not passed in registers" (memory, or an error
     was diagnosed).  */
  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  /* Not enough free registers of the required kinds left.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled");
	      else
		error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode and HCmode, since we
     want to use a single register to pass those types.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }

  /* A full SSE register followed by SSEUP halves: one 16-byte value.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* Likewise a 32-byte (YMM-sized) value.  */
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* Likewise a 64-byte (ZMM-sized) value.  */
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* An 80-bit x87 value occupying one stack register.  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  /* A 16-byte integer value in two consecutive GPRs.  */
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  Each entry maps
     one 8-byte chunk (at byte offset i*8) to a register.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      /* Trailing chunk smaller than 8 bytes: use the smallest
		 integer mode that covers it.  */
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested a bit count we don't have an integer
		   mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSEHF_CLASS:
	  /* Preserve BFmode vs HFmode distinction for the chunk.  */
	  tmpmode = (mode == BFmode ? BFmode : HFmode);
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* A full SSE chunk; any following SSEUP chunks are folded
	     into one wider mode and skipped by bumping I.  */
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
3063 :
3064 : /* Update the data in CUM to advance over an argument of mode MODE
3065 : and data type TYPE. (TYPE is null for libcalls where that information
3066 : may not be available.)
3067 :
3068 : Return the number of integer registers advanced over. */
3069 :
static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  /* RES is the number of integer registers consumed; 0 means the
     argument went on the stack (or into SSE/MMX registers).  */
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      /* Integer-like argument: consume WORDS register slots.  NREGS
	 may go negative when the argument spills to the stack.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  /* Register slots exhausted; remaining arguments go on the
	     stack.  */
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      /* float_in_sse == -1 flags an SSE calling convention requested
	 without SSE/SSE2 being available; diagnosed below.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE-register candidates; aggregates never go in SSE regs
	 here.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 8-byte vectors go in MMX registers, again excluding
	 aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
3202 :
3203 : static int
3204 18989904 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3205 : const_tree type, HOST_WIDE_INT words, bool named)
3206 : {
3207 18989904 : int int_nregs, sse_nregs;
3208 :
3209 : /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3210 18989904 : if (!named && (VALID_AVX512F_REG_MODE (mode)
3211 : || VALID_AVX256_REG_MODE (mode)))
3212 : return 0;
3213 :
3214 18989540 : if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3215 18989540 : && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3216 : {
3217 16716605 : cum->nregs -= int_nregs;
3218 16716605 : cum->sse_nregs -= sse_nregs;
3219 16716605 : cum->regno += int_nregs;
3220 16716605 : cum->sse_regno += sse_nregs;
3221 16716605 : return int_nregs;
3222 : }
3223 : else
3224 : {
3225 2272935 : int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3226 2272935 : cum->words = ROUND_UP (cum->words, align);
3227 2272935 : cum->words += words;
3228 2272935 : return 0;
3229 : }
3230 : }
3231 :
3232 : static int
3233 446989 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3234 : HOST_WIDE_INT words)
3235 : {
3236 : /* Otherwise, this should be passed indirect. */
3237 446989 : gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3238 :
3239 446989 : cum->words += words;
3240 446989 : if (cum->nregs > 0)
3241 : {
3242 289355 : cum->nregs -= 1;
3243 289355 : cum->regno += 1;
3244 289355 : return 1;
3245 : }
3246 : return 0;
3247 : }
3248 :
3249 : /* Update the data in CUM to advance over argument ARG. */
3250 :
3251 : static void
3252 21566447 : ix86_function_arg_advance (cumulative_args_t cum_v,
3253 : const function_arg_info &arg)
3254 : {
3255 21566447 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3256 21566447 : machine_mode mode = arg.mode;
3257 21566447 : HOST_WIDE_INT bytes, words;
3258 21566447 : int nregs;
3259 :
3260 : /* The argument of interrupt handler is a special case and is
3261 : handled in ix86_function_arg. */
3262 21566447 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3263 : return;
3264 :
3265 21566080 : bytes = arg.promoted_size_in_bytes ();
3266 21566080 : words = CEIL (bytes, UNITS_PER_WORD);
3267 :
3268 21566080 : if (arg.type)
3269 21253939 : mode = type_natural_mode (arg.type, NULL, false);
3270 :
3271 21566080 : if (TARGET_64BIT)
3272 : {
3273 19436893 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3274 :
3275 19436893 : if (call_abi == MS_ABI)
3276 446989 : nregs = function_arg_advance_ms_64 (cum, bytes, words);
3277 : else
3278 18989904 : nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3279 18989904 : arg.named);
3280 : }
3281 : else
3282 2129187 : nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3283 :
3284 21566080 : if (!nregs)
3285 : {
3286 : /* Track if there are outgoing arguments on stack. */
3287 5701794 : if (cum->caller)
3288 2714476 : cfun->machine->outgoing_args_on_stack = true;
3289 : }
3290 : }
3291 :
3292 : /* Define where to put the arguments to a function.
3293 : Value is zero to push the argument on the stack,
3294 : or a hard register in which to store the argument.
3295 :
3296 : MODE is the argument's machine mode.
3297 : TYPE is the data type of the argument (as a tree).
3298 : This is null for libcalls where that information may
3299 : not be available.
3300 : CUM is a variable of type CUMULATIVE_ARGS which gives info about
3301 : the preceding args and about the function being called.
3302 : NAMED is nonzero if this argument is a named parameter
3303 : (otherwise it is an extra parameter matching an ellipsis). */
3304 :
static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      /* Integer-like argument: only passed in a register if it fits
	 entirely into the remaining register slots.  */
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      /* float_in_sse == -1 flags a requested SSE convention without
	 SSE/SSE2 enabled; diagnosed below.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* 16-byte vectors (and SSE-passed floats): never aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 32- and 64-byte vectors, likewise restricted to
	 non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 8-byte vectors go in MMX registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  /* NULL means the argument is passed on the stack.  */
  return NULL_RTX;
}
3447 :
3448 : static rtx
3449 18683482 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3450 : machine_mode orig_mode, const_tree type, bool named)
3451 : {
3452 : /* Handle a hidden AL argument containing number of registers
3453 : for varargs x86-64 functions. */
3454 18683482 : if (mode == VOIDmode)
3455 5194820 : return GEN_INT (cum->maybe_vaarg
3456 : ? (cum->sse_nregs < 0
3457 : ? X86_64_SSE_REGPARM_MAX
3458 : : cum->sse_regno)
3459 : : -1);
3460 :
3461 13488662 : switch (mode)
3462 : {
3463 : default:
3464 : break;
3465 :
3466 90203 : case E_V16HFmode:
3467 90203 : case E_V16BFmode:
3468 90203 : case E_V8SFmode:
3469 90203 : case E_V8SImode:
3470 90203 : case E_V32QImode:
3471 90203 : case E_V16HImode:
3472 90203 : case E_V4DFmode:
3473 90203 : case E_V4DImode:
3474 90203 : case E_V32HFmode:
3475 90203 : case E_V32BFmode:
3476 90203 : case E_V16SFmode:
3477 90203 : case E_V16SImode:
3478 90203 : case E_V64QImode:
3479 90203 : case E_V32HImode:
3480 90203 : case E_V8DFmode:
3481 90203 : case E_V8DImode:
3482 : /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3483 90203 : if (!named)
3484 : return NULL;
3485 : break;
3486 : }
3487 :
3488 13488298 : const int *parm_regs;
3489 13488298 : if (cum->preserve_none_abi)
3490 : parm_regs = x86_64_preserve_none_int_parameter_registers;
3491 : else
3492 13488183 : parm_regs = x86_64_int_parameter_registers;
3493 :
3494 13488298 : return construct_container (mode, orig_mode, type, 0, cum->nregs,
3495 13488298 : cum->sse_nregs,
3496 13488298 : &parm_regs[cum->regno],
3497 13488298 : cum->sse_regno);
3498 : }
3499 :
3500 : static rtx
3501 296338 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3502 : machine_mode orig_mode, bool named, const_tree type,
3503 : HOST_WIDE_INT bytes)
3504 : {
3505 296338 : unsigned int regno;
3506 :
3507 : /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3508 : We use value of -2 to specify that current function call is MSABI. */
3509 296338 : if (mode == VOIDmode)
3510 36293 : return GEN_INT (-2);
3511 :
3512 : /* If we've run out of registers, it goes on the stack. */
3513 260045 : if (cum->nregs == 0)
3514 : return NULL_RTX;
3515 :
3516 176290 : regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3517 :
3518 : /* Only floating point modes less than 64 bits are passed in anything but
3519 : integer regs. Larger floating point types are excluded as the Windows
3520 : ABI requires vreg args can be shadowed in GPRs (for red zone / varargs). */
3521 176290 : if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
3522 : {
3523 38254 : if (named)
3524 : {
3525 38254 : if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3526 37260 : regno = cum->regno + FIRST_SSE_REG;
3527 : }
3528 : else
3529 : {
3530 0 : rtx t1, t2;
3531 :
3532 : /* Unnamed floating parameters are passed in both the
3533 : SSE and integer registers. */
3534 0 : t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3535 0 : t2 = gen_rtx_REG (mode, regno);
3536 0 : t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3537 0 : t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3538 0 : return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3539 : }
3540 : }
3541 : /* Handle aggregated types passed in register. */
3542 176290 : if (orig_mode == BLKmode)
3543 : {
3544 0 : if (bytes > 0 && bytes <= 8)
3545 0 : mode = (bytes > 4 ? DImode : SImode);
3546 0 : if (mode == BLKmode)
3547 0 : mode = DImode;
3548 : }
3549 :
3550 176290 : return gen_reg_or_parallel (mode, orig_mode, regno);
3551 : }
3552 :
3553 : /* Return where to put the arguments to a function.
3554 : Return zero to push the argument on the stack, or a hard register in which to store the argument.
3555 :
3556 : ARG describes the argument while CUM gives information about the
3557 : preceding args and about the function being called. */
3558 :
3559 : static rtx
3560 21536100 : ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3561 : {
3562 21536100 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3563 21536100 : machine_mode mode = arg.mode;
3564 21536100 : HOST_WIDE_INT bytes, words;
3565 21536100 : rtx reg;
3566 :
3567 21536100 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3568 : {
3569 187 : gcc_assert (arg.type != NULL_TREE);
3570 187 : if (POINTER_TYPE_P (arg.type))
3571 : {
3572 : /* This is the pointer argument. */
3573 122 : gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3574 : /* It is at -WORD(AP) in the current frame in interrupt and
3575 : exception handlers. */
3576 122 : reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3577 : }
3578 : else
3579 : {
3580 65 : gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3581 : && TREE_CODE (arg.type) == INTEGER_TYPE
3582 : && TYPE_MODE (arg.type) == word_mode);
3583 : /* The error code is the word-mode integer argument at
3584 : -2 * WORD(AP) in the current frame of the exception
3585 : handler. */
3586 65 : reg = gen_rtx_MEM (word_mode,
3587 65 : plus_constant (Pmode,
3588 : arg_pointer_rtx,
3589 65 : -2 * UNITS_PER_WORD));
3590 : }
3591 187 : return reg;
3592 : }
3593 :
3594 21535913 : bytes = arg.promoted_size_in_bytes ();
3595 21535913 : words = CEIL (bytes, UNITS_PER_WORD);
3596 :
3597 : /* To simplify the code below, represent vector types with a vector mode
3598 : even if MMX/SSE are not active. */
3599 21535913 : if (arg.type && VECTOR_TYPE_P (arg.type))
3600 171358 : mode = type_natural_mode (arg.type, cum, false);
3601 :
3602 21535913 : if (TARGET_64BIT)
3603 : {
3604 18979820 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3605 :
3606 18979820 : if (call_abi == MS_ABI)
3607 296338 : reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3608 296338 : arg.type, bytes);
3609 : else
3610 18683482 : reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3611 : }
3612 : else
3613 2556093 : reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3614 :
3615 : /* Track if there are outgoing arguments on stack. */
3616 21535913 : if (reg == NULL_RTX && cum->caller)
3617 2197096 : cfun->machine->outgoing_args_on_stack = true;
3618 :
3619 : return reg;
3620 : }
3621 :
3622 : /* A C expression that indicates when an argument must be passed by
3623 : reference. If nonzero for an argument, a copy of that argument is
3624 : made in memory and a pointer to the argument is passed instead of
3625 : the argument itself. The pointer is passed in whatever way is
3626 : appropriate for passing a pointer to that type. */
3627 :
3628 : static bool
3629 21487316 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3630 : {
3631 21487316 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3632 :
3633 21487316 : if (TARGET_64BIT)
3634 : {
3635 19368389 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3636 :
3637 : /* See Windows x64 Software Convention. */
3638 19368389 : if (call_abi == MS_ABI)
3639 : {
3640 441390 : HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3641 :
3642 441390 : if (tree type = arg.type)
3643 : {
3644 : /* Arrays are passed by reference. */
3645 441390 : if (TREE_CODE (type) == ARRAY_TYPE)
3646 : return true;
3647 :
3648 441390 : if (RECORD_OR_UNION_TYPE_P (type))
3649 : {
3650 : /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3651 : are passed by reference. */
3652 15022 : msize = int_size_in_bytes (type);
3653 : }
3654 : }
3655 :
3656 : /* __m128 is passed by reference. */
3657 872851 : return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3658 : }
3659 18926999 : else if (arg.type && int_size_in_bytes (arg.type) == -1)
3660 : return true;
3661 : }
3662 :
3663 : return false;
3664 : }
3665 :
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  /* SSE register modes and the 128-bit scalar modes (TDmode, TFmode,
     TCmode) qualify, unless the user explicitly lowered the alignment
     below 128 bits with an alignment attribute.  */
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  /* Anything with less than 128-bit alignment never qualifies.  */
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  A single qualifying
	       member makes the whole aggregate qualify.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
3716 :
3717 : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3718 : XXX: This function is obsolete and is only used for checking psABI
3719 : compatibility with previous versions of GCC. */
3720 :
3721 : static unsigned int
3722 5541689 : ix86_compat_function_arg_boundary (machine_mode mode,
3723 : const_tree type, unsigned int align)
3724 : {
3725 : /* In 32bit, only _Decimal128 and __float128 are aligned to their
3726 : natural boundaries. */
3727 5541689 : if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3728 : {
3729 : /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3730 : make an exception for SSE modes since these require 128bit
3731 : alignment.
3732 :
3733 : The handling here differs from field_alignment. ICC aligns MMX
3734 : arguments to 4 byte boundaries, while structure fields are aligned
3735 : to 8 byte boundaries. */
3736 1986393 : if (!type)
3737 : {
3738 11930 : if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3739 1986181 : align = PARM_BOUNDARY;
3740 : }
3741 : else
3742 : {
3743 1974463 : if (!ix86_compat_aligned_value_p (type))
3744 1986181 : align = PARM_BOUNDARY;
3745 : }
3746 : }
3747 10683489 : if (align > BIGGEST_ALIGNMENT)
3748 90 : align = BIGGEST_ALIGNMENT;
3749 5541689 : return align;
3750 : }
3751 :
3752 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3753 : passing ABI. */
3754 :
3755 : static bool
3756 1977134 : ix86_contains_aligned_value_p (const_tree type)
3757 : {
3758 1977134 : machine_mode mode = TYPE_MODE (type);
3759 :
3760 1977134 : if (mode == XFmode || mode == XCmode)
3761 : return false;
3762 :
3763 1975000 : if (TYPE_ALIGN (type) < 128)
3764 : return false;
3765 :
3766 2883 : if (AGGREGATE_TYPE_P (type))
3767 : {
3768 : /* Walk the aggregates recursively. */
3769 0 : switch (TREE_CODE (type))
3770 : {
3771 0 : case RECORD_TYPE:
3772 0 : case UNION_TYPE:
3773 0 : case QUAL_UNION_TYPE:
3774 0 : {
3775 0 : tree field;
3776 :
3777 : /* Walk all the structure fields. */
3778 0 : for (field = TYPE_FIELDS (type);
3779 0 : field;
3780 0 : field = DECL_CHAIN (field))
3781 : {
3782 0 : if (TREE_CODE (field) == FIELD_DECL
3783 0 : && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3784 : return true;
3785 : }
3786 : break;
3787 : }
3788 :
3789 0 : case ARRAY_TYPE:
3790 : /* Just for use if some languages passes arrays by value. */
3791 0 : if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3792 : return true;
3793 : break;
3794 :
3795 : default:
3796 : gcc_unreachable ();
3797 : }
3798 : }
3799 : else
3800 2883 : return TYPE_ALIGN (type) >= 128;
3801 :
3802 : return false;
3803 : }
3804 :
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  TYPE may be NULL_TREE for a libcall
   argument, in which case only MODE is consulted.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types occupy no argument slot; use the minimum
	 boundary.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      /* Emit the psABI-change note at most once per compilation.  */
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* In 32-bit mode arguments are aligned to either 4 or 16
	     bytes, nothing in between.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      /* Warn when the alignment chosen here differs from the one the
	 obsolete pre-4.6 rules would have chosen.  */
      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
3860 :
3861 : /* Return true if N is a possible register number of function value. */
3862 :
3863 : static bool
3864 4662656 : ix86_function_value_regno_p (const unsigned int regno)
3865 : {
3866 4662656 : switch (regno)
3867 : {
3868 : case AX_REG:
3869 : return true;
3870 105159 : case DX_REG:
3871 105159 : return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3872 98769 : case DI_REG:
3873 98769 : case SI_REG:
3874 98769 : return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3875 :
3876 : /* Complex values are returned in %st(0)/%st(1) pair. */
3877 24145 : case ST0_REG:
3878 24145 : case ST1_REG:
3879 : /* TODO: The function should depend on current function ABI but
3880 : builtins.cc would need updating then. Therefore we use the
3881 : default ABI. */
3882 24145 : if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3883 : return false;
3884 24145 : return TARGET_FLOAT_RETURNS_IN_80387;
3885 :
3886 : /* Complex values are returned in %xmm0/%xmm1 pair. */
3887 1291922 : case XMM0_REG:
3888 1291922 : case XMM1_REG:
3889 1291922 : return TARGET_SSE;
3890 :
3891 9489 : case MM0_REG:
3892 9489 : if (TARGET_MACHO || TARGET_64BIT)
3893 : return false;
3894 2497 : return TARGET_MMX;
3895 : }
3896 :
3897 : return false;
3898 : }
3899 :
3900 : /* Check whether the register REGNO should be zeroed on X86.
3901 : When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3902 : together, no need to zero it again.
3903 : When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3904 :
3905 : static bool
3906 1377 : zero_call_used_regno_p (const unsigned int regno,
3907 : bool all_sse_zeroed,
3908 : bool need_zero_mmx)
3909 : {
3910 835 : return GENERAL_REGNO_P (regno)
3911 819 : || (!all_sse_zeroed && SSE_REGNO_P (regno))
3912 439 : || MASK_REGNO_P (regno)
3913 1800 : || (need_zero_mmx && MMX_REGNO_P (regno));
3914 : }
3915 :
3916 : /* Return the machine_mode that is used to zero register REGNO. */
3917 :
3918 : static machine_mode
3919 954 : zero_call_used_regno_mode (const unsigned int regno)
3920 : {
3921 : /* NB: We only need to zero the lower 32 bits for integer registers
3922 : and the lower 128 bits for vector registers since destination are
3923 : zero-extended to the full register width. */
3924 954 : if (GENERAL_REGNO_P (regno))
3925 : return SImode;
3926 : else if (SSE_REGNO_P (regno))
3927 380 : return V4SFmode;
3928 : else if (MASK_REGNO_P (regno))
3929 : return HImode;
3930 : else if (MMX_REGNO_P (regno))
3931 0 : return V2SImode;
3932 : else
3933 0 : gcc_unreachable ();
3934 : }
3935 :
3936 : /* Generate a rtx to zero all vector registers together if possible,
3937 : otherwise, return NULL. */
3938 :
3939 : static rtx
3940 131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3941 : {
3942 131 : if (!TARGET_AVX)
3943 : return NULL;
3944 :
3945 372 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3946 368 : if ((LEGACY_SSE_REGNO_P (regno)
3947 336 : || (TARGET_64BIT
3948 336 : && (REX_SSE_REGNO_P (regno)
3949 304 : || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3950 432 : && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3951 : return NULL;
3952 :
3953 4 : return gen_avx_vzeroall ();
3954 : }
3955 :
3956 : /* Generate insns to zero all st registers together.
3957 : Return true when zeroing instructions are generated.
3958 : Assume the number of st registers that are zeroed is num_of_st,
3959 : we will emit the following sequence to zero them together:
3960 : fldz; \
3961 : fldz; \
3962 : ...
3963 : fldz; \
3964 : fstp %%st(0); \
3965 : fstp %%st(0); \
3966 : ...
3967 : fstp %%st(0);
3968 : i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3969 : mark stack slots empty.
3970 :
3971 : How to compute the num_of_st:
3972 : There is no direct mapping from stack registers to hard register
3973 : numbers. If one stack register needs to be cleared, we don't know
3974 : where in the stack the value remains. So, if any stack register
3975 : needs to be cleared, the whole stack should be cleared. However,
3976 : x87 stack registers that hold the return value should be excluded.
3977 : x87 returns in the top (two for complex values) register, so
3978 : num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3979 : return the value of num_of_st. */
3980 :
3981 :
3982 : static int
3983 131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3984 : {
3985 :
3986 : /* If the FPU is disabled, no need to zero all st registers. */
3987 131 : if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3988 : return 0;
3989 :
3990 10329 : unsigned int num_of_st = 0;
3991 10329 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3992 10220 : if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3993 10220 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3994 : {
3995 : num_of_st++;
3996 : break;
3997 : }
3998 :
3999 130 : if (num_of_st == 0)
4000 : return 0;
4001 :
4002 21 : bool return_with_x87 = false;
4003 42 : return_with_x87 = (crtl->return_rtx
4004 21 : && (STACK_REG_P (crtl->return_rtx)));
4005 :
4006 21 : bool complex_return = false;
4007 42 : complex_return = (crtl->return_rtx
4008 21 : && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
4009 :
4010 21 : if (return_with_x87)
4011 2 : if (complex_return)
4012 : num_of_st = 6;
4013 : else
4014 1 : num_of_st = 7;
4015 : else
4016 : num_of_st = 8;
4017 :
4018 21 : rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
4019 186 : for (unsigned int i = 0; i < num_of_st; i++)
4020 165 : emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
4021 :
4022 186 : for (unsigned int i = 0; i < num_of_st; i++)
4023 : {
4024 165 : rtx insn;
4025 165 : insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
4026 165 : add_reg_note (insn, REG_DEAD, st_reg);
4027 : }
4028 21 : return num_of_st;
4029 : }
4030 :
4031 :
4032 : /* When the routine exit in MMX mode, if any ST register needs
4033 : to be zeroed, we should clear all MMX registers except the
4034 : RET_MMX_REGNO that holds the return value. */
4035 : static bool
4036 0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
4037 : unsigned int ret_mmx_regno)
4038 : {
4039 0 : bool need_zero_all_mm = false;
4040 0 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4041 0 : if (STACK_REGNO_P (regno)
4042 0 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4043 : {
4044 : need_zero_all_mm = true;
4045 : break;
4046 : }
4047 :
4048 0 : if (!need_zero_all_mm)
4049 : return false;
4050 :
4051 : machine_mode mode = V2SImode;
4052 0 : for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4053 0 : if (regno != ret_mmx_regno)
4054 : {
4055 0 : rtx reg = gen_rtx_REG (mode, regno);
4056 0 : emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
4057 : }
4058 : return true;
4059 : }
4060 :
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
      if (TARGET_64BIT && TARGET_AVX512F)
	{
	  /* NOTE(review): vzeroall clears only %xmm0-%xmm15, so the
	     extended registers %xmm16-%xmm31 are cleared here with
	     explicit 128-bit moves.  */
	  rtx zero = CONST0_RTX (V4SFmode);
	  for (unsigned int regno = XMM16_REG;
	       regno <= XMM31_REG;
	       regno++)
	    {
	      rtx reg = gen_rtx_REG (V4SFmode, regno);
	      emit_move_insn (reg, zero);
	    }
	}
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* NOTE(review): pairing the SImode clear with a flags
	     clobber presumably lets it match the xor-style zeroing
	     insn, which clobbers EFLAGS — confirm against the insn
	     patterns.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
4178 :
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  Handles the 32-bit conventions.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/ _Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  error ("SSE register return with SSE2 disabled");
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      /* _Complex _Float16 is returned as a single SImode chunk placed
	 in %xmm0 (or in %eax after the SSE2 diagnostic above).  */
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled");

      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
4266 :
/* Like function_value_32, but for the 64-bit SYSV ABI.  ORIG_MODE is
   the declared mode of the value, MODE its natural (classification)
   mode, and VALTYPE its type — NULL for a libcall, which carries no
   type information.  */
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	/* All scalar/complex FP and decimal FP modes return in
	   %xmm0.  */
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	/* 80-bit extended values return on the x87 stack.  */
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4322 :
4323 : static rtx
4324 0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4325 : const_tree fntype, const_tree fn, const_tree valtype)
4326 : {
4327 0 : unsigned int regno;
4328 :
4329 : /* Floating point return values in %st(0)
4330 : (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4331 0 : if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4332 0 : && (GET_MODE_SIZE (mode) > 8
4333 0 : || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4334 : {
4335 0 : regno = FIRST_FLOAT_REG;
4336 0 : return gen_rtx_REG (orig_mode, regno);
4337 : }
4338 : else
4339 0 : return function_value_32(orig_mode, mode, fntype,fn);
4340 : }
4341 :
/* Like function_value_64, but for the Microsoft x64 ABI: values come
   back either in %rax or, for suitable FP/vector values with SSE
   enabled, in %xmm0.  */
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
		      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  /* A 16-byte value uses %xmm0 only when it is a plain
	     (non-complex) integral or vector value.  */
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	case 2:
	  /* Small aggregates stay in %rax; scalar FP goes to %xmm0.  */
	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
	    break;
	  if (mode == HFmode || mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
4376 :
4377 : static rtx
4378 102821041 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4379 : machine_mode orig_mode, machine_mode mode)
4380 : {
4381 102821041 : const_tree fn, fntype;
4382 :
4383 102821041 : fn = NULL_TREE;
4384 102821041 : if (fntype_or_decl && DECL_P (fntype_or_decl))
4385 3544799 : fn = fntype_or_decl;
4386 3544799 : fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4387 :
4388 102821041 : if (ix86_function_type_abi (fntype) == MS_ABI)
4389 : {
4390 767095 : if (TARGET_64BIT)
4391 767095 : return function_value_ms_64 (orig_mode, mode, valtype);
4392 : else
4393 0 : return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4394 : }
4395 102053946 : else if (TARGET_64BIT)
4396 98122836 : return function_value_64 (orig_mode, mode, valtype);
4397 : else
4398 3931110 : return function_value_32 (orig_mode, mode, fntype, fn);
4399 : }
4400 :
4401 : static rtx
4402 102715877 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4403 : {
4404 102715877 : machine_mode mode, orig_mode;
4405 :
4406 102715877 : orig_mode = TYPE_MODE (valtype);
4407 102715877 : mode = type_natural_mode (valtype, NULL, true);
4408 102715877 : return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4409 : }
4410 :
4411 : /* Pointer function arguments and return values are promoted to
4412 : word_mode for normal functions. */
4413 :
4414 : static machine_mode
4415 32041405 : ix86_promote_function_mode (const_tree type, machine_mode mode,
4416 : int *punsignedp, const_tree fntype,
4417 : int for_return)
4418 : {
4419 32041405 : if (cfun->machine->func_type == TYPE_NORMAL
4420 32040382 : && type != NULL_TREE
4421 32006821 : && POINTER_TYPE_P (type))
4422 : {
4423 16050995 : *punsignedp = POINTERS_EXTEND_UNSIGNED;
4424 16050995 : return word_mode;
4425 : }
4426 15990410 : return default_promote_function_mode (type, mode, punsignedp, fntype,
4427 15990410 : for_return);
4428 : }
4429 :
4430 : /* Return true if a structure, union or array with MODE containing FIELD
4431 : should be accessed using BLKmode. */
4432 :
4433 : static bool
4434 142590258 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4435 : {
4436 : /* Union with XFmode must be in BLKmode. */
4437 142590258 : return (mode == XFmode
4438 142728009 : && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4439 130879 : || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4440 : }
4441 :
/* Return the location in which a libcall returns a value of mode MODE.
   Libcalls carry no type information, so MODE serves as both the
   original and the natural mode.  */

rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
4447 :
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  /* SYSV 64-bit: defer to the psABI register classifier.  */
	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      /* Vector values: in memory only when the register set that
	 would carry them is not available.  */
      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescibes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      /* 80-bit FP values return on the x87 stack.  */
      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
4534 :
4535 : /* Implement TARGET_PUSH_ARGUMENT. */
4536 :
4537 : static bool
4538 9345974 : ix86_push_argument (unsigned int npush)
4539 : {
4540 : /* If SSE2 is available, use vector move to put large argument onto
4541 : stack. NB: In 32-bit mode, use 8-byte vector move. */
4542 11770644 : return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4543 9081101 : && TARGET_PUSH_ARGS
4544 18426977 : && !ACCUMULATE_OUTGOING_ARGS);
4545 : }
4546 :
4547 :
/* Create the va_list data type for the 64-bit SYSV ABI.  The record
   built here is
       struct __va_list_tag {
	 unsigned gp_offset;
	 unsigned fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       };
   and the returned va_list type is a one-element array of it.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Publish the two counter fields through the global hooks.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields into the record and lay it out.  */
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;
  TREE_PUBLIC (type_decl) = 1;

  layout_type (record);

  /* Tag the record so canonical_va_list_type can recognize it even
     after LTO type merging.  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
4596 :
4597 : /* Setup the builtin va_list data type and for 64-bit the additional
4598 : calling convention specific va_list data types. */
4599 :
4600 : static tree
4601 288469 : ix86_build_builtin_va_list (void)
4602 : {
4603 288469 : if (TARGET_64BIT)
4604 : {
4605 : /* Initialize ABI specific va_list builtin types.
4606 :
4607 : In lto1, we can encounter two va_list types:
4608 : - one as a result of the type-merge across TUs, and
4609 : - the one constructed here.
4610 : These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4611 : a type identity check in canonical_va_list_type based on
4612 : TYPE_MAIN_VARIANT (which we used to have) will not work.
4613 : Instead, we tag each va_list_type_node with its unique attribute, and
4614 : look for the attribute in the type identity check in
4615 : canonical_va_list_type.
4616 :
4617 : Tagging sysv_va_list_type_node directly with the attribute is
4618 : problematic since it's a array of one record, which will degrade into a
4619 : pointer to record when used as parameter (see build_va_arg comments for
4620 : an example), dropping the attribute in the process. So we tag the
4621 : record instead. */
4622 :
4623 : /* For SYSV_ABI we use an array of one record. */
4624 281305 : sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4625 :
4626 : /* For MS_ABI we use plain pointer to argument area. */
4627 281305 : tree char_ptr_type = build_pointer_type (char_type_node);
4628 281305 : tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4629 281305 : TYPE_ATTRIBUTES (char_ptr_type));
4630 281305 : ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4631 :
4632 281305 : return ((ix86_abi == MS_ABI)
4633 281305 : ? ms_va_list_type_node
4634 281305 : : sysv_va_list_type_node);
4635 : }
4636 : else
4637 : {
4638 : /* For i386 we use plain pointer to argument area. */
4639 7164 : return build_pointer_type (char_type_node);
4640 : }
4641 : }
4642 :
/* Worker function for TARGET_SETUP_INCOMING_VARARGS (64-bit SysV ABI).

   Emit the prologue code that dumps the named-register tail of the
   argument registers into the register save area, so later va_arg can
   read them from memory.  CUM describes how many GP/SSE registers the
   named arguments already consumed.  Also records the save-area sizes
   in ix86_varargs_gpr_size / ix86_varargs_fpr_size.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save at all.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Save only the GP registers the named arguments did not consume,
     capped at the ABI register count.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* The preserve_none ABI uses a different GP parameter register
     ordering.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      /* SSE slots follow the GP slots; each occupies 16 bytes.  */
      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
4733 :
4734 : static void
4735 5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4736 : {
4737 5652 : alias_set_type set = get_varargs_alias_set ();
4738 5652 : int i;
4739 :
4740 : /* Reset to zero, as there might be a sysv vaarg used
4741 : before. */
4742 5652 : ix86_varargs_gpr_size = 0;
4743 5652 : ix86_varargs_fpr_size = 0;
4744 :
4745 14154 : for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4746 : {
4747 8502 : rtx reg, mem;
4748 :
4749 8502 : mem = gen_rtx_MEM (Pmode,
4750 8502 : plus_constant (Pmode, virtual_incoming_args_rtx,
4751 8502 : i * UNITS_PER_WORD));
4752 8502 : MEM_NOTRAP_P (mem) = 1;
4753 8502 : set_mem_alias_set (mem, set);
4754 :
4755 8502 : reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4756 8502 : emit_move_insn (mem, reg);
4757 : }
4758 5652 : }
4759 :
4760 : static void
4761 21520 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4762 : const function_arg_info &arg,
4763 : int *, int no_rtl)
4764 : {
4765 21520 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4766 21520 : CUMULATIVE_ARGS next_cum;
4767 21520 : tree fntype;
4768 :
4769 : /* This argument doesn't appear to be used anymore. Which is good,
4770 : because the old code here didn't suppress rtl generation. */
4771 21520 : gcc_assert (!no_rtl);
4772 :
4773 21520 : if (!TARGET_64BIT)
4774 154 : return;
4775 :
4776 21366 : fntype = TREE_TYPE (current_function_decl);
4777 :
4778 : /* For varargs, we do not want to skip the dummy va_dcl argument.
4779 : For stdargs, we do want to skip the last named argument. */
4780 21366 : next_cum = *cum;
4781 21366 : if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4782 122 : || arg.type != NULL_TREE)
4783 21391 : && stdarg_p (fntype))
4784 21269 : ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4785 :
4786 21366 : if (cum->call_abi == MS_ABI)
4787 5652 : setup_incoming_varargs_ms_64 (&next_cum);
4788 : else
4789 15714 : setup_incoming_varargs_64 (&next_cum);
4790 : }
4791 :
4792 : /* Checks if TYPE is of kind va_list char *. */
4793 :
4794 : static bool
4795 73110 : is_va_list_char_pointer (tree type)
4796 : {
4797 73110 : tree canonic;
4798 :
4799 : /* For 32-bit it is always true. */
4800 73110 : if (!TARGET_64BIT)
4801 : return true;
4802 72948 : canonic = ix86_canonical_va_list_type (type);
4803 72948 : return (canonic == ms_va_list_type_node
4804 72948 : || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4805 : }
4806 :
/* Implement va_start.

   VALIST is the va_list lvalue; NEXTARG points just past the last
   named argument.  For the plain-pointer (32-bit / MS) flavor this
   defers to the generic expander; for the 64-bit SysV flavor it fills
   in all four __va_list_tag fields from the current function's
   argument-register usage.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Emit the copy at the very start of the function, after the
	     split-stack prologue has set the scratch register.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  /* Plain-pointer va_list under split stack: point it at the
	     old-stack argument area plus the named-argument offset.  */
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* Locate the four __va_list_tag fields (see
     ix86_build_builtin_va_list_64 for their layout).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset = bytes of GP save area already consumed by named
	 arguments (8 bytes per register).  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* fp_offset starts past the whole GP area; SSE slots are 16
	 bytes each.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	/* No GP slots were allocated; bias the base back so the
	   fp_offset computed above still lands on the SSE slots.  */
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
4937 :
/* Implement va_arg.

   Gimplify a VA_ARG_EXPR reading a value of TYPE from VALIST,
   appending the generated statements to PRE_P/POST_P.  For the 64-bit
   SysV ABI this emits the classic two-path sequence: if enough
   GP/SSE registers remain, fetch the value from the register save
   area; otherwise fetch it from the overflow (stack) area.  Returns a
   dereference of the computed address.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Locate the four __va_list_tag fields (see
     ix86_build_builtin_va_list_64).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false: take the overflow-area path.  lab_over: join point
	 after the register path.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      bool container_in_reg = false;
      if (REG_P (container))
	container_in_reg = true;
      else if (GET_CODE (container) == PARALLEL
	       && GET_MODE (container) == BLKmode
	       && XVECLEN (container, 0) == 1)
	{
	  /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
	     expression in a TImode register.  In this case, temp isn't
	     needed.  Otherwise, the TImode variable will be put in the
	     GPR save area which guarantees only 8-byte alignment.  */
	  rtx x = XVECEXP (container, 0, 0);
	  if (GET_CODE (x) == EXPR_LIST
	      && REG_P (XEXP (x, 0))
	      && XEXP (x, 1) == const0_rtx)
	    container_in_reg = true;
	}

      /* A temporary is needed when the save-area copy cannot satisfy
	 the type's alignment requirement.  */
      need_temp = (!container_in_reg
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !container_in_reg)
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  /* Copy the value piecewise from the save area into TEMP,
	     one container slot at a time.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  /* Final slot extends past the value; shrink the
		     piece to the remaining bytes.  */
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* Piece is narrower than its mode; use memcpy for
		     the exact byte count.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      /* Advance the gp_offset / fp_offset counters past the consumed
	 registers.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument boundary.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Bump the overflow pointer past the fetched value.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  /* For by-reference arguments, ADDR points at a pointer to the
     value; dereference once more.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5262 :
5263 : /* Return true if OPNUM's MEM should be matched
5264 : in movabs* patterns. */
5265 :
5266 : bool
5267 505 : ix86_check_movabs (rtx insn, int opnum)
5268 : {
5269 505 : rtx set, mem;
5270 :
5271 505 : set = PATTERN (insn);
5272 505 : if (GET_CODE (set) == PARALLEL)
5273 0 : set = XVECEXP (set, 0, 0);
5274 505 : gcc_assert (GET_CODE (set) == SET);
5275 505 : mem = XEXP (set, opnum);
5276 505 : while (SUBREG_P (mem))
5277 0 : mem = SUBREG_REG (mem);
5278 505 : gcc_assert (MEM_P (mem));
5279 505 : return volatile_ok || !MEM_VOLATILE_P (mem);
5280 : }
5281 :
5282 : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */
5283 : bool
5284 221047 : ix86_check_movs (rtx insn, int idx)
5285 : {
5286 221047 : rtx pat = PATTERN (insn);
5287 221047 : gcc_assert (GET_CODE (pat) == PARALLEL);
5288 :
5289 221047 : rtx set = XVECEXP (pat, 0, idx);
5290 221047 : gcc_assert (GET_CODE (set) == SET);
5291 :
5292 221047 : rtx dst = SET_DEST (set);
5293 221047 : gcc_assert (MEM_P (dst));
5294 :
5295 221047 : rtx src = SET_SRC (set);
5296 221047 : gcc_assert (MEM_P (src));
5297 :
5298 221047 : return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
5299 442094 : && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
5300 0 : || Pmode == word_mode));
5301 : }
5302 :
5303 : /* Return false if INSN contains a MEM with a non-default address space. */
5304 : bool
5305 65277 : ix86_check_no_addr_space (rtx insn)
5306 : {
5307 65277 : subrtx_var_iterator::array_type array;
5308 1436550 : FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5309 : {
5310 1371273 : rtx x = *iter;
5311 1501827 : if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5312 0 : return false;
5313 : }
5314 65277 : return true;
5315 65277 : }
5316 :
5317 : /* Initialize the table of extra 80387 mathematical constants. */
5318 :
5319 : static void
5320 2346 : init_ext_80387_constants (void)
5321 : {
5322 2346 : static const char * cst[5] =
5323 : {
5324 : "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5325 : "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5326 : "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5327 : "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5328 : "3.1415926535897932385128089594061862044", /* 4: fldpi */
5329 : };
5330 2346 : int i;
5331 :
5332 14076 : for (i = 0; i < 5; i++)
5333 : {
5334 11730 : real_from_string (&ext_80387_constants_table[i], cst[i]);
5335 : /* Ensure each constant is rounded to XFmode precision. */
5336 11730 : real_convert (&ext_80387_constants_table[i],
5337 23460 : XFmode, &ext_80387_constants_table[i]);
5338 : }
5339 :
5340 2346 : ext_80387_constants_init = 1;
5341 2346 : }
5342 :
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.

   Return values:
     -1  X is not an x87 floating-point CONST_DOUBLE;
      1  0.0  (fldz);
      2  1.0  (fld1);
    3-7  the XFmode constants loadable with fldlg2/fldln2/fldl2e/
	 fldl2t/fldpi (indices into ext_80387_constants_table + 3);
      8  -0.0 (split as fldz;fchs);
      9  -1.0 (split as fld1;fchs);
      0  no special instruction applies.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
      && !flag_rounding_math)
    {
      int i;

      /* The table is built lazily on first use.  */
      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}
5388 :
5389 : /* Return the opcode of the special instruction to be used to load
5390 : the constant X. */
5391 :
5392 : const char *
5393 54508 : standard_80387_constant_opcode (rtx x)
5394 : {
5395 54508 : switch (standard_80387_constant_p (x))
5396 : {
5397 : case 1:
5398 : return "fldz";
5399 33957 : case 2:
5400 33957 : return "fld1";
5401 1 : case 3:
5402 1 : return "fldlg2";
5403 10 : case 4:
5404 10 : return "fldln2";
5405 12 : case 5:
5406 12 : return "fldl2e";
5407 2 : case 6:
5408 2 : return "fldl2t";
5409 192 : case 7:
5410 192 : return "fldpi";
5411 0 : case 8:
5412 0 : case 9:
5413 0 : return "#";
5414 0 : default:
5415 0 : gcc_unreachable ();
5416 : }
5417 : }
5418 :
5419 : /* Return the CONST_DOUBLE representing the 80387 constant that is
5420 : loaded by the specified special instruction. The argument IDX
5421 : matches the return value from standard_80387_constant_p. */
5422 :
5423 : rtx
5424 24 : standard_80387_constant_rtx (int idx)
5425 : {
5426 24 : int i;
5427 :
5428 24 : if (! ext_80387_constants_init)
5429 7 : init_ext_80387_constants ();
5430 :
5431 24 : switch (idx)
5432 : {
5433 24 : case 3:
5434 24 : case 4:
5435 24 : case 5:
5436 24 : case 6:
5437 24 : case 7:
5438 24 : i = idx - 3;
5439 24 : break;
5440 :
5441 0 : default:
5442 0 : gcc_unreachable ();
5443 : }
5444 :
5445 24 : return const_double_from_real_value (ext_80387_constants_table[i],
5446 24 : XFmode);
5447 : }
5448 :
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.
   Return 0 if X is none of the above (or SSE is unavailable, or the
   mode/ISA combination cannot materialize the constant directly).  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  /* All-ones: either an integer all-ones vector/immediate, or a
     float vector whose bit pattern is all ones.  */
  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	   || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
	  && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      /* All-ones requires an ISA that can generate the pcmpeq-style
	 constant at the relevant vector width.  */
      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  /* All-ones in the low half/quarter with the rest zero.  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
5504 :
5505 : /* Return the opcode of the special instruction to be used to load
5506 : the constant operands[1] into operands[0]. */
5507 :
const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* All-zeros: emit an xor of the destination with itself.  The exact
     mnemonic depends on the insn's attribute mode and on whether the
     destination is an EVEX-only register (xmm16+/ymm16+/zmm16+), which
     legacy/VEX-encoded xor forms cannot address.  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* Without AVX512VL, use the zmm form (%g0) of the
		 destination so the EVEX register is encodable.  */
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0";
	      else
		return "vpxord\t%g0, %g0, %g0";
	    }
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX vxorpd needs AVX512DQ; fall back to vpxorq
		 otherwise.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0";
		  else
		    return "vxorpd\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0";
		  else
		    return "vpxorq\t%g0, %g0, %g0";
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0";

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX vxorps needs AVX512DQ; fall back to vpxord
		 otherwise.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0";
		  else
		    return "vxorps\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0";
		  else
		    return "vpxord\t%g0, %g0, %g0";
		}
	    }
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  gcc_unreachable ();
	}
    }
  /* All-ones: compare-equal of a register with itself, or vpternlogd
     with an all-ones immediate for 512-bit / EVEX destinations.  */
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	      else
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
	    }
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
   }
  /* Lower half all-ones, upper half zero: compare-equal on the
     half-width (%t0 = ymm, %x0 = xmm) part of the destination.  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F);
	  return "vpcmpeqd\t%t0, %t0, %t0";
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0";
	}
      gcc_unreachable ();
    }
  /* Lower quarter all-ones, rest zero: xmm-wide compare-equal.  */
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  /* Callers must only pass constants standard_sse_constant_p
     accepted.  */
  gcc_unreachable ();
}
5652 :
5653 : /* Returns true if INSN can be transformed from a memory load
5654 : to a supported FP constant load. */
5655 :
bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  /* Reject the transformation unless the constant source is known and
     is "standard" for the destination register class:
     - SSE register: only all-zeros (standard_sse_constant_p == 1)
       qualifies, and without AVX512VL an all-zeros load into
       xmm16-xmm31/ymm16-ymm31 is also rejected;
     - x87 stack register: one of the special 80387 constants
       (standard_80387_constant_p >= 1).  */
  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (!TARGET_AVX512VL
	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
5675 :
5676 : /* Predicate for pre-reload splitters with associated instructions,
5677 : which can match any time before the split1 pass (usually combine),
5678 : then are unconditionally split in that pass and should not be
5679 : matched again afterwards. */
5680 :
5681 : bool
5682 17724661 : ix86_pre_reload_split (void)
5683 : {
5684 17724661 : return (can_create_pseudo_p ()
5685 26884165 : && !(cfun->curr_properties & PROP_rtl_split_insns));
5686 : }
5687 :
5688 : /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5689 : or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5690 : TARGET_AVX512VL or it is a register to register move which can
5691 : be done with zmm register move. */
5692 :
static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
		 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  /* Unaligned moves are needed if either operand is misaligned.  */
  bool misaligned_p = (misaligned_operand (operands[0], mode)
		       || misaligned_operand (operands[1], mode));
  /* EVEX encoding is required for 64-byte moves and whenever an
     operand is in xmm16-xmm31/ymm16-ymm31.  */
  bool evex_reg_p = (size == 64
		     || EXT_REX_SSE_REG_P (operands[0])
		     || EXT_REX_SSE_REG_P (operands[1]));

  /* APX extended GPRs (r16-r31) in a memory address also constrain
     which encodings are usable.  */
  bool egpr_p = (TARGET_APX_EGPR
		 && (x86_extended_rex2reg_mentioned_p (operands[0])
		     || x86_extended_rex2reg_mentioned_p (operands[1])));
  bool egpr_vl = egpr_p && TARGET_AVX512VL;

  machine_mode scalar_mode;

  const char *opcode = NULL;
  enum
  {
    opcode_int,
    opcode_float,
    opcode_double
  } type = opcode_int;

  /* Derive the element mode and the opcode family (integer, float or
     double move) from the insn's attribute mode.  */
  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
	 AVX512VL is disabled, LRA can still generate reg to
	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
	 modes.  */
      if (memory_operand (operands[0], mode)
	  || memory_operand (operands[1], mode))
	gcc_unreachable ();
      /* Widen the move to a full zmm move.  */
      size = 64;
      switch (type)
	{
	case opcode_int:
	  if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  break;
	case opcode_float:
	  opcode = misaligned_p ? "vmovups" : "vmovaps";
	  break;
	case opcode_double:
	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
	  break;
	}
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      /* Floating-point element types.  Per element type, pick the
	 EVEX-only form, the EGPR-safe form, or the legacy/VEX form.  */
      switch (scalar_mode)
	{
	case E_HFmode:
	case E_BFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SFmode:
	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  break;
	case E_DFmode:
	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
	  break;
	case E_TFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      /* Integer element types — same three-way selection as above.  */
      switch (scalar_mode)
	{
	case E_QImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu8"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_HImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_DImode:
	case E_TImode:
	case E_OImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_XImode:
	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();

  /* Format the operands at the requested width: %g = zmm, %t = ymm,
     %x = xmm view of the register.  */
  switch (size)
    {
    case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}
5910 :
5911 : /* Return the template of the TYPE_SSEMOV instruction to move
5912 : operands[1] into operands[0]. */
5913 :
const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  /* Only TYPE_SSEMOV insns with matching operand modes are valid
     here.  */
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    /* Full-vector moves are delegated to ix86_get_ssemov with the
       move width in bytes.  */
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (GENERAL_REG_P (operands[0]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%1, %q0|%q0, %1}";
	  else
	    return "%vmovd\t{%1, %q0|%q0, %1}";
	}
      else if (GENERAL_REG_P (operands[1]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%q1, %0|%0, %q1}";
	  else
	    return "%vmovd\t{%q1, %0|%0, %q1}";
	}
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      /* 32-bit moves use %k (32-bit view) for the GPR operand.  */
      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "%vmovd\t{%k1, %0|%0, %k1}";
      else
	return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      if (GENERAL_REG_P (operands[0]))
	return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "vmovw\t{%k1, %0|%0, %k1}";
      else
	return "vmovw\t{%1, %0|%0, %1}";

    case MODE_DF:
      /* With AVX, reg-reg scalar moves use the merge form (%d1).  */
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsh\t{%d1, %0|%0, %d1}";
      else
	return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
6009 :
6010 : /* Returns true if OP contains a symbol reference */
6011 :
6012 : bool
6013 583616846 : symbolic_reference_mentioned_p (const_rtx op)
6014 : {
6015 583616846 : const char *fmt;
6016 583616846 : int i;
6017 :
6018 583616846 : if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
6019 : return true;
6020 :
6021 441152390 : fmt = GET_RTX_FORMAT (GET_CODE (op));
6022 748506230 : for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6023 : {
6024 597207550 : if (fmt[i] == 'E')
6025 : {
6026 2021445 : int j;
6027 :
6028 4045183 : for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6029 3329181 : if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6030 : return true;
6031 : }
6032 :
6033 595186105 : else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6034 : return true;
6035 : }
6036 :
6037 : return false;
6038 : }
6039 :
6040 : /* Return true if it is appropriate to emit `ret' instructions in the
6041 : body of a function. Do this only if the epilogue is simple, needing a
6042 : couple of insns. Prior to reloading, we can't tell how many registers
6043 : must be saved, so return false then. Return false if there is no frame
6044 : marker to de-allocate. */
6045 :
6046 : bool
6047 0 : ix86_can_use_return_insn_p (void)
6048 : {
6049 0 : if (ix86_function_ms_hook_prologue (current_function_decl))
6050 : return false;
6051 :
6052 0 : if (ix86_function_naked (current_function_decl))
6053 : return false;
6054 :
6055 : /* Don't use `ret' instruction in interrupt handler. */
6056 0 : if (! reload_completed
6057 0 : || frame_pointer_needed
6058 0 : || cfun->machine->func_type != TYPE_NORMAL)
6059 : return 0;
6060 :
6061 : /* Don't allow more than 32k pop, since that's all we can do
6062 : with one instruction. */
6063 0 : if (crtl->args.pops_args && crtl->args.size >= 32768)
6064 : return 0;
6065 :
6066 0 : struct ix86_frame &frame = cfun->machine->frame;
6067 0 : return (frame.stack_pointer_offset == UNITS_PER_WORD
6068 0 : && (frame.nregs + frame.nsseregs) == 0);
6069 : }
6070 :
6071 : /* Return stack frame size. get_frame_size () returns used stack slots
6072 : during compilation, which may be optimized out later. If stack frame
6073 : is needed, stack_frame_required should be true. */
6074 :
6075 : static HOST_WIDE_INT
6076 8235049 : ix86_get_frame_size (void)
6077 : {
6078 8235049 : if (cfun->machine->stack_frame_required)
6079 8165712 : return get_frame_size ();
6080 : else
6081 : return 0;
6082 : }
6083 :
6084 : /* Value should be nonzero if functions must have frame pointers.
6085 : Zero means the frame pointer need not be set up (and parms may
6086 : be accessed via the stack pointer) in functions that seem suitable. */
6087 :
static bool
ix86_frame_pointer_required (void)
{
  /* Each guard below names one independent reason a frame pointer is
     mandatory; if none applies, it can be omitted.  */

  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assumes that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
6129 :
6130 : /* Record that the current function accesses previous call frames. */
6131 :
void
ix86_setup_frame_addresses (void)
{
  /* Setting this flag forces ix86_frame_pointer_required to return
     true for the current function.  */
  cfun->machine->accesses_prev_frame = 1;
}
6137 :
6138 : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
6139 : # define USE_HIDDEN_LINKONCE 1
6140 : #else
6141 : # define USE_HIDDEN_LINKONCE 0
6142 : #endif
6143 :
6144 : /* Label count for call and return thunks. It is used to make unique
6145 : labels in call and return thunks. */
6146 : static int indirectlabelno;
6147 :
6148 : /* True if call thunk function is needed. */
6149 : static bool indirect_thunk_needed = false;
6150 :
6151 : /* Bit masks of integer registers, which contain branch target, used
6152 : by call thunk functions. */
6153 : static HARD_REG_SET indirect_thunks_used;
6154 :
6155 : /* True if return thunk function is needed. */
6156 : static bool indirect_return_needed = false;
6157 :
6158 : /* True if return thunk function via CX is needed. */
6159 : static bool indirect_return_via_cx;
6160 :
6161 : #ifndef INDIRECT_LABEL
6162 : # define INDIRECT_LABEL "LIND"
6163 : #endif
6164 :
6165 : /* Indicate what prefix is needed for an indirect branch. */
6166 : enum indirect_thunk_prefix
6167 : {
6168 : indirect_thunk_prefix_none,
6169 : indirect_thunk_prefix_nt
6170 : };
6171 :
6172 : /* Return the prefix needed for an indirect branch INSN. */
6173 :
6174 : enum indirect_thunk_prefix
6175 68 : indirect_thunk_need_prefix (rtx_insn *insn)
6176 : {
6177 68 : enum indirect_thunk_prefix need_prefix;
6178 68 : if ((cfun->machine->indirect_branch_type
6179 68 : == indirect_branch_thunk_extern)
6180 68 : && ix86_notrack_prefixed_insn_p (insn))
6181 : {
6182 : /* NOTRACK prefix is only used with external thunk so that it
6183 : can be properly updated to support CET at run-time. */
6184 : need_prefix = indirect_thunk_prefix_nt;
6185 : }
6186 : else
6187 : need_prefix = indirect_thunk_prefix_none;
6188 68 : return need_prefix;
6189 : }
6190 :
6191 : /* Fills in the label name that should be used for the indirect thunk. */
6192 :
static void
indirect_thunk_name (char name[32], unsigned int regno,
		     enum indirect_thunk_prefix need_prefix,
		     bool ret_p)
{
  /* A return thunk through a register only exists for CX.  */
  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    gcc_unreachable ();

  if (USE_HIDDEN_LINKONCE)
    {
      const char *prefix;

      if (need_prefix == indirect_thunk_prefix_nt
	  && regno != INVALID_REGNUM)
	{
	  /* NOTRACK prefix is only used with external thunk via
	     register so that NOTRACK prefix can be added to indirect
	     branch via register to support CET at run-time.  */
	  prefix = "_nt";
	}
      else
	prefix = "";

      const char *ret = ret_p ? "return" : "indirect";

      if (regno != INVALID_REGNUM)
	{
	  /* Legacy integer registers are spelled with an "e"/"r" size
	     prefix, e.g. __x86_indirect_thunk_rax.  */
	  const char *reg_prefix;
	  if (LEGACY_INT_REGNO_P (regno))
	    reg_prefix = TARGET_64BIT ? "r" : "e";
	  else
	    reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
		   ret, prefix, reg_prefix, reg_names[regno]);
	}
      else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    }
  else
    {
      /* Without hidden linkonce support, fall back to internal
	 (per-translation-unit) labels.  */
      if (regno != INVALID_REGNUM)
	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
      else
	{
	  if (ret_p)
	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
	}
    }
}
6244 :
6245 : /* Output a call and return thunk for indirect branch. If REGNO != -1,
6246 : the function address is in REGNO and the call and return thunk looks like:
6247 :
6248 : call L2
6249 : L1:
6250 : pause
6251 : lfence
6252 : jmp L1
6253 : L2:
6254 : mov %REG, (%sp)
6255 : ret
6256 :
6257 : Otherwise, the function address is on the top of stack and the
6258 : call and return thunk looks like:
6259 :
6260 : call L2
6261 : L1:
6262 : pause
6263 : lfence
6264 : jmp L1
6265 : L2:
6266 : lea WORD_SIZE(%sp), %sp
6267 : ret
6268 : */
6269 :
static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  /* L1 (capture loop) and L2 (thunk body) labels; see the diagram in
     the comment preceding this function.  */
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler.  Using both pause + lfence is a compromise solution.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV.  Overwrite the pushed return address on the stack with
	 the branch target held in REGNO, so `ret' jumps to it.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA.  Pop the word pushed by our own call so `ret' uses the
	 target address that was already on the stack.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
  /* Optional int3 after ret hardens against straight-line
     speculation.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
}
6337 :
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  The thunk
   is used for the function return if RET_P is true.  */
6342 :
static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	/* Emit the thunk as a hidden comdat so multiple objects can
	   each define it without link-time conflicts.  */
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs ("\t.hidden\t", asm_out_file);
	assemble_name (asm_out_file, name);
	putc ('\n', asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear down the temporary function context created above.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
6413 :
6414 : static int pic_labels_used;
6415 :
6416 : /* Fills in the label name that should be used for a pc thunk for
6417 : the given register. */
6418 :
static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  /* PC thunks are never used in 64-bit mode.  */
  gcc_assert (!TARGET_64BIT);

  /* Prefer the public hidden-comdat name; fall back to an internal
     per-unit label when hidden linkonce is unavailable.  */
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
6429 :
6430 :
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  (NOTE(review):
   the thunks emitted below load the chosen PIC register, not
   necessarily %ebx — the comment predates multi-register thunks.)  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* First flush out any indirect-branch (retpoline-style) thunks that
     were requested while compiling this translation unit.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);

  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);

      /* Emit a get-pc thunk only for registers recorded in
	 pic_labels_used (set by output_set_got).  */
      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      /* Build a minimal FUNCTION_DECL for the thunk so that section
	 selection, visibility and symbol output follow the normal
	 paths; DECL_IGNORED_P keeps it out of debug info.  */
      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the thunk as a hidden COMDAT symbol so duplicate copies
	     from other translation units are merged by the linker.  */
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Set up just enough of a function context for final_*.  */
      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      /* Thunk body: copy the return address (top of stack) into the
	 destination register, then return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      /* Tear the temporary function context back down.  */
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
6555 :
/* Emit code for the SET_GOT patterns.  DEST is the register to load
   with the GOT address; LABEL, if non-NULL, is a CODE_LABEL marking
   the point of the load.  Returns the (empty) template string, having
   emitted the assembly directly via output_asm_insn.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* Call the per-register pc thunk; recording the register in
	 pic_labels_used makes ix86_code_end emit the thunk body.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-PIC: materialize the address of a local label directly.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  /* Add the GOT offset to turn the loaded PC into the GOT address.  */
  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
6625 :
6626 : /* Generate an "push" pattern for input ARG. */
6627 :
6628 : rtx
6629 1878151 : gen_push (rtx arg, bool ppx_p)
6630 : {
6631 1878151 : struct machine_function *m = cfun->machine;
6632 :
6633 1878151 : if (m->fs.cfa_reg == stack_pointer_rtx)
6634 1602921 : m->fs.cfa_offset += UNITS_PER_WORD;
6635 1878151 : m->fs.sp_offset += UNITS_PER_WORD;
6636 :
6637 1878151 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6638 30 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6639 :
6640 1878151 : rtx stack = gen_rtx_MEM (word_mode,
6641 1878151 : gen_rtx_PRE_DEC (Pmode,
6642 : stack_pointer_rtx));
6643 3756214 : return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6644 : }
6645 :
6646 : rtx
6647 23 : gen_pushfl (void)
6648 : {
6649 23 : struct machine_function *m = cfun->machine;
6650 23 : rtx flags, mem;
6651 :
6652 23 : if (m->fs.cfa_reg == stack_pointer_rtx)
6653 0 : m->fs.cfa_offset += UNITS_PER_WORD;
6654 23 : m->fs.sp_offset += UNITS_PER_WORD;
6655 :
6656 23 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6657 :
6658 23 : mem = gen_rtx_MEM (word_mode,
6659 23 : gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6660 :
6661 23 : return gen_pushfl2 (word_mode, mem, flags);
6662 : }
6663 :
6664 : /* Generate an "pop" pattern for input ARG. */
6665 :
6666 : rtx
6667 1461178 : gen_pop (rtx arg, bool ppx_p)
6668 : {
6669 1461178 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6670 26 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6671 :
6672 1461178 : rtx stack = gen_rtx_MEM (word_mode,
6673 1461178 : gen_rtx_POST_INC (Pmode,
6674 : stack_pointer_rtx));
6675 :
6676 2922268 : return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6677 : }
6678 :
6679 : rtx
6680 21 : gen_popfl (void)
6681 : {
6682 21 : rtx flags, mem;
6683 :
6684 21 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6685 :
6686 21 : mem = gen_rtx_MEM (word_mode,
6687 21 : gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6688 :
6689 21 : return gen_popfl1 (word_mode, flags, mem);
6690 : }
6691 :
6692 : /* Generate a "push2" pattern for input ARG. */
6693 : rtx
6694 19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6695 : {
6696 19 : struct machine_function *m = cfun->machine;
6697 19 : const int offset = UNITS_PER_WORD * 2;
6698 :
6699 19 : if (m->fs.cfa_reg == stack_pointer_rtx)
6700 14 : m->fs.cfa_offset += offset;
6701 19 : m->fs.sp_offset += offset;
6702 :
6703 19 : if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6704 0 : reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6705 :
6706 19 : if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6707 0 : reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6708 :
6709 19 : return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6710 4 : : gen_push2_di (mem, reg1, reg2);
6711 : }
6712 :
6713 : /* Return >= 0 if there is an unused call-clobbered register available
6714 : for the entire function. */
6715 :
6716 : static unsigned int
6717 0 : ix86_select_alt_pic_regnum (void)
6718 : {
6719 0 : if (ix86_use_pseudo_pic_reg ())
6720 : return INVALID_REGNUM;
6721 :
6722 0 : if (crtl->is_leaf
6723 0 : && !crtl->profile
6724 0 : && !ix86_current_function_calls_tls_descriptor)
6725 : {
6726 0 : int i, drap;
6727 : /* Can't use the same register for both PIC and DRAP. */
6728 0 : if (crtl->drap_reg)
6729 0 : drap = REGNO (crtl->drap_reg);
6730 : else
6731 : drap = -1;
6732 0 : for (i = 2; i >= 0; --i)
6733 0 : if (i != drap && !df_regs_ever_live_p (i))
6734 : return i;
6735 : }
6736 :
6737 : return INVALID_REGNUM;
6738 : }
6739 :
6740 : /* Return true if REGNO is used by the epilogue. */
6741 :
6742 : bool
6743 1664085058 : ix86_epilogue_uses (int regno)
6744 : {
6745 : /* If there are no caller-saved registers, we preserve all registers,
6746 : except for MMX and x87 registers which aren't supported when saving
6747 : and restoring registers. Don't explicitly save SP register since
6748 : it is always preserved. */
6749 1664085058 : return (epilogue_completed
6750 263344050 : && (cfun->machine->call_saved_registers
6751 263344050 : == TYPE_NO_CALLER_SAVED_REGISTERS)
6752 27140 : && !fixed_regs[regno]
6753 4857 : && !STACK_REGNO_P (regno)
6754 1664089915 : && !MMX_REGNO_P (regno));
6755 : }
6756 :
6757 : /* Return nonzero if register REGNO can be used as a scratch register
6758 : in peephole2. */
6759 :
6760 : static bool
6761 1236134 : ix86_hard_regno_scratch_ok (unsigned int regno)
6762 : {
6763 : /* If there are no caller-saved registers, we can't use any register
6764 : as a scratch register after epilogue and use REGNO as scratch
6765 : register only if it has been used before to avoid saving and
6766 : restoring it. */
6767 1236134 : return ((cfun->machine->call_saved_registers
6768 1236134 : != TYPE_NO_CALLER_SAVED_REGISTERS)
6769 1236134 : || (!epilogue_completed
6770 0 : && df_regs_ever_live_p (regno)));
6771 : }
6772 :
/* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN means the
   EH return data registers must be considered live; IGNORE_OUTLINED
   means registers handled by the ms2sysv out-of-line stubs should not
   be counted as needing an in-line save.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  /* First apply the per-function calling-convention override.  */
  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
	 registers, except for MMX and x87 registers which aren't
	 supported when saving and restoring registers.  Don't
	 explicitly save SP register since it is always preserved.

	 Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
	{
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  /* Walk every hard register making up the return value.  */
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is callee-saved; only the hard frame pointer may
	 still need saving, decided by the common code below.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
	return false;
      break;
    }

  /* The PIC register requires special treatment when it is a real
     hard register (i.e. not replaced by a pseudo).  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	     _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	/* Only save it when no alternate call-clobbered register can
	   serve as the PIC register instead.  */
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  /* EH return data registers must survive into the landing pad.  */
  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  /* Registers managed by the ms2sysv save/restore stubs need no
     in-line save when the caller asked us to ignore outlined ones.  */
  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The DRAP register is saved unless explicitly suppressed.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save call-saved registers that are live somewhere,
     except the frame pointer when it is in use as such.  */
  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
6864 :
6865 : /* Return number of saved general prupose registers. */
6866 :
6867 : static int
6868 8159307 : ix86_nsaved_regs (void)
6869 : {
6870 8159307 : int nregs = 0;
6871 8159307 : int regno;
6872 :
6873 758815551 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6874 750656244 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6875 8172070 : nregs ++;
6876 8159307 : return nregs;
6877 : }
6878 :
6879 : /* Return number of saved SSE registers. */
6880 :
6881 : static int
6882 8194252 : ix86_nsaved_sseregs (void)
6883 : {
6884 8194252 : int nregs = 0;
6885 8194252 : int regno;
6886 :
6887 7392456 : if (!TARGET_64BIT_MS_ABI
6888 8194252 : && (cfun->machine->call_saved_registers
6889 7968846 : != TYPE_NO_CALLER_SAVED_REGISTERS))
6890 : return 0;
6891 21031299 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6892 20805156 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6893 1896395 : nregs ++;
6894 : return nregs;
6895 : }
6896 :
6897 : /* Given FROM and TO register numbers, say whether this elimination is
6898 : allowed. If stack alignment is needed, we can only replace argument
6899 : pointer with hard frame pointer, or replace frame pointer with stack
6900 : pointer. Otherwise, frame pointer elimination is automatically
6901 : handled and all other eliminations are valid. */
6902 :
6903 : static bool
6904 48322220 : ix86_can_eliminate (const int from, const int to)
6905 : {
6906 48322220 : if (stack_realign_fp)
6907 1706656 : return ((from == ARG_POINTER_REGNUM
6908 1706656 : && to == HARD_FRAME_POINTER_REGNUM)
6909 1706656 : || (from == FRAME_POINTER_REGNUM
6910 1706656 : && to == STACK_POINTER_REGNUM));
6911 : else
6912 86735368 : return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6913 : }
6914 :
6915 : /* Return the offset between two registers, one to be eliminated, and the other
6916 : its replacement, at the start of a routine. */
6917 :
6918 : HOST_WIDE_INT
6919 141257046 : ix86_initial_elimination_offset (int from, int to)
6920 : {
6921 141257046 : struct ix86_frame &frame = cfun->machine->frame;
6922 :
6923 141257046 : if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6924 10413478 : return frame.hard_frame_pointer_offset;
6925 130843568 : else if (from == FRAME_POINTER_REGNUM
6926 130843568 : && to == HARD_FRAME_POINTER_REGNUM)
6927 8124036 : return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6928 : else
6929 : {
6930 122719532 : gcc_assert (to == STACK_POINTER_REGNUM);
6931 :
6932 122719532 : if (from == ARG_POINTER_REGNUM)
6933 114595496 : return frame.stack_pointer_offset;
6934 :
6935 8124036 : gcc_assert (from == FRAME_POINTER_REGNUM);
6936 8124036 : return frame.stack_pointer_offset - frame.frame_pointer_offset;
6937 : }
6938 : }
6939 :
6940 : /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6941 : void
6942 0 : warn_once_call_ms2sysv_xlogues (const char *feature)
6943 : {
6944 0 : static bool warned_once = false;
6945 0 : if (!warned_once)
6946 : {
6947 0 : warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6948 : feature);
6949 0 : warned_once = true;
6950 : }
6951 0 : }
6952 :
6953 : /* Return the probing interval for -fstack-clash-protection. */
6954 :
6955 : static HOST_WIDE_INT
6956 496 : get_probe_interval (void)
6957 : {
6958 341 : if (flag_stack_clash_protection)
6959 412 : return (HOST_WIDE_INT_1U
6960 412 : << param_stack_clash_protection_probe_interval);
6961 : else
6962 : return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6963 : }
6964 :
6965 : /* When using -fsplit-stack, the allocation routines set a field in
6966 : the TCB to the bottom of the stack plus this much space, measured
6967 : in bytes. */
6968 :
6969 : #define SPLIT_STACK_AVAILABLE 256
6970 :
6971 : /* Return true if push2/pop2 can be generated. */
6972 :
6973 : static bool
6974 8159961 : ix86_can_use_push2pop2 (void)
6975 : {
6976 : /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6977 8159961 : unsigned int incoming_stack_boundary
6978 8159961 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6979 8159961 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6980 8159961 : return incoming_stack_boundary % 128 == 0;
6981 : }
6982 :
6983 : /* Helper function to determine whether push2/pop2 can be used in prologue or
6984 : epilogue for register save/restore. */
6985 : static bool
6986 8159307 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6987 : {
6988 8159307 : if (!ix86_can_use_push2pop2 ())
6989 : return false;
6990 8123356 : int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6991 8123356 : return TARGET_APX_PUSH2POP2
6992 2844 : && !cfun->machine->frame.save_regs_using_mov
6993 2832 : && cfun->machine->func_type == TYPE_NORMAL
6994 8126180 : && (nregs + aligned) >= 3;
6995 : }
6996 :
6997 : /* Check if push/pop should be used to save/restore registers. */
6998 : static bool
6999 8884705 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
7000 : {
7001 3199743 : return ((!to_allocate && cfun->machine->frame.nregs <= 1)
7002 5934481 : || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7003 : /* If static stack checking is enabled and done with probes,
7004 : the registers need to be saved before allocating the frame. */
7005 5933820 : || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7006 : /* If stack clash probing needs a loop, then it needs a
7007 : scratch register. But the returned register is only guaranteed
7008 : to be safe to use after register saves are complete. So if
7009 : stack clash protections are enabled and the allocated frame is
7010 : larger than the probe interval, then use pushes to save
7011 : callee saved registers. */
7012 14818445 : || (flag_stack_clash_protection
7013 341 : && !ix86_target_stack_probe ()
7014 341 : && to_allocate > get_probe_interval ()));
7015 : }
7016 :
7017 : /* Fill structure ix86_frame about frame of currently computed function. */
7018 :
7019 : static void
7020 8159307 : ix86_compute_frame_layout (void)
7021 : {
7022 8159307 : struct ix86_frame *frame = &cfun->machine->frame;
7023 8159307 : struct machine_function *m = cfun->machine;
7024 8159307 : unsigned HOST_WIDE_INT stack_alignment_needed;
7025 8159307 : HOST_WIDE_INT offset;
7026 8159307 : unsigned HOST_WIDE_INT preferred_alignment;
7027 8159307 : HOST_WIDE_INT size = ix86_get_frame_size ();
7028 8159307 : HOST_WIDE_INT to_allocate;
7029 :
7030 : /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
7031 : * ms_abi functions that call a sysv function. We now need to prune away
7032 : * cases where it should be disabled. */
7033 8159307 : if (TARGET_64BIT && m->call_ms2sysv)
7034 : {
7035 35225 : gcc_assert (TARGET_64BIT_MS_ABI);
7036 35225 : gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
7037 35225 : gcc_assert (!TARGET_SEH);
7038 35225 : gcc_assert (TARGET_SSE);
7039 35225 : gcc_assert (!ix86_using_red_zone ());
7040 :
7041 35225 : if (crtl->calls_eh_return)
7042 : {
7043 0 : gcc_assert (!reload_completed);
7044 0 : m->call_ms2sysv = false;
7045 0 : warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
7046 : }
7047 :
7048 35225 : else if (ix86_static_chain_on_stack)
7049 : {
7050 0 : gcc_assert (!reload_completed);
7051 0 : m->call_ms2sysv = false;
7052 0 : warn_once_call_ms2sysv_xlogues ("static call chains");
7053 : }
7054 :
7055 : /* Finally, compute which registers the stub will manage. */
7056 : else
7057 : {
7058 35225 : unsigned count = xlogue_layout::count_stub_managed_regs ();
7059 35225 : m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
7060 35225 : m->call_ms2sysv_pad_in = 0;
7061 : }
7062 : }
7063 :
7064 8159307 : frame->nregs = ix86_nsaved_regs ();
7065 8159307 : frame->nsseregs = ix86_nsaved_sseregs ();
7066 :
7067 : /* 64-bit MS ABI seem to require stack alignment to be always 16,
7068 : except for function prologues, leaf functions and when the defult
7069 : incoming stack boundary is overriden at command line or via
7070 : force_align_arg_pointer attribute.
7071 :
7072 : Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
7073 : at call sites, including profile function calls.
7074 :
7075 : For APX push2/pop2, the stack also requires 128b alignment. */
7076 8159307 : if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
7077 67 : && crtl->preferred_stack_boundary < 128)
7078 8159372 : || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
7079 225404 : && crtl->preferred_stack_boundary < 128)
7080 0 : && (!crtl->is_leaf || cfun->calls_alloca != 0
7081 0 : || ix86_current_function_calls_tls_descriptor
7082 0 : || (TARGET_MACHO && crtl->profile)
7083 0 : || ix86_incoming_stack_boundary < 128)))
7084 : {
7085 2 : crtl->preferred_stack_boundary = 128;
7086 2 : if (crtl->stack_alignment_needed < 128)
7087 1 : crtl->stack_alignment_needed = 128;
7088 : }
7089 :
7090 8159307 : stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7091 8159307 : preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7092 :
7093 8159307 : gcc_assert (!size || stack_alignment_needed);
7094 8961074 : gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7095 8159307 : gcc_assert (preferred_alignment <= stack_alignment_needed);
7096 :
7097 : /* The only ABI saving SSE regs should be 64-bit ms_abi or with
7098 : no_caller_saved_registers attribue. */
7099 8159307 : gcc_assert (TARGET_64BIT
7100 : || (cfun->machine->call_saved_registers
7101 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7102 : || !frame->nsseregs);
7103 8159307 : if (TARGET_64BIT && m->call_ms2sysv)
7104 : {
7105 35225 : gcc_assert (stack_alignment_needed >= 16);
7106 35225 : gcc_assert ((cfun->machine->call_saved_registers
7107 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7108 : || !frame->nsseregs);
7109 : }
7110 :
7111 : /* For SEH we have to limit the amount of code movement into the prologue.
7112 : At present we do this via a BLOCKAGE, at which point there's very little
7113 : scheduling that can be done, which means that there's very little point
7114 : in doing anything except PUSHs. */
7115 8159307 : if (TARGET_SEH)
7116 : m->use_fast_prologue_epilogue = false;
7117 8159307 : else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7118 : {
7119 7825077 : int count = frame->nregs;
7120 7825077 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
7121 :
7122 : /* The fast prologue uses move instead of push to save registers. This
7123 : is significantly longer, but also executes faster as modern hardware
7124 : can execute the moves in parallel, but can't do that for push/pop.
7125 :
7126 : Be careful about choosing what prologue to emit: When function takes
7127 : many instructions to execute we may use slow version as well as in
7128 : case function is known to be outside hot spot (this is known with
7129 : feedback only). Weight the size of function by number of registers
7130 : to save as it is cheap to use one or two push instructions but very
7131 : slow to use many of them.
7132 :
7133 : Calling this hook multiple times with the same frame requirements
7134 : must produce the same layout, since the RA might otherwise be
7135 : unable to reach a fixed point or might fail its final sanity checks.
7136 : This means that once we've assumed that a function does or doesn't
7137 : have a particular size, we have to stick to that assumption
7138 : regardless of how the function has changed since. */
7139 7825077 : if (count)
7140 2600676 : count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7141 7825077 : if (node->frequency < NODE_FREQUENCY_NORMAL
7142 7131668 : || (flag_branch_probabilities
7143 971 : && node->frequency < NODE_FREQUENCY_HOT))
7144 693719 : m->use_fast_prologue_epilogue = false;
7145 : else
7146 : {
7147 7131358 : if (count != frame->expensive_count)
7148 : {
7149 285068 : frame->expensive_count = count;
7150 285068 : frame->expensive_p = expensive_function_p (count);
7151 : }
7152 7131358 : m->use_fast_prologue_epilogue = !frame->expensive_p;
7153 : }
7154 : }
7155 :
7156 8159307 : frame->save_regs_using_mov
7157 8159307 : = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7158 :
7159 : /* Skip return address and error code in exception handler. */
7160 8159307 : offset = INCOMING_FRAME_SP_OFFSET;
7161 :
7162 : /* Skip pushed static chain. */
7163 8159307 : if (ix86_static_chain_on_stack)
7164 0 : offset += UNITS_PER_WORD;
7165 :
7166 : /* Skip saved base pointer. */
7167 8159307 : if (frame_pointer_needed)
7168 2767252 : offset += UNITS_PER_WORD;
7169 8159307 : frame->hfp_save_offset = offset;
7170 :
7171 : /* The traditional frame pointer location is at the top of the frame. */
7172 8159307 : frame->hard_frame_pointer_offset = offset;
7173 :
7174 : /* Register save area */
7175 8159307 : offset += frame->nregs * UNITS_PER_WORD;
7176 8159307 : frame->reg_save_offset = offset;
7177 :
7178 : /* Calculate the size of the va-arg area (not including padding, if any). */
7179 8159307 : frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7180 :
7181 : /* Also adjust stack_realign_offset for the largest alignment of
7182 : stack slot actually used. */
7183 8159307 : if (stack_realign_fp
7184 7852314 : || (cfun->machine->max_used_stack_alignment != 0
7185 134 : && (offset % cfun->machine->max_used_stack_alignment) != 0))
7186 : {
7187 : /* We may need a 16-byte aligned stack for the remainder of the
7188 : register save area, but the stack frame for the local function
7189 : may require a greater alignment if using AVX/2/512. In order
7190 : to avoid wasting space, we first calculate the space needed for
7191 : the rest of the register saves, add that to the stack pointer,
7192 : and then realign the stack to the boundary of the start of the
7193 : frame for the local function. */
7194 307059 : HOST_WIDE_INT space_needed = 0;
7195 307059 : HOST_WIDE_INT sse_reg_space_needed = 0;
7196 :
7197 307059 : if (TARGET_64BIT)
7198 : {
7199 305259 : if (m->call_ms2sysv)
7200 : {
7201 6415 : m->call_ms2sysv_pad_in = 0;
7202 6415 : space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7203 : }
7204 :
7205 298844 : else if (frame->nsseregs)
7206 : /* The only ABI that has saved SSE registers (Win64) also has a
7207 : 16-byte aligned default stack. However, many programs violate
7208 : the ABI, and Wine64 forces stack realignment to compensate. */
7209 6447 : space_needed = frame->nsseregs * 16;
7210 :
7211 305259 : sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7212 :
7213 : /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7214 : rounding to be pedantic. */
7215 305259 : space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7216 : }
7217 : else
7218 1800 : space_needed = frame->va_arg_size;
7219 :
7220 : /* Record the allocation size required prior to the realignment AND. */
7221 307059 : frame->stack_realign_allocate = space_needed;
7222 :
7223 : /* The re-aligned stack starts at frame->stack_realign_offset. Values
7224 : before this point are not directly comparable with values below
7225 : this point. Use sp_valid_at to determine if the stack pointer is
7226 : valid for a given offset, fp_valid_at for the frame pointer, or
7227 : choose_baseaddr to have a base register chosen for you.
7228 :
7229 : Note that the result of (frame->stack_realign_offset
7230 : & (stack_alignment_needed - 1)) may not equal zero. */
7231 307059 : offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7232 307059 : frame->stack_realign_offset = offset - space_needed;
7233 307059 : frame->sse_reg_save_offset = frame->stack_realign_offset
7234 307059 : + sse_reg_space_needed;
7235 307059 : }
7236 : else
7237 : {
7238 7852248 : frame->stack_realign_offset = offset;
7239 :
7240 7852248 : if (TARGET_64BIT && m->call_ms2sysv)
7241 : {
7242 28810 : m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7243 28810 : offset += xlogue_layout::get_instance ().get_stack_space_used ();
7244 : }
7245 :
7246 : /* Align and set SSE register save area. */
7247 7823438 : else if (frame->nsseregs)
7248 : {
7249 : /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7250 : required and the DRAP re-alignment boundary is at least 16 bytes,
7251 : then we want the SSE register save area properly aligned. */
7252 183182 : if (ix86_incoming_stack_boundary >= 128
7253 6400 : || (stack_realign_drap && stack_alignment_needed >= 16))
7254 183182 : offset = ROUND_UP (offset, 16);
7255 183182 : offset += frame->nsseregs * 16;
7256 : }
7257 7852248 : frame->sse_reg_save_offset = offset;
7258 7852248 : offset += frame->va_arg_size;
7259 : }
7260 :
7261 : /* Align start of frame for local function. When a function call
7262 : is removed, it may become a leaf function. But if argument may
7263 : be passed on stack, we need to align the stack when there is no
7264 : tail call. */
7265 8159307 : if (m->call_ms2sysv
7266 8124082 : || frame->va_arg_size != 0
7267 8044924 : || size != 0
7268 4391394 : || !crtl->is_leaf
7269 2052771 : || (!crtl->tail_call_emit
7270 1730807 : && cfun->machine->outgoing_args_on_stack)
7271 2052721 : || cfun->calls_alloca
7272 10210206 : || ix86_current_function_calls_tls_descriptor)
7273 6108822 : offset = ROUND_UP (offset, stack_alignment_needed);
7274 :
7275 : /* Frame pointer points here. */
7276 8159307 : frame->frame_pointer_offset = offset;
7277 :
7278 8159307 : offset += size;
7279 :
7280 : /* Add outgoing arguments area. Can be skipped if we eliminated
7281 : all the function calls as dead code.
7282 : Skipping is however impossible when function calls alloca. Alloca
7283 : expander assumes that last crtl->outgoing_args_size
7284 : of stack frame are unused. */
7285 8159307 : if (ACCUMULATE_OUTGOING_ARGS
7286 8777228 : && (!crtl->is_leaf || cfun->calls_alloca
7287 391782 : || ix86_current_function_calls_tls_descriptor))
7288 : {
7289 226139 : offset += crtl->outgoing_args_size;
7290 226139 : frame->outgoing_arguments_size = crtl->outgoing_args_size;
7291 : }
7292 : else
7293 7933168 : frame->outgoing_arguments_size = 0;
7294 :
7295 : /* Align stack boundary. Only needed if we're calling another function
7296 : or using alloca. */
7297 2763034 : if (!crtl->is_leaf || cfun->calls_alloca
7298 10918824 : || ix86_current_function_calls_tls_descriptor)
7299 5401588 : offset = ROUND_UP (offset, preferred_alignment);
7300 :
7301 : /* We've reached end of stack frame. */
7302 8159307 : frame->stack_pointer_offset = offset;
7303 :
7304 : /* Size prologue needs to allocate. */
7305 8159307 : to_allocate = offset - frame->sse_reg_save_offset;
7306 :
7307 8159307 : if (save_regs_using_push_pop (to_allocate))
7308 2577034 : frame->save_regs_using_mov = false;
7309 :
7310 8159307 : if (ix86_using_red_zone ()
7311 7131945 : && crtl->sp_is_unchanging
7312 6490960 : && crtl->is_leaf
7313 2663757 : && !cfun->machine->asm_redzone_clobber_seen
7314 2663744 : && !ix86_pc_thunk_call_expanded
7315 10823051 : && !ix86_current_function_calls_tls_descriptor)
7316 : {
7317 2663729 : frame->red_zone_size = to_allocate;
7318 2663729 : if (frame->save_regs_using_mov)
7319 139945 : frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7320 2663729 : if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7321 102151 : frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7322 : }
7323 : else
7324 5495578 : frame->red_zone_size = 0;
7325 8159307 : frame->stack_pointer_offset -= frame->red_zone_size;
7326 :
7327 : /* The SEH frame pointer location is near the bottom of the frame.
7328 : This is enforced by the fact that the difference between the
7329 : stack pointer and the frame pointer is limited to 240 bytes in
7330 : the unwind data structure. */
7331 8159307 : if (TARGET_SEH)
7332 : {
7333 : /* Force the frame pointer to point at or below the lowest register save
7334 : area, see the SEH code in config/i386/winnt.cc for the rationale. */
7335 : frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7336 :
7337 : /* If we can leave the frame pointer where it is, do so; however return
7338 : the establisher frame for __builtin_frame_address (0) or else if the
7339 : frame overflows the SEH maximum frame size.
7340 :
7341 : Note that the value returned by __builtin_frame_address (0) is quite
7342 : constrained, because setjmp is piggybacked on the SEH machinery with
7343 : recent versions of MinGW:
7344 :
7345 : # elif defined(__SEH__)
7346 : # if defined(__aarch64__) || defined(_ARM64_)
7347 : # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7348 : # elif (__MINGW_GCC_VERSION < 40702)
7349 : # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7350 : # else
7351 : # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7352 : # endif
7353 :
7354 : and the second argument passed to _setjmp, if not null, is forwarded
7355 : to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7356 : built an ExceptionRecord on the fly describing the setjmp buffer). */
7357 : const HOST_WIDE_INT diff
7358 : = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7359 : if (diff <= 255 && !crtl->accesses_prior_frames)
7360 : {
7361 : /* The resulting diff will be a multiple of 16 lower than 255,
7362 : i.e. at most 240 as required by the unwind data structure. */
7363 : frame->hard_frame_pointer_offset += (diff & 15);
7364 : }
7365 : else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7366 : {
7367 : /* Ideally we'd determine what portion of the local stack frame
7368 : (within the constraint of the lowest 240) is most heavily used.
7369 : But without that complication, simply bias the frame pointer
7370 : by 128 bytes so as to maximize the amount of the local stack
7371 : frame that is addressable with 8-bit offsets. */
7372 : frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7373 : }
7374 : else
7375 : frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7376 : }
7377 8159307 : }
7378 :
7379 : /* This is semi-inlined memory_address_length, but simplified
7380 : since we know that we're always dealing with reg+offset, and
7381 : to avoid having to create and discard all that rtl. */
7382 :
7383 : static inline int
7384 1013011 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7385 : {
7386 1013011 : int len = 4;
7387 :
7388 0 : if (offset == 0)
7389 : {
7390 : /* EBP and R13 cannot be encoded without an offset. */
7391 0 : len = (regno == BP_REG || regno == R13_REG);
7392 : }
7393 1004739 : else if (IN_RANGE (offset, -128, 127))
7394 634575 : len = 1;
7395 :
7396 : /* ESP and R12 must be encoded with a SIB byte. */
7397 0 : if (regno == SP_REG || regno == R12_REG)
7398 0 : len++;
7399 :
7400 1013011 : return len;
7401 : }
7402 :
7403 : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7404 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7405 :
7406 : static bool
7407 3497151 : sp_valid_at (HOST_WIDE_INT cfa_offset)
7408 : {
7409 3497151 : const struct machine_frame_state &fs = cfun->machine->fs;
7410 3497151 : if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7411 : {
7412 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7413 46484 : gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7414 : return false;
7415 : }
7416 3450667 : return fs.sp_valid;
7417 : }
7418 :
7419 : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7420 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7421 :
7422 : static inline bool
7423 1367013 : fp_valid_at (HOST_WIDE_INT cfa_offset)
7424 : {
7425 1367013 : const struct machine_frame_state &fs = cfun->machine->fs;
7426 1367013 : if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7427 : {
7428 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7429 28328 : gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7430 : return false;
7431 : }
7432 1338685 : return fs.fp_valid;
7433 : }
7434 :
/* Choose a base register based upon alignment requested, speed and/or
   size.  On success BASE_REG and BASE_OFFSET are set such that
   BASE_REG + BASE_OFFSET addresses CFA - CFA_OFFSET.  ALIGN_REQESTED
   is the minimum alignment (in bits) required by the caller, or zero
   for no requirement.  If ALIGN is non-null, it receives the alignment
   guaranteed by the chosen base register.  BASE_REG is left untouched
   when no valid register satisfies the request.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP (the later candidates below
         win ties via the <= comparisons).  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
7540 :
7541 : /* Return an RTX that points to CFA_OFFSET within the stack frame and
7542 : the alignment of address. If ALIGN is non-null, it should point to
7543 : an alignment value (in bits) that is preferred or zero and will
7544 : recieve the alignment of the base register that was selected,
7545 : irrespective of rather or not CFA_OFFSET is a multiple of that
7546 : alignment value. If it is possible for the base register offset to be
7547 : non-immediate then SCRATCH_REGNO should specify a scratch register to
7548 : use.
7549 :
7550 : The valid base registers are taken from CFUN->MACHINE->FS. */
7551 :
7552 : static rtx
7553 1367013 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7554 : unsigned int scratch_regno = INVALID_REGNUM)
7555 : {
7556 1367013 : rtx base_reg = NULL;
7557 1367013 : HOST_WIDE_INT base_offset = 0;
7558 :
7559 : /* If a specific alignment is requested, try to get a base register
7560 : with that alignment first. */
7561 1367013 : if (align && *align)
7562 966231 : choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7563 :
7564 1367013 : if (!base_reg)
7565 400782 : choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7566 :
7567 1367013 : gcc_assert (base_reg != NULL);
7568 :
7569 1367013 : rtx base_offset_rtx = GEN_INT (base_offset);
7570 :
7571 1419567 : if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7572 : {
7573 1 : gcc_assert (scratch_regno != INVALID_REGNUM);
7574 :
7575 1 : rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7576 1 : emit_move_insn (scratch_reg, base_offset_rtx);
7577 :
7578 1 : return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7579 : }
7580 :
7581 1419566 : return plus_constant (Pmode, base_reg, base_offset);
7582 : }
7583 :
/* Emit code to save registers in the prologue.  Registers are pushed in
   decreasing regno order.  With APX PUSH2POP2 on a normal function,
   pairs of saves are combined into PUSH2 instructions when the stack is
   suitably aligned; each PUSH2 carries a hand-built
   REG_FRAME_RELATED_EXPR note so the unwinder sees the two stores and
   the stack adjustment.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      /* Plain path: one PUSH per call-saved general register.  */
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* PUSH2 path: accumulate registers two at a time.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    if (aligned)
	      {
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 gen_rtx_REG (word_mode,
							      regno_list[0]),
						 gen_rtx_REG (word_mode,
							      regno_list[1]),
						 use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* Describe the PUSH2 to the unwinder: two stores
		       plus the stack-pointer adjustment.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* Stack not 16-byte aligned yet: emit a single PUSH,
		   after which it is.  */
		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					    use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An odd register left over is pushed on its own.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0]),
				      use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
7679 :
/* Emit a single register save at CFA - CFA_OFFSET.  MODE is the mode in
   which register REGNO is stored.  Attaches whatever CFA note the
   unwinder needs when the chosen base register differs from the current
   CFA register or the frame has been realigned.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Extract the base register of the address, which is either a bare
     register or the first operand of a PLUS.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* Saves below a stack-pointer realignment also need an explicit
     expression note.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
7754 :
7755 : /* Emit code to save registers using MOV insns.
7756 : First register is stored at CFA - CFA_OFFSET. */
7757 : static void
7758 44814 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7759 : {
7760 44814 : unsigned int regno;
7761 :
7762 4167702 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7763 4122888 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7764 : {
7765 : /* Skip registers, already processed by shrink wrap separate. */
7766 189107 : if (!cfun->machine->reg_is_wrapped_separately[regno])
7767 84107 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7768 203862 : cfa_offset -= UNITS_PER_WORD;
7769 : }
7770 44814 : }
7771 :
7772 : /* Emit code to save SSE registers using MOV insns.
7773 : First register is stored at CFA - CFA_OFFSET. */
7774 : static void
7775 33353 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7776 : {
7777 33353 : unsigned int regno;
7778 :
7779 3101829 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7780 3068476 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7781 : {
7782 333557 : ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7783 333557 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
7784 : }
7785 33353 : }
7786 :
/* List of REG_CFA_RESTORE notes queued by ix86_add_cfa_restore_note,
   to be attached to the next stack manipulation insn.  GTY-marked so
   the garbage collector keeps the notes alive.  */
static GTY(()) rtx queued_cfa_restores;
7788 :
7789 : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7790 : manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7791 : Don't add the note if the previously saved value will be left untouched
7792 : within stack red-zone till return, as unwinders can find the same value
7793 : in the register and on the stack. */
7794 :
7795 : static void
7796 2281132 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7797 : {
7798 2281132 : if (!crtl->shrink_wrapped
7799 2262555 : && cfa_offset <= cfun->machine->fs.red_zone_offset)
7800 : return;
7801 :
7802 770800 : if (insn)
7803 : {
7804 360544 : add_reg_note (insn, REG_CFA_RESTORE, reg);
7805 360544 : RTX_FRAME_RELATED_P (insn) = 1;
7806 : }
7807 : else
7808 410256 : queued_cfa_restores
7809 410256 : = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7810 : }
7811 :
7812 : /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7813 :
7814 : static void
7815 2548646 : ix86_add_queued_cfa_restore_notes (rtx insn)
7816 : {
7817 2548646 : rtx last;
7818 2548646 : if (!queued_cfa_restores)
7819 : return;
7820 410256 : for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7821 : ;
7822 52910 : XEXP (last, 1) = REG_NOTES (insn);
7823 52910 : REG_NOTES (insn) = queued_cfa_restores;
7824 52910 : queued_cfa_restores = NULL_RTX;
7825 52910 : RTX_FRAME_RELATED_P (insn) = 1;
7826 : }
7827 :
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  If SET_CFA is true, SRC must be the current CFA register and
   the CFA is moved to DEST.  Returns the emitted insn.  */

static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* Offsets too large for an immediate are first loaded into a
     temporary register.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /* Shrink wrap separate may insert prologue between TEST and JMP.  In order
     not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, dest, src, addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, dest, src, addend));

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      /* Move the CFA from SRC to DEST and record the adjustment for
	 the unwinder.  */
      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Keep the tracked stack-pointer state in sync with the adjustment.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
7929 :
7930 : /* Find an available register to be used as dynamic realign argument
7931 : pointer regsiter. Such a register will be written in prologue and
7932 : used in begin of body, so it must not be
7933 : 1. parameter passing register.
7934 : 2. GOT pointer.
7935 : We reuse static-chain register if it is available. Otherwise, we
7936 : use DI for i386 and R13 for x86-64. We chose R13 since it has
7937 : shorter encoding.
7938 :
7939 : Return: the regno of chosen register. */
7940 :
7941 : static unsigned int
7942 7300 : find_drap_reg (void)
7943 : {
7944 7300 : tree decl = cfun->decl;
7945 :
7946 : /* Always use callee-saved register if there are no caller-saved
7947 : registers. */
7948 7300 : if (TARGET_64BIT)
7949 : {
7950 : /* Use R13 for nested function or function need static chain.
7951 : Since function with tail call may use any caller-saved
7952 : registers in epilogue, DRAP must not use caller-saved
7953 : register in such case. */
7954 7015 : if (DECL_STATIC_CHAIN (decl)
7955 6973 : || (cfun->machine->call_saved_registers
7956 6973 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7957 13988 : || crtl->tail_call_emit)
7958 190 : return R13_REG;
7959 :
7960 : return R10_REG;
7961 : }
7962 : else
7963 : {
7964 : /* Use DI for nested function or function need static chain.
7965 : Since function with tail call may use any caller-saved
7966 : registers in epilogue, DRAP must not use caller-saved
7967 : register in such case. */
7968 285 : if (DECL_STATIC_CHAIN (decl)
7969 285 : || (cfun->machine->call_saved_registers
7970 285 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7971 285 : || crtl->tail_call_emit
7972 550 : || crtl->calls_eh_return)
7973 : return DI_REG;
7974 :
7975 : /* Reuse static chain register if it isn't used for parameter
7976 : passing. */
7977 265 : if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7978 : {
7979 265 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7980 265 : if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7981 : return CX_REG;
7982 : }
7983 0 : return DI_REG;
7984 : }
7985 : }
7986 :
7987 : /* Return minimum incoming stack alignment. */
7988 :
7989 : static unsigned int
7990 1615193 : ix86_minimum_incoming_stack_boundary (bool sibcall)
7991 : {
7992 1615193 : unsigned int incoming_stack_boundary;
7993 :
7994 : /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7995 1615193 : if (cfun->machine->func_type != TYPE_NORMAL)
7996 120 : incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7997 : /* Prefer the one specified at command line. */
7998 1615073 : else if (ix86_user_incoming_stack_boundary)
7999 : incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8000 : /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
8001 : if -mstackrealign is used, it isn't used for sibcall check and
8002 : estimated stack alignment is 128bit. */
8003 1615051 : else if (!sibcall
8004 1480758 : && ix86_force_align_arg_pointer
8005 4574 : && crtl->stack_alignment_estimated == 128)
8006 596 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8007 : else
8008 1614455 : incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8009 :
8010 : /* Incoming stack alignment can be changed on individual functions
8011 : via force_align_arg_pointer attribute. We use the smallest
8012 : incoming stack boundary. */
8013 1615193 : if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8014 3229780 : && lookup_attribute ("force_align_arg_pointer",
8015 1614587 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8016 5708 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8017 :
8018 : /* The incoming stack frame has to be aligned at least at
8019 : parm_stack_boundary. */
8020 1615193 : if (incoming_stack_boundary < crtl->parm_stack_boundary)
8021 : incoming_stack_boundary = crtl->parm_stack_boundary;
8022 :
8023 : /* Stack at entrance of main is aligned by runtime. We use the
8024 : smallest incoming stack boundary. */
8025 1615193 : if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8026 140726 : && DECL_NAME (current_function_decl)
8027 140726 : && MAIN_NAME_P (DECL_NAME (current_function_decl))
8028 1617675 : && DECL_FILE_SCOPE_P (current_function_decl))
8029 2482 : incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8030 :
8031 1615193 : return incoming_stack_boundary;
8032 : }
8033 :
8034 : /* Update incoming stack boundary and estimated stack alignment. */
8035 :
8036 : static void
8037 1480895 : ix86_update_stack_boundary (void)
8038 : {
8039 1480895 : ix86_incoming_stack_boundary
8040 1480895 : = ix86_minimum_incoming_stack_boundary (false);
8041 :
8042 : /* x86_64 vararg needs 16byte stack alignment for register save area. */
8043 1480895 : if (TARGET_64BIT
8044 1354411 : && cfun->stdarg
8045 21367 : && crtl->stack_alignment_estimated < 128)
8046 10178 : crtl->stack_alignment_estimated = 128;
8047 :
8048 : /* __tls_get_addr needs to be called with 16-byte aligned stack. */
8049 1480895 : if (ix86_tls_descriptor_calls_expanded_in_cfun
8050 1072 : && crtl->preferred_stack_boundary < 128)
8051 745 : crtl->preferred_stack_boundary = 128;
8052 :
8053 : /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
8054 : are 32 bits, but if force_align_arg_pointer is specified, it should
8055 : prefer 128 bits for a backward-compatibility reason, which is also
8056 : what the doc suggests. */
8057 1480895 : if (lookup_attribute ("force_align_arg_pointer",
8058 1480895 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
8059 1480895 : && crtl->preferred_stack_boundary < 128)
8060 4 : crtl->preferred_stack_boundary = 128;
8061 1480895 : }
8062 :
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      /* Copy the hard DRAP register into a pseudo right after the
	 function entry point.  */
      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
8104 :
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Return the RTX used to
   address incoming arguments.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
8112 :
8113 : struct scratch_reg {
8114 : rtx reg;
8115 : bool saved;
8116 : };
8117 :
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  Fills in SR->reg
   and sets SR->saved when the chosen register had to be pushed.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* In 32-bit mode the candidate must avoid registers used for
	 parameter passing, the static chain and the DRAP register.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	  for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	  for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  /* No register is free: spill one around its use.  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
8185 :
/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.  This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (sr->reg));

	  /* The RTX FRAME_RELATED_P mechanism doesn't know about pop,
	     so describe the CFA adjustment explicitly.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the register from its save slot at SP + OFFSET.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
8221 :
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   If INT_REGISTERS_SAVED is true, then integer registers have already been
   pushed on the stack.

   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
   beyond SIZE bytes.

   This assumes no knowledge of the current probing state, i.e. it is never
   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
   a suitable probe.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* Describe the SP adjustments of the push/pop pair for the
	     unwinder via explicit CFA-adjust notes.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), -1,
			       m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8439 :
/* Adjust the stack pointer up to REG while probing it.  Emits the
   assembly for the probing loop (decrement SP by PROBE_INTERVAL,
   touch the new top of stack, repeat until SP reaches REG).  Returns
   the empty string since all output is written directly.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL (the stack grows downward).  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP with a byte-sized read-modify-write.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
8475 :
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.
	 The scratch register holds the negated offset from SP.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      /* Pop the saved scratch register, if any, off the stack.  */
      release_scratch_register_on_entry (&sr, size, true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8572 :
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  REG holds the negated offset
   (see ix86_emit_probe_stack_range), so the `sub' below advances the
   probe point down the stack.  Returns the empty string; all output is
   written directly to ASM_OUT_FILE.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR (= SP + negated offset).  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
8611 :
/* Data passed to ix86_update_stack_alignment.  */
struct stack_access_data
{
  /* The register used for the stack access being examined.  */
  const_rtx reg;
  /* In/out: pointer to the running maximum stack slot alignment,
     in bits.  */
  unsigned int *stack_alignment;
};
8620 :
8621 : /* Return true if OP references an argument passed on stack. */
8622 :
8623 : static bool
8624 135374 : ix86_argument_passed_on_stack_p (const_rtx op)
8625 : {
8626 135374 : tree mem_expr = MEM_EXPR (op);
8627 135374 : if (mem_expr)
8628 : {
8629 133507 : tree var = get_base_address (mem_expr);
8630 133507 : return TREE_CODE (var) == PARM_DECL;
8631 : }
8632 : return false;
8633 : }
8634 :
/* Update the maximum stack slot alignment from memory alignment in PAT.
   This is a note_stores callback; DATA points to a stack_access_data.  */

static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
  /* This insn may reference stack slot.  Update the maximum stack slot
     alignment if the memory is referenced by the stack access register.  */
  stack_access_data *p = (stack_access_data *) data;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, pat, ALL)
    {
      auto op = *iter;
      if (MEM_P (op))
	{
	  /* NB: Ignore arguments passed on stack since caller is
	     responsible to align the outgoing stack for arguments
	     passed on stack.  */
	  if (reg_mentioned_p (p->reg, XEXP (op, 0))
	      && !ix86_argument_passed_on_stack_p (op))
	    {
	      unsigned int alignment = MEM_ALIGN (op);

	      /* Only the first matching MEM is considered: we stop
		 scanning PAT as soon as one is found.  */
	      if (alignment > *p->stack_alignment)
		*p->stack_alignment = alignment;
	      break;
	    }
	  else
	    /* Do not descend into the address of a non-matching MEM.  */
	    iter.skip_subrtxes ();
	}
    }
}
8667 :
/* Helper function for ix86_find_all_reg_uses.  If SET copies REGNO
   (possibly combined with other operands, but not loaded through
   memory) into another Pmode hard register not yet in REGSET, add
   the destination register to REGSET and WORKLIST.  */

static void
ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
			  rtx set, unsigned int regno,
			  auto_bitmap &worklist)
{
  rtx dest = SET_DEST (set);

  if (!REG_P (dest))
    return;

  /* Reject non-Pmode modes.  */
  if (GET_MODE (dest) != Pmode)
    return;

  unsigned int dst_regno = REGNO (dest);

  /* Already known to be derived from a stack access register.  */
  if (TEST_HARD_REG_BIT (regset, dst_regno))
    return;

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* A register loaded from memory does not propagate REGNO.  */
      if (MEM_P (op))
	iter.skip_subrtxes ();

      if (REG_P (op) && REGNO (op) == regno)
	{
	  /* Add this register to register set.  */
	  add_to_hard_reg_set (&regset, Pmode, dst_regno);
	  bitmap_set_bit (worklist, dst_regno);
	  break;
	}
    }
}
8708 :
/* Find all registers defined with register REGNO.  Walks the df use
   chain of REGNO and, for each SET (single or inside a PARALLEL) in a
   non-jump insn, lets ix86_find_all_reg_uses_1 record derived
   registers in REGSET and WORKLIST.  */

static void
ix86_find_all_reg_uses (HARD_REG_SET &regset,
			unsigned int regno, auto_bitmap &worklist)
{
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref != NULL;
       ref = DF_REF_NEXT_REG (ref))
    {
      if (DF_REF_IS_ARTIFICIAL (ref))
	continue;

      rtx_insn *insn = DF_REF_INSN (ref);

      if (!NONJUMP_INSN_P (insn))
	continue;

      unsigned int ref_regno = DF_REF_REGNO (ref);

      rtx set = single_set (insn);
      if (set)
	{
	  ix86_find_all_reg_uses_1 (regset, set,
				    ref_regno, worklist);
	  continue;
	}

      /* No single set: examine each SET element of a PARALLEL.  */
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
	continue;

      for (int i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx exp = XVECEXP (pat, 0, i);

	  if (GET_CODE (exp) == SET)
	    ix86_find_all_reg_uses_1 (regset, exp,
				      ref_regno, worklist);
	}
    }
}
8751 :
/* Return true if the hard register REGNO used for a stack access is
   defined in a basic block that dominates the block where it is used.
   Results are memoized per (REGNO, BB): REG_DOMINATE_BBS_KNOWN records
   which pairs have been computed, REG_DOMINATE_BBS caches the answer.  */

static bool
ix86_access_stack_p (unsigned int regno, basic_block bb,
		     HARD_REG_SET &set_up_by_prologue,
		     HARD_REG_SET &prologue_used,
		     auto_bitmap reg_dominate_bbs_known[],
		     auto_bitmap reg_dominate_bbs[])
{
  /* Return the cached answer if this pair was seen before.  */
  if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
    return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);

  bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);

  /* Get all BBs which set REGNO and dominate the current BB from all
     DEFs of REGNO.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    if (!DF_REF_IS_ARTIFICIAL (def)
	&& !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
	&& !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
      {
	basic_block set_bb = DF_REF_BB (def);
	if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
	  {
	    rtx_insn *insn = DF_REF_INSN (def);
	    /* Return true if INSN requires stack.  */
	    if (requires_stack_frame_p (insn, prologue_used,
					set_up_by_prologue))
	      {
		bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
		return true;
	      }
	  }
      }

  /* When we get here, REGNO used in the current BB doesn't access
     stack.  */
  return false;
}
8794 :
8795 : /* Return true if OP isn't a memory operand with SYMBOLIC_CONST and
8796 : needs alignment > ALIGNMENT. */
8797 :
8798 : static bool
8799 27722044 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
8800 : {
8801 27722044 : bool need_alignment = MEM_ALIGN (op) > alignment;
8802 27722044 : tree mem_expr = MEM_EXPR (op);
8803 27722044 : if (!mem_expr)
8804 : return need_alignment;
8805 :
8806 22665700 : tree var = get_base_address (mem_expr);
8807 22665700 : if (!VAR_P (var) || !DECL_RTL_SET_P (var))
8808 : return need_alignment;
8809 :
8810 14357091 : rtx x = DECL_RTL (var);
8811 14357091 : if (!MEM_P (x))
8812 : return need_alignment;
8813 :
8814 14357088 : x = XEXP (x, 0);
8815 14357088 : return !SYMBOLIC_CONST (x) && need_alignment;
8816 : }
8817 :
/* Return true if SET needs alignment > ALIGNMENT.  Checks the
   destination if it is a MEM, otherwise the first MEM found in the
   source (only the first one is examined).  */

static bool
ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
{
  rtx dest = SET_DEST (set);

  if (MEM_P (dest))
    return ix86_need_alignment_p_2 (dest, alignment);

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* NB: returns on the first MEM encountered in SRC.  */
      if (MEM_P (op))
	return ix86_need_alignment_p_2 (op, alignment);
    }

  return false;
}
8841 :
8842 : /* Return true if INSN needs alignment > ALIGNMENT. */
8843 :
8844 : static bool
8845 44696204 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
8846 : {
8847 44696204 : rtx set = single_set (insn);
8848 44696204 : if (set)
8849 43927503 : return ix86_need_alignment_p_1 (set, alignment);
8850 :
8851 768701 : rtx pat = PATTERN (insn);
8852 768701 : if (GET_CODE (pat) != PARALLEL)
8853 : return false;
8854 :
8855 2489281 : for (int i = 0; i < XVECLEN (pat, 0); i++)
8856 : {
8857 1844861 : rtx exp = XVECEXP (pat, 0, i);
8858 :
8859 1844861 : if (GET_CODE (exp) == SET
8860 1844861 : && ix86_need_alignment_p_1 (exp, alignment))
8861 : return true;
8862 : }
8863 :
8864 : return false;
8865 : }
8866 :
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  /* Part 1: a stack frame is required as soon as any insn in the
     function requires one.  */
  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;
	    break;
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (&stack_slot_access, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  /* Registers on HARD_STACK_SLOT_ACCESS always access stack.  */
  HARD_REG_SET hard_stack_slot_access = stack_slot_access;

  calculate_dominance_info (CDI_DOMINATORS);

  unsigned int regno;

  /* Part 2: compute the transitive closure of registers derived from
     the stack or frame pointer.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  /* Memoization tables for ix86_access_stack_p, indexed by regno.  */
  auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
  auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];

  data.stack_alignment = &stack_alignment;

  /* Part 3: for each register that may access a stack slot, scan its
     uses and fold the accessed memory alignments into STACK_ALIGNMENT.  */
  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    {
      for (df_ref ref = DF_REG_USE_CHAIN (regno);
	   ref != NULL;
	   ref = DF_REF_NEXT_REG (ref))
	{
	  if (DF_REF_IS_ARTIFICIAL (ref))
	    continue;

	  rtx_insn *insn = DF_REF_INSN (ref);

	  if (!NONJUMP_INSN_P (insn))
	    continue;

	  /* Call ix86_access_stack_p only if INSN needs alignment >
	     STACK_ALIGNMENT.  */
	  if (ix86_need_alignment_p (insn, stack_alignment)
	      && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
		  || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
					  set_up_by_prologue,
					  prologue_used,
					  reg_dominate_bbs_known,
					  reg_dominate_bbs)))
	    {
	      /* Update stack alignment if REGNO is used for stack
		 access.  */
	      data.reg = DF_REF_REG (ref);
	      note_stores (insn, ix86_update_stack_alignment, &data);
	    }
	}
    }

  free_dominance_info (CDI_DOMINATORS);
}
8983 :
8984 : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8985 : will guide prologue/epilogue to be generated in correct form. */
8986 :
8987 : static void
8988 3434721 : ix86_finalize_stack_frame_flags (void)
8989 : {
8990 : /* Check if stack realign is really needed after reload, and
8991 : stores result in cfun */
8992 3434721 : unsigned int incoming_stack_boundary
8993 3434721 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8994 3434721 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8995 3434721 : unsigned int stack_alignment
8996 1180946 : = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8997 4615667 : ? crtl->max_used_stack_slot_alignment
8998 3434721 : : crtl->stack_alignment_needed);
8999 3434721 : unsigned int stack_realign
9000 3434721 : = (incoming_stack_boundary < stack_alignment);
9001 3434721 : bool recompute_frame_layout_p = false;
9002 :
9003 3434721 : if (crtl->stack_realign_finalized)
9004 : {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
9007 1954677 : gcc_assert (crtl->stack_realign_needed == stack_realign);
9008 1954677 : return;
9009 : }
9010 :
9011 : /* It is always safe to compute max_used_stack_alignment. We
9012 : compute it only if 128-bit aligned load/store may be generated
9013 : on misaligned stack slot which will lead to segfault. */
9014 2960088 : bool check_stack_slot
9015 1480044 : = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
9016 1480044 : ix86_find_max_used_stack_alignment (stack_alignment,
9017 : check_stack_slot);
9018 :
9019 : /* If the only reason for frame_pointer_needed is that we conservatively
9020 : assumed stack realignment might be needed or -fno-omit-frame-pointer
9021 : is used, but in the end nothing that needed the stack alignment had
9022 : been spilled nor stack access, clear frame_pointer_needed and say we
9023 : don't need stack realignment.
9024 :
9025 : When vector register is used for piecewise move and store, we don't
9026 : increase stack_alignment_needed as there is no register spill for
9027 : piecewise move and store. Since stack_realign_needed is set to true
9028 : by checking stack_alignment_estimated which is updated by pseudo
9029 : vector register usage, we also need to check stack_realign_needed to
9030 : eliminate frame pointer. */
9031 1480044 : if ((stack_realign
9032 1413749 : || (!flag_omit_frame_pointer && optimize)
9033 1403499 : || crtl->stack_realign_needed)
9034 77201 : && frame_pointer_needed
9035 77201 : && crtl->is_leaf
9036 52701 : && crtl->sp_is_unchanging
9037 52649 : && !ix86_current_function_calls_tls_descriptor
9038 52649 : && !crtl->accesses_prior_frames
9039 52649 : && !cfun->calls_alloca
9040 52649 : && !crtl->calls_eh_return
9041 : /* See ira_setup_eliminable_regset for the rationale. */
9042 52649 : && !(STACK_CHECK_MOVING_SP
9043 52649 : && flag_stack_check
9044 0 : && flag_exceptions
9045 0 : && cfun->can_throw_non_call_exceptions)
9046 52649 : && !ix86_frame_pointer_required ()
9047 52648 : && ix86_get_frame_size () == 0
9048 34945 : && ix86_nsaved_sseregs () == 0
9049 1514989 : && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9050 : {
9051 34945 : if (cfun->machine->stack_frame_required)
9052 : {
9053 : /* Stack frame is required. If stack alignment needed is less
9054 : than incoming stack boundary, don't realign stack. */
9055 278 : stack_realign = incoming_stack_boundary < stack_alignment;
9056 278 : if (!stack_realign)
9057 : {
9058 278 : crtl->max_used_stack_slot_alignment
9059 278 : = incoming_stack_boundary;
9060 278 : crtl->stack_alignment_needed
9061 278 : = incoming_stack_boundary;
9062 : /* Also update preferred_stack_boundary for leaf
9063 : functions. */
9064 278 : crtl->preferred_stack_boundary
9065 278 : = incoming_stack_boundary;
9066 : }
9067 : }
9068 : else
9069 : {
9070 : /* If drap has been set, but it actually isn't live at the
9071 : start of the function, there is no reason to set it up. */
9072 34667 : if (crtl->drap_reg)
9073 : {
9074 35 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9075 70 : if (! REGNO_REG_SET_P (DF_LR_IN (bb),
9076 : REGNO (crtl->drap_reg)))
9077 : {
9078 35 : crtl->drap_reg = NULL_RTX;
9079 35 : crtl->need_drap = false;
9080 : }
9081 : }
9082 : else
9083 34632 : cfun->machine->no_drap_save_restore = true;
9084 :
9085 34667 : frame_pointer_needed = false;
9086 34667 : stack_realign = false;
9087 34667 : crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9088 34667 : crtl->stack_alignment_needed = incoming_stack_boundary;
9089 34667 : crtl->stack_alignment_estimated = incoming_stack_boundary;
9090 34667 : if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9091 1 : crtl->preferred_stack_boundary = incoming_stack_boundary;
9092 34667 : df_finish_pass (true);
9093 34667 : df_scan_alloc (NULL);
9094 34667 : df_scan_blocks ();
9095 34667 : df_compute_regs_ever_live (true);
9096 34667 : df_analyze ();
9097 :
9098 34667 : if (flag_var_tracking)
9099 : {
9100 : /* Since frame pointer is no longer available, replace it with
9101 : stack pointer - UNITS_PER_WORD in debug insns. */
9102 133 : df_ref ref, next;
9103 133 : for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
9104 133 : ref; ref = next)
9105 : {
9106 0 : next = DF_REF_NEXT_REG (ref);
9107 0 : if (!DF_REF_INSN_INFO (ref))
9108 0 : continue;
9109 :
9110 : /* Make sure the next ref is for a different instruction,
9111 : so that we're not affected by the rescan. */
9112 0 : rtx_insn *insn = DF_REF_INSN (ref);
9113 0 : while (next && DF_REF_INSN (next) == insn)
9114 0 : next = DF_REF_NEXT_REG (next);
9115 :
9116 0 : if (DEBUG_INSN_P (insn))
9117 : {
9118 : bool changed = false;
9119 0 : for (; ref != next; ref = DF_REF_NEXT_REG (ref))
9120 : {
9121 0 : rtx *loc = DF_REF_LOC (ref);
9122 0 : if (*loc == hard_frame_pointer_rtx)
9123 : {
9124 0 : *loc = plus_constant (Pmode,
9125 : stack_pointer_rtx,
9126 0 : -UNITS_PER_WORD);
9127 0 : changed = true;
9128 : }
9129 : }
9130 0 : if (changed)
9131 0 : df_insn_rescan (insn);
9132 : }
9133 : }
9134 : }
9135 :
9136 : recompute_frame_layout_p = true;
9137 : }
9138 : }
9139 1445099 : else if (crtl->max_used_stack_slot_alignment >= 128
9140 655211 : && cfun->machine->stack_frame_required)
9141 : {
9142 : /* We don't need to realign stack. max_used_stack_alignment is
9143 : used to decide how stack frame should be aligned. This is
9144 : independent of any psABIs nor 32-bit vs 64-bit. */
9145 610007 : cfun->machine->max_used_stack_alignment
9146 610007 : = stack_alignment / BITS_PER_UNIT;
9147 : }
9148 :
9149 1480044 : if (crtl->stack_realign_needed != stack_realign)
9150 35178 : recompute_frame_layout_p = true;
9151 1480044 : crtl->stack_realign_needed = stack_realign;
9152 1480044 : crtl->stack_realign_finalized = true;
9153 1480044 : if (recompute_frame_layout_p)
9154 35271 : ix86_compute_frame_layout ();
9155 : }
9156 :
9157 : /* Delete SET_GOT right after entry block if it is allocated to reg. */
9158 :
9159 : static void
9160 0 : ix86_elim_entry_set_got (rtx reg)
9161 : {
9162 0 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9163 0 : rtx_insn *c_insn = BB_HEAD (bb);
9164 0 : if (!NONDEBUG_INSN_P (c_insn))
9165 0 : c_insn = next_nonnote_nondebug_insn (c_insn);
9166 0 : if (c_insn && NONJUMP_INSN_P (c_insn))
9167 : {
9168 0 : rtx pat = PATTERN (c_insn);
9169 0 : if (GET_CODE (pat) == PARALLEL)
9170 : {
9171 0 : rtx set = XVECEXP (pat, 0, 0);
9172 0 : if (GET_CODE (set) == SET
9173 0 : && GET_CODE (SET_SRC (set)) == UNSPEC
9174 0 : && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
9175 0 : && REGNO (SET_DEST (set)) == REGNO (reg))
9176 0 : delete_insn (c_insn);
9177 : }
9178 : }
9179 0 : }
9180 :
9181 : static rtx
9182 193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
9183 : {
9184 193166 : rtx addr, mem;
9185 :
9186 193166 : if (offset)
9187 184480 : addr = plus_constant (Pmode, frame_reg, offset);
9188 193166 : mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
9189 193166 : return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
9190 : }
9191 :
9192 : static inline rtx
9193 100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
9194 : {
9195 100333 : return gen_frame_set (reg, frame_reg, offset, false);
9196 : }
9197 :
9198 : static inline rtx
9199 92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
9200 : {
9201 92833 : return gen_frame_set (reg, frame_reg, offset, true);
9202 : }
9203 :
/* Emit the prologue-side use of the out-of-line ms2sysv register-save
   stub: set up RAX as the stub's base pointer and emit one PARALLEL
   combining a USE of the stub symbol with a frame store for each
   register the stub saves (the fixed MS-clobbered set plus any extra
   registers requested for this function).  FRAME describes the current
   function's stack layout.  The whole PARALLEL is marked frame-related
   so the unwinder sees the individual register saves.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  /* One slot for the stub-symbol USE plus one per saved register.  */
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     rather than whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  /* The base address must provide V4SFmode (16-byte) alignment for the
     SSE register slots.  */
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol; a distinct stub is used when a hard frame
     pointer has been set up.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* Describe each register store the stub performs, at its layout offset
     below RAX; SSE registers are saved in V4SFmode, the rest in
     word_mode.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
9248 :
9249 : /* Generate and return an insn body to AND X with Y. */
9250 :
9251 : static rtx_insn *
9252 31773 : gen_and2_insn (rtx x, rtx y)
9253 : {
9254 31773 : enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
9255 :
9256 31773 : gcc_assert (insn_operand_matches (icode, 0, x));
9257 31773 : gcc_assert (insn_operand_matches (icode, 1, x));
9258 31773 : gcc_assert (insn_operand_matches (icode, 2, y));
9259 :
9260 31773 : return GEN_FCN (icode) (x, x, y);
9261 : }
9262 :
9263 : /* Expand the prologue into a bunch of separate insns. */
9264 :
void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;
  bool save_stub_call_needed;
  rtx static_chain = NULL_RTX;

  ix86_last_zero_store_uid = 0;
  /* A naked function gets no compiler-generated prologue at all.  */
  if (ix86_function_naked (current_function_decl))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  ix86_finalize_stack_frame_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  /* Reset the frame-state tracking used while emitting the prologue.  */
  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested to offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;

  const struct ix86_frame &frame = cfun->machine->frame;

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("%<ms_hook_prologue%> attribute is not compatible "
	       "with %<-mfentry%> for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      static_chain = ix86_static_chain (cfun->decl, false);
      insn = emit_insn (gen_push (static_chain));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     of DRAP is needed and stack realignment is really needed after reload */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Can't use DRAP in interrupt function.  */
      if (cfun->machine->func_type != TYPE_NORMAL)
	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
	       "in interrupt service routine. This may be worked "
	       "around by avoiding functions with aggregate return.");

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* From here the DRAP register is the CFA register.  */
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
				       GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

      if (static_chain)
	{
	  /* Replicate static chain on the stack so that static chain
	     can be reached via (argp - 2) slot.  This is needed for
	     nested function with stack realignment.  */
	  insn = emit_insn (gen_push (static_chain));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Track which register classes still need saving and whether the
     out-of-line ms2sysv save stub must be called.  */
  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);
  save_stub_call_needed = (m->call_ms2sysv);
  gcc_assert (sse_registers_saved || !save_stub_call_needed);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb didn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  HOST_WIDE_INT allocate_offset;
	  if (crtl->shrink_wrapped_separate)
	    {
	      allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;

	      /* Adjust the total offset at the beginning of the function.  */
	      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (allocate_offset), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	      m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
	    }

	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (frame.red_zone_size != 0)
    cfun->machine->red_zone_used = true;

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* Record last valid frame pointer offset.  */
      m->fs.sp_realigned_fp_last = frame.reg_save_offset;

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      allocate = frame.reg_save_offset - m->fs.sp_offset
		 + frame.stack_realign_allocate;
      if (allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1, false);

      /* Align the stack.  */
      emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
      m->fs.sp_realigned_offset = m->fs.sp_offset
				  - frame.stack_realign_allocate;
      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
	 Beyond this point, stack access should be done via choose_baseaddr or
	 by using sp_valid_at and fp_valid_at to determine the correct base
	 register.  Henceforth, any CFA offset should be thought of as logical
	 and not physical.  */
      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
      m->fs.sp_realigned = true;

      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
	 is needed to describe where a register is saved using a realigned
	 stack pointer, so we need to invalidate the stack pointer for that
	 target.  */
      if (TARGET_SEH)
	m->fs.sp_valid = false;

      /* If SP offset is non-immediate after allocation of the stack frame,
	 then emit SSE saves or stub call prior to allocating the rest of the
	 stack frame.  This is less efficient for the out-of-line stub because
	 we can't combine allocations across the call barrier, but it's better
	 than using a scratch register.  */
      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
						   - m->fs.sp_realigned_offset),
					  Pmode))
	{
	  if (!sse_registers_saved)
	    {
	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
	      sse_registers_saved = true;
	    }
	  else if (save_stub_call_needed)
	    {
	      ix86_emit_outlined_ms2sysv_save (frame);
	      save_stub_call_needed = false;
	    }
	}
    }

  /* Remaining stack to allocate after register saves / realignment.  */
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size
	= frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* If stack clash protection is requested, then probe the stack, unless it
     is already probed on the target.  */
  if (allocate >= 0
      && flag_stack_clash_protection
      && !ix86_target_stack_probe ())
    {
      ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
      allocate = 0;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      const HOST_WIDE_INT probe_interval = get_probe_interval ();

      if (STACK_CHECK_MOVING_SP)
	{
	  /* A leaf function allocating no more than one probe interval
	     needs no probing at all.  */
	  if (crtl->is_leaf
	      && !cfun->calls_alloca
	      && allocate <= probe_interval)
	    ;

	  else
	    {
	      ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
	      allocate = 0;
	    }
	}

      else
	{
	  HOST_WIDE_INT size = allocate;

	  /* Clamp the probed size below 2GB on 64-bit targets.  */
	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
	    size = 0x80000000 - get_stack_check_protect () - 1;

	  if (TARGET_STACK_PROBE)
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval)
		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (0,
					     size + get_stack_check_protect (),
					     int_registers_saved);
	    }
	  else
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval
		      && size > get_stack_check_protect ())
		    ix86_emit_probe_stack_range (get_stack_check_protect (),
						 (size
						  - get_stack_check_protect ()),
						 int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
					     int_registers_saved);
	    }
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      /* Large allocation on a stack-probing target: go through the
	 allocate_stack_worker probe, using EAX (and possibly R10) as
	 scratch, saving/restoring them around the probe if live.  */
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  /* Note that SEH directives need to continue tracking the stack
	     pointer even after the frame pointer has been set up.  */
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code works for
	 realigned stack.  But this means that we need a blockage to prevent
	 stores based on the frame pointer from being scheduled before.  */
      if (r10_live && eax_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
      else if (eax_live || r10_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  /* Emit any remaining register saves now that the frame is allocated.  */
  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  else if (save_stub_call_needed)
    ix86_emit_outlined_ms2sysv_save (frame);

  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
     in PROLOGUE.  */
  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    {
      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
      insn = emit_insn (gen_set_got (pic));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
      emit_insn (gen_prologue_use (pic));
      /* Deleting already emitted SET_GOT if exist and allocated to
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
    }

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
	 isn't necessary, here we will emit prologue to setup DRAP
	 without stack realign adjustment  */
      t = choose_baseaddr (0, NULL);
      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
9850 :
9851 : /* Emit code to restore REG using a POP or POPP insn. */
9852 :
static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  /* PPX_P selects the POPP form of the pop (see gen_pop and the
     comment above this function).  */
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));

  /* Record the register restore for unwind info and account for the
     word popped off the stack.  */
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* SP is the CFA register: the pop moves it up one word, so emit
	 the corresponding CFA adjustment note.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9908 :
/* Emit code to restore REG1 and REG2 using a single POP2 insn, with the
   PPX variant when PPX_P is true.  Mirrors the CFA bookkeeping of
   ix86_emit_restore_reg_using_pop, but for a two-word (TImode)
   post-increment stack access.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 moves the stack pointer by two words at once.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* Both registers are loaded through one TImode post-incremented
     stack access.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach a restore note for each of the two registers.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* CFA tracked via SP: record the two-word adjustment.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9981 :
9982 : /* Emit code to restore saved registers using POP insns. */
9983 :
9984 : static void
9985 1355234 : ix86_emit_restore_regs_using_pop (bool ppx_p)
9986 : {
9987 1355234 : unsigned int regno;
9988 :
9989 126036762 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9990 124681528 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9991 1223393 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9992 1355234 : }
9993 :
9994 : /* Emit code to restore saved registers using POP2 insns. */
9995 :
9996 : static void
9997 561 : ix86_emit_restore_regs_using_pop2 (void)
9998 : {
9999 561 : int regno;
10000 561 : int regno_list[2];
10001 561 : regno_list[0] = regno_list[1] = -1;
10002 561 : int loaded_regnum = 0;
10003 561 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
10004 :
10005 52173 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10006 51612 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
10007 : {
10008 127 : if (aligned)
10009 : {
10010 120 : regno_list[loaded_regnum++] = regno;
10011 120 : if (loaded_regnum == 2)
10012 : {
10013 19 : gcc_assert (regno_list[0] != -1
10014 : && regno_list[1] != -1
10015 : && regno_list[0] != regno_list[1]);
10016 :
10017 19 : ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
10018 : regno_list[0]),
10019 : gen_rtx_REG (word_mode,
10020 : regno_list[1]),
10021 19 : TARGET_APX_PPX);
10022 19 : loaded_regnum = 0;
10023 19 : regno_list[0] = regno_list[1] = -1;
10024 : }
10025 : }
10026 : else
10027 : {
10028 14 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
10029 7 : TARGET_APX_PPX);
10030 7 : aligned = true;
10031 : }
10032 : }
10033 :
10034 561 : if (loaded_regnum == 1)
10035 82 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
10036 82 : TARGET_APX_PPX);
10037 561 : }
10038 :
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE requires a valid frame pointer.  Afterwards SP is valid
     again and sits one word above where the frame pointer was saved
     (the saved FP has been popped), while FP itself is clobbered.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA was based on the now-dead frame pointer; rebase it
	 on the stack pointer and emit the matching unwind note.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Record that the frame pointer register has been restored.  */
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
10071 :
/* Emit code to restore saved general registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  MAYBE_EH_RETURN
   is passed through to ix86_save_reg to include the registers saved
   for an eh_return path.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {

	/* Skip registers, already processed by shrink wrap separate.  */
	if (!cfun->machine->reg_is_wrapped_separately[regno])
	  {
	    rtx reg = gen_rtx_REG (word_mode, regno);
	    rtx mem;
	    rtx_insn *insn;

	    mem = choose_baseaddr (cfa_offset, NULL);
	    mem = gen_frame_mem (word_mode, mem);
	    insn = emit_move_insn (reg, mem);

	    if (m->fs.cfa_reg == crtl->drap_reg
		&& regno == REGNO (crtl->drap_reg))
	      {
		/* Previously we'd represented the CFA as an expression
		   like *(%ebp - 8).  We've just popped that value from
		   the stack, which means we need to reset the CFA to
		   the drap register.  This will remain until we restore
		   the stack pointer.  */
		add_reg_note (insn, REG_CFA_DEF_CFA, reg);
		RTX_FRAME_RELATED_P (insn) = 1;

		/* DRAP register is valid for addressing.  */
		m->fs.drap_valid = true;
	      }
	    else
	      ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
	  }
	/* Advance past this register's save slot even when the restore
	   was skipped above, so subsequent slots stay in sync.  */
	cfa_offset -= UNITS_PER_WORD;
      }
}
10116 :
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  MAYBE_EH_RETURN
   is passed through to ix86_save_reg.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	/* choose_baseaddr may lower ALIGN to the base register's
	   known alignment.  */
	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	/* SSE slots are 16 bytes wide.  */
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
10146 :
/* Emit the out-of-line epilogue that restores the registers clobbered
   by an MS-ABI to SysV-ABI call, by calling (USE_CALL) or tail-jumping
   to one of the xlogue restore stubs.  FRAME describes the current
   frame layout; STYLE is passed through to pro_epilogue_adjust_stack.
   RSI is set up as the stub's base pointer, matching the layout
   provided by xlogue_layout.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  Four stub flavors exist, selected by
     whether a hard frame pointer is in use and whether the stub is
     called or is itself the tail of the function.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one frame load per register, plus the stub USE
     and, for the tail-call form, the return and SP/FP restore elements
     added below.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  /* Model the stub's epilogue: SP is restored from RBP + 8 and
	     RBP is reloaded from its save slot; the memory clobber
	     keeps frame accesses from being moved across this insn.  */
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last.  RSI is the base
	 pointer for the other loads, so it must be reloaded after all
	 of them.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  /* All PARALLEL slots must have been filled.  */
  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
}
10305 :
10306 : /* Restore function stack, frame, and registers. */
10307 :
10308 : void
10309 1650066 : ix86_expand_epilogue (int style)
10310 : {
10311 1650066 : struct machine_function *m = cfun->machine;
10312 1650066 : struct machine_frame_state frame_state_save = m->fs;
10313 1650066 : bool restore_regs_via_mov;
10314 1650066 : bool using_drap;
10315 1650066 : bool restore_stub_is_tail = false;
10316 :
10317 1650066 : if (ix86_function_naked (current_function_decl))
10318 : {
10319 : /* The program should not reach this point. */
10320 74 : emit_insn (gen_ud2 ());
10321 125314 : return;
10322 : }
10323 :
10324 1649992 : ix86_finalize_stack_frame_flags ();
10325 1649992 : const struct ix86_frame &frame = cfun->machine->frame;
10326 :
10327 1649992 : m->fs.sp_realigned = stack_realign_fp;
10328 31913 : m->fs.sp_valid = stack_realign_fp
10329 1625257 : || !frame_pointer_needed
10330 2106523 : || crtl->sp_is_unchanging;
10331 1649992 : gcc_assert (!m->fs.sp_valid
10332 : || m->fs.sp_offset == frame.stack_pointer_offset);
10333 :
10334 : /* The FP must be valid if the frame pointer is present. */
10335 1649992 : gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10336 1649992 : gcc_assert (!m->fs.fp_valid
10337 : || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10338 :
10339 : /* We must have *some* valid pointer to the stack frame. */
10340 1649992 : gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10341 :
10342 : /* The DRAP is never valid at this point. */
10343 1649992 : gcc_assert (!m->fs.drap_valid);
10344 :
10345 : /* See the comment about red zone and frame
10346 : pointer usage in ix86_expand_prologue. */
10347 1649992 : if (frame_pointer_needed && frame.red_zone_size)
10348 129199 : emit_insn (gen_memory_blockage ());
10349 :
10350 1649992 : using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10351 7178 : gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10352 :
10353 : /* Determine the CFA offset of the end of the red-zone. */
10354 1649992 : m->fs.red_zone_offset = 0;
10355 1649992 : if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10356 : {
10357 : /* The red-zone begins below return address and error code in
10358 : exception handler. */
10359 1472629 : m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
10360 :
10361 : /* When the register save area is in the aligned portion of
10362 : the stack, determine the maximum runtime displacement that
10363 : matches up with the aligned frame. */
10364 1472629 : if (stack_realign_drap)
10365 8626 : m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10366 4313 : + UNITS_PER_WORD);
10367 : }
10368 :
10369 1649992 : HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
10370 :
10371 : /* Special care must be taken for the normal return case of a function
10372 : using eh_return: the eax and edx registers are marked as saved, but
10373 : not restored along this path. Adjust the save location to match. */
10374 1649992 : if (crtl->calls_eh_return && style != 2)
10375 37 : reg_save_offset -= 2 * UNITS_PER_WORD;
10376 :
10377 : /* EH_RETURN requires the use of moves to function properly. */
10378 1649992 : if (crtl->calls_eh_return)
10379 : restore_regs_via_mov = true;
10380 : /* SEH requires the use of pops to identify the epilogue. */
10381 1649934 : else if (TARGET_SEH)
10382 : restore_regs_via_mov = false;
10383 : /* If we already save reg with pushp, don't use move at epilogue. */
10384 1649934 : else if (m->fs.apx_ppx_used)
10385 : restore_regs_via_mov = false;
10386 : /* If we're only restoring one register and sp cannot be used then
10387 : using a move instruction to restore the register since it's
10388 : less work than reloading sp and popping the register. */
10389 1649847 : else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
10390 : restore_regs_via_mov = true;
10391 1588949 : else if (crtl->shrink_wrapped_separate
10392 1536550 : || (TARGET_EPILOGUE_USING_MOVE
10393 56735 : && cfun->machine->use_fast_prologue_epilogue
10394 56679 : && (frame.nregs > 1
10395 56666 : || m->fs.sp_offset != reg_save_offset)))
10396 : restore_regs_via_mov = true;
10397 1536315 : else if (frame_pointer_needed
10398 417770 : && !frame.nregs
10399 322640 : && m->fs.sp_offset != reg_save_offset)
10400 : restore_regs_via_mov = true;
10401 1385509 : else if (frame_pointer_needed
10402 266964 : && TARGET_USE_LEAVE
10403 266889 : && cfun->machine->use_fast_prologue_epilogue
10404 209986 : && frame.nregs == 1)
10405 : restore_regs_via_mov = true;
10406 : else
10407 1649992 : restore_regs_via_mov = false;
10408 :
10409 1649992 : if (crtl->shrink_wrapped_separate)
10410 52430 : gcc_assert (restore_regs_via_mov);
10411 :
10412 1597562 : if (restore_regs_via_mov || frame.nsseregs)
10413 : {
10414 : /* Ensure that the entire register save area is addressable via
10415 : the stack pointer, if we will restore SSE regs via sp. */
10416 327772 : if (TARGET_64BIT
10417 315150 : && m->fs.sp_offset > 0x7fffffff
10418 23 : && sp_valid_at (frame.stack_realign_offset + 1)
10419 327794 : && (frame.nsseregs + frame.nregs) != 0)
10420 : {
10421 6 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10422 6 : GEN_INT (m->fs.sp_offset
10423 : - frame.sse_reg_save_offset),
10424 : style,
10425 6 : m->fs.cfa_reg == stack_pointer_rtx);
10426 : }
10427 : }
10428 :
10429 : /* If there are any SSE registers to restore, then we have to do it
10430 : via moves, since there's obviously no pop for SSE regs. */
10431 1649992 : if (frame.nsseregs)
10432 33929 : ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10433 : style == 2);
10434 :
10435 1649992 : if (m->call_ms2sysv)
10436 : {
10437 7621 : int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
10438 :
10439 : /* We cannot use a tail-call for the stub if:
10440 : 1. We have to pop incoming args,
10441 : 2. We have additional int regs to restore, or
10442 : 3. A sibling call will be the tail-call, or
10443 : 4. We are emitting an eh_return_internal epilogue.
10444 :
10445 : TODO: Item 4 has not yet tested!
10446 :
10447 : If any of the above are true, we will call the stub rather than
10448 : jump to it. */
10449 7621 : restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
10450 7621 : ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
10451 : }
10452 :
10453 : /* If using out-of-line stub that is a tail-call, then...*/
10454 1649992 : if (m->call_ms2sysv && restore_stub_is_tail)
10455 : {
10456 : /* TODO: parinoid tests. (remove eventually) */
10457 1123 : gcc_assert (m->fs.sp_valid);
10458 1123 : gcc_assert (!m->fs.sp_realigned);
10459 1123 : gcc_assert (!m->fs.fp_valid);
10460 1123 : gcc_assert (!m->fs.realigned);
10461 1123 : gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
10462 1123 : gcc_assert (!crtl->drap_reg);
10463 1123 : gcc_assert (!frame.nregs);
10464 1123 : gcc_assert (!crtl->shrink_wrapped_separate);
10465 : }
10466 1648869 : else if (restore_regs_via_mov)
10467 : {
10468 293074 : rtx t;
10469 :
10470 293074 : if (frame.nregs)
10471 96200 : ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
10472 :
10473 : /* eh_return epilogues need %ecx added to the stack pointer. */
10474 293074 : if (style == 2)
10475 : {
10476 37 : rtx sa = EH_RETURN_STACKADJ_RTX;
10477 29 : rtx_insn *insn;
10478 :
10479 29 : gcc_assert (!crtl->shrink_wrapped_separate);
10480 :
10481 : /* Stack realignment doesn't work with eh_return. */
10482 29 : if (crtl->stack_realign_needed)
10483 0 : sorry ("Stack realignment not supported with "
10484 : "%<__builtin_eh_return%>");
10485 :
10486 : /* regparm nested functions don't work with eh_return. */
10487 29 : if (ix86_static_chain_on_stack)
10488 0 : sorry ("regparm nested function not supported with "
10489 : "%<__builtin_eh_return%>");
10490 :
10491 29 : if (frame_pointer_needed)
10492 : {
10493 35 : t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10494 43 : t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
10495 27 : emit_insn (gen_rtx_SET (sa, t));
10496 :
10497 : /* NB: eh_return epilogues must restore the frame pointer
10498 : in word_mode since the upper 32 bits of RBP register
10499 : can have any values. */
10500 27 : t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10501 27 : rtx frame_reg = gen_rtx_REG (word_mode,
10502 : HARD_FRAME_POINTER_REGNUM);
10503 27 : insn = emit_move_insn (frame_reg, t);
10504 :
10505 : /* Note that we use SA as a temporary CFA, as the return
10506 : address is at the proper place relative to it. We
10507 : pretend this happens at the FP restore insn because
10508 : prior to this insn the FP would be stored at the wrong
10509 : offset relative to SA, and after this insn we have no
10510 : other reasonable register to use for the CFA. We don't
10511 : bother resetting the CFA to the SP for the duration of
10512 : the return insn, unless the control flow instrumentation
10513 : is done. In this case the SP is used later and we have
10514 : to reset CFA to SP. */
10515 27 : add_reg_note (insn, REG_CFA_DEF_CFA,
10516 35 : plus_constant (Pmode, sa, UNITS_PER_WORD));
10517 27 : ix86_add_queued_cfa_restore_notes (insn);
10518 27 : add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10519 27 : RTX_FRAME_RELATED_P (insn) = 1;
10520 :
10521 27 : m->fs.cfa_reg = sa;
10522 27 : m->fs.cfa_offset = UNITS_PER_WORD;
10523 27 : m->fs.fp_valid = false;
10524 :
10525 27 : pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10526 : const0_rtx, style,
10527 27 : flag_cf_protection);
10528 : }
10529 : else
10530 : {
10531 2 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10532 2 : t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10533 2 : insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10534 2 : ix86_add_queued_cfa_restore_notes (insn);
10535 :
10536 2 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10537 2 : if (m->fs.cfa_offset != UNITS_PER_WORD)
10538 : {
10539 2 : m->fs.cfa_offset = UNITS_PER_WORD;
10540 2 : add_reg_note (insn, REG_CFA_DEF_CFA,
10541 2 : plus_constant (Pmode, stack_pointer_rtx,
10542 2 : UNITS_PER_WORD));
10543 2 : RTX_FRAME_RELATED_P (insn) = 1;
10544 : }
10545 : }
10546 29 : m->fs.sp_offset = UNITS_PER_WORD;
10547 29 : m->fs.sp_valid = true;
10548 29 : m->fs.sp_realigned = false;
10549 : }
10550 : }
10551 : else
10552 : {
10553 : /* SEH requires that the function end with (1) a stack adjustment
10554 : if necessary, (2) a sequence of pops, and (3) a return or
10555 : jump instruction. Prevent insns from the function body from
10556 : being scheduled into this sequence. */
10557 1355795 : if (TARGET_SEH)
10558 : {
10559 : /* Prevent a catch region from being adjacent to the standard
10560 : epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10561 : nor several other flags that would be interesting to test are
10562 : set up yet. */
10563 : if (flag_non_call_exceptions)
10564 : emit_insn (gen_nops (const1_rtx));
10565 : else
10566 : emit_insn (gen_blockage ());
10567 : }
10568 :
10569 : /* First step is to deallocate the stack frame so that we can
10570 : pop the registers. If the stack pointer was realigned, it needs
10571 : to be restored now. Also do it on SEH target for very large
10572 : frame as the emitted instructions aren't allowed by the ABI
10573 : in epilogues. */
10574 1355795 : if (!m->fs.sp_valid || m->fs.sp_realigned
10575 : || (TARGET_SEH
10576 : && (m->fs.sp_offset - reg_save_offset
10577 : >= SEH_MAX_FRAME_SIZE)))
10578 : {
10579 29756 : pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10580 29756 : GEN_INT (m->fs.fp_offset
10581 : - reg_save_offset),
10582 : style, false);
10583 : }
10584 1326039 : else if (m->fs.sp_offset != reg_save_offset)
10585 : {
10586 613669 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10587 : GEN_INT (m->fs.sp_offset
10588 : - reg_save_offset),
10589 : style,
10590 613669 : m->fs.cfa_reg == stack_pointer_rtx);
10591 : }
10592 :
10593 1355795 : if (TARGET_APX_PUSH2POP2
10594 564 : && ix86_can_use_push2pop2 ()
10595 1356357 : && m->func_type == TYPE_NORMAL)
10596 561 : ix86_emit_restore_regs_using_pop2 ();
10597 : else
10598 1355234 : ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10599 : }
10600 :
10601 : /* If we used a stack pointer and haven't already got rid of it,
10602 : then do so now. */
10603 1649992 : if (m->fs.fp_valid)
10604 : {
10605 : /* If the stack pointer is valid and pointing at the frame
10606 : pointer store address, then we only need a pop. */
10607 480268 : if (sp_valid_at (frame.hfp_save_offset)
10608 480268 : && m->fs.sp_offset == frame.hfp_save_offset)
10609 237399 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10610 : /* Leave results in shorter dependency chains on CPUs that are
10611 : able to grok it fast. */
10612 242869 : else if (TARGET_USE_LEAVE
10613 12 : || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10614 242881 : || !cfun->machine->use_fast_prologue_epilogue)
10615 242857 : ix86_emit_leave (NULL);
10616 : else
10617 : {
10618 12 : pro_epilogue_adjust_stack (stack_pointer_rtx,
10619 : hard_frame_pointer_rtx,
10620 12 : const0_rtx, style, !using_drap);
10621 12 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10622 : }
10623 : }
10624 :
10625 1649992 : if (using_drap)
10626 : {
10627 7178 : int param_ptr_offset = UNITS_PER_WORD;
10628 7178 : rtx_insn *insn;
10629 :
10630 7178 : gcc_assert (stack_realign_drap);
10631 :
10632 7178 : if (ix86_static_chain_on_stack)
10633 0 : param_ptr_offset += UNITS_PER_WORD;
10634 7178 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10635 230 : param_ptr_offset += UNITS_PER_WORD;
10636 :
10637 7483 : insn = emit_insn (gen_rtx_SET
10638 : (stack_pointer_rtx,
10639 : plus_constant (Pmode, crtl->drap_reg,
10640 : -param_ptr_offset)));
10641 7178 : m->fs.cfa_reg = stack_pointer_rtx;
10642 7178 : m->fs.cfa_offset = param_ptr_offset;
10643 7178 : m->fs.sp_offset = param_ptr_offset;
10644 7178 : m->fs.realigned = false;
10645 :
10646 7483 : add_reg_note (insn, REG_CFA_DEF_CFA,
10647 7178 : plus_constant (Pmode, stack_pointer_rtx,
10648 7178 : param_ptr_offset));
10649 7178 : RTX_FRAME_RELATED_P (insn) = 1;
10650 :
10651 7178 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10652 230 : ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10653 : }
10654 :
10655 : /* At this point the stack pointer must be valid, and we must have
10656 : restored all of the registers. We may not have deallocated the
10657 : entire stack frame. We've delayed this until now because it may
10658 : be possible to merge the local stack deallocation with the
10659 : deallocation forced by ix86_static_chain_on_stack. */
10660 1649992 : gcc_assert (m->fs.sp_valid);
10661 1649992 : gcc_assert (!m->fs.sp_realigned);
10662 1649992 : gcc_assert (!m->fs.fp_valid);
10663 1649992 : gcc_assert (!m->fs.realigned);
10664 1785639 : if (m->fs.sp_offset != UNITS_PER_WORD)
10665 : {
10666 50162 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10667 : GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10668 : style, true);
10669 : }
10670 : else
10671 1599830 : ix86_add_queued_cfa_restore_notes (get_last_insn ());
10672 :
10673 : /* Sibcall epilogues don't want a return instruction. */
10674 1649992 : if (style == 0)
10675 : {
10676 125166 : m->fs = frame_state_save;
10677 125166 : return;
10678 : }
10679 :
10680 1524826 : if (cfun->machine->func_type != TYPE_NORMAL)
10681 120 : emit_jump_insn (gen_interrupt_return ());
10682 1524706 : else if (crtl->args.pops_args && crtl->args.size)
10683 : {
10684 25987 : rtx popc = GEN_INT (crtl->args.pops_args);
10685 :
10686 : /* i386 can only pop 64K bytes. If asked to pop more, pop return
10687 : address, do explicit add, and jump indirectly to the caller. */
10688 :
10689 25987 : if (crtl->args.pops_args >= 65536)
10690 : {
10691 0 : rtx ecx = gen_rtx_REG (SImode, CX_REG);
10692 0 : rtx_insn *insn;
10693 :
10694 : /* There is no "pascal" calling convention in any 64bit ABI. */
10695 0 : gcc_assert (!TARGET_64BIT);
10696 :
10697 0 : insn = emit_insn (gen_pop (ecx));
10698 0 : m->fs.cfa_offset -= UNITS_PER_WORD;
10699 0 : m->fs.sp_offset -= UNITS_PER_WORD;
10700 :
10701 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10702 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
10703 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10704 0 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10705 0 : RTX_FRAME_RELATED_P (insn) = 1;
10706 :
10707 0 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10708 : popc, -1, true);
10709 0 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10710 : }
10711 : else
10712 25987 : emit_jump_insn (gen_simple_return_pop_internal (popc));
10713 : }
10714 1498719 : else if (!m->call_ms2sysv || !restore_stub_is_tail)
10715 : {
10716 : /* In case of return from EH a simple return cannot be used
10717 : as a return address will be compared with a shadow stack
10718 : return address. Use indirect jump instead. */
10719 1497596 : if (style == 2 && flag_cf_protection)
10720 : {
10721 : /* Register used in indirect jump must be in word_mode. But
10722 : Pmode may not be the same as word_mode for x32. */
10723 17 : rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10724 17 : rtx_insn *insn;
10725 :
10726 17 : insn = emit_insn (gen_pop (ecx));
10727 17 : m->fs.cfa_offset -= UNITS_PER_WORD;
10728 17 : m->fs.sp_offset -= UNITS_PER_WORD;
10729 :
10730 33 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10731 17 : x = gen_rtx_SET (stack_pointer_rtx, x);
10732 17 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10733 17 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10734 17 : RTX_FRAME_RELATED_P (insn) = 1;
10735 :
10736 17 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10737 17 : }
10738 : else
10739 1497579 : emit_jump_insn (gen_simple_return_internal ());
10740 : }
10741 :
10742 : /* Restore the state back to the state from the prologue,
10743 : so that it's correct for the next epilogue. */
10744 1524826 : m->fs = frame_state_save;
10745 : }
10746 :
/* Reset from the function's potential modifications.

   Implements the target hook run after the epilogue has been output.
   Two jobs:
   1. Restore the hard register number of pic_offset_table_rtx, which
      the function body may have changed, unless a pseudo PIC register
      is in use.
   2. On Mach-O (Darwin), work around the fact that the object format
      does not support labels at the very end of an object: emit a
      trailing "nop" if a label would otherwise end the function, or a
      trailing "ud2" for a completely empty body.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      /* Walk backwards from the last insn looking for a trailing label.  */
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
	 it looks like we might want one, take special action.
	 First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	 then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  */
	    fputs ("\tnop\n", file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs ("\tud2\n", file);
	    }
	}
      else if (deleted_debug_label)
	/* Only notes/barriers remained: neutralize the collected deleted
	   debug labels so -g and -g0 output stay identical.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
10818 :
10819 : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10820 :
10821 : void
10822 59 : ix86_print_patchable_function_entry (FILE *file,
10823 : unsigned HOST_WIDE_INT patch_area_size,
10824 : bool record_p)
10825 : {
10826 59 : if (cfun->machine->function_label_emitted)
10827 : {
10828 : /* NB: When ix86_print_patchable_function_entry is called after
10829 : function table has been emitted, we have inserted or queued
10830 : a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10831 : place. There is nothing to do here. */
10832 : return;
10833 : }
10834 :
10835 8 : default_print_patchable_function_entry (file, patch_area_size,
10836 : record_p);
10837 : }
10838 :
10839 : /* Output patchable area. NB: default_print_patchable_function_entry
10840 : isn't available in i386.md. */
10841 :
10842 : void
10843 51 : ix86_output_patchable_area (unsigned int patch_area_size,
10844 : bool record_p)
10845 : {
10846 51 : default_print_patchable_function_entry (asm_out_file,
10847 : patch_area_size,
10848 : record_p);
10849 51 : }
10850 :
10851 : /* Return a scratch register to use in the split stack prologue. The
10852 : split stack prologue is used for -fsplit-stack. It is the first
10853 : instructions in the function, even before the regular prologue.
10854 : The scratch register can be any caller-saved register which is not
10855 : used for parameters or for the static chain. */
10856 :
10857 : static unsigned int
10858 24613 : split_stack_prologue_scratch_regno (void)
10859 : {
10860 24613 : if (TARGET_64BIT)
10861 : return R11_REG;
10862 : else
10863 : {
10864 6950 : bool is_fastcall, is_thiscall;
10865 6950 : int regparm;
10866 :
10867 6950 : is_fastcall = (lookup_attribute ("fastcall",
10868 6950 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10869 : != NULL);
10870 6950 : is_thiscall = (lookup_attribute ("thiscall",
10871 6950 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10872 : != NULL);
10873 6950 : regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10874 :
10875 6950 : if (is_fastcall)
10876 : {
10877 0 : if (DECL_STATIC_CHAIN (cfun->decl))
10878 : {
10879 0 : sorry ("%<-fsplit-stack%> does not support fastcall with "
10880 : "nested function");
10881 0 : return INVALID_REGNUM;
10882 : }
10883 : return AX_REG;
10884 : }
10885 6950 : else if (is_thiscall)
10886 : {
10887 0 : if (!DECL_STATIC_CHAIN (cfun->decl))
10888 : return DX_REG;
10889 0 : return AX_REG;
10890 : }
10891 6950 : else if (regparm < 3)
10892 : {
10893 6950 : if (!DECL_STATIC_CHAIN (cfun->decl))
10894 : return CX_REG;
10895 : else
10896 : {
10897 459 : if (regparm >= 2)
10898 : {
10899 0 : sorry ("%<-fsplit-stack%> does not support 2 register "
10900 : "parameters for a nested function");
10901 0 : return INVALID_REGNUM;
10902 : }
10903 : return DX_REG;
10904 : }
10905 : }
10906 : else
10907 : {
10908 : /* FIXME: We could make this work by pushing a register
10909 : around the addition and comparison. */
10910 0 : sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10911 0 : return INVALID_REGNUM;
10912 : }
10913 : }
10914 : }
10915 :
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  Created lazily on first use in
   ix86_expand_split_stack_prologue; GTY(()) roots it for the garbage
   collector.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large model
   (__morestack_large_model).  Also created lazily and GC-rooted.  */

static GTY(()) rtx split_stack_fn_large;
10924 :
10925 : /* Return location of the stack guard value in the TLS block. */
10926 :
10927 : rtx
10928 259942 : ix86_split_stack_guard (void)
10929 : {
10930 259942 : int offset;
10931 259942 : addr_space_t as = DEFAULT_TLS_SEG_REG;
10932 259942 : rtx r;
10933 :
10934 259942 : gcc_assert (flag_split_stack);
10935 :
10936 : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10937 259942 : offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10938 : #else
10939 : gcc_unreachable ();
10940 : #endif
10941 :
10942 259942 : r = GEN_INT (offset);
10943 357899 : r = gen_const_mem (Pmode, r);
10944 259942 : set_mem_addr_space (r, as);
10945 :
10946 259942 : return r;
10947 : }
10948 :
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.

   Emits a comparison of the stack pointer (minus the frame size, when
   large) against the TLS stack guard, branching over a call to
   __morestack (or __morestack_large_model) when enough stack remains.
   Also arranges the varargs old-stack pointer when the function uses
   va_start.  Runs after reload, so only hard registers are used.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  /* Total stack this function needs beyond what the caller provided.  */
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      /* A "sorry" diagnostic has already been issued; give up quietly.  */
      if (scratch_regno == INVALID_REGNUM)
	return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* Offset does not fit in a sign-extended 32-bit immediate;
	     materialize it first, then add the stack pointer.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  /* Clamp a (theoretically impossible) negative argument size to 0.  */
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (DImode, R10_REG);
      reg11 = gen_rtx_REG (DImode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (flag_force_indirect_call
	  || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }

	  fn = split_stack_fn_large;

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      /* Load the callee's address out of the GOT, computing the
		 GOT base from the current instruction pointer.  */
	      rtx_code_label *label;
	      rtx x;

	      gcc_assert (Pmode == DImode);

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      fn = copy_to_suggested_reg (x, reg11, Pmode);
	    }
	  else if (ix86_cmodel == CM_LARGE)
	    fn = copy_to_suggested_reg (fn, reg11, Pmode);

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  /* Two shifts by 16 to avoid shifting by the full word width.  */
	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
	{
	  /* 32-bit PIC indirect call: fetch __morestack's address from
	     the GOT into the scratch register.  */
	  rtx x;

	  gcc_assert (Pmode == SImode);

	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  emit_insn (gen_set_got (scratch_reg));
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
			      UNSPEC_GOT);
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
	  x = gen_const_mem (Pmode, x);
	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
	}

      /* 32-bit: pass the two arguments on the stack, and tell the CFI
	 machinery about the pushes; POP them after the call returns.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      /* Jump over the "enough stack" path's setup of the same register.  */
      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
11251 :
11252 : /* We may have to tell the dataflow pass that the split stack prologue
11253 : is initializing a scratch register. */
11254 :
11255 : static void
11256 15851749 : ix86_live_on_entry (bitmap regs)
11257 : {
11258 15851749 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11259 : {
11260 124 : gcc_assert (flag_split_stack);
11261 124 : bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11262 : }
11263 15851749 : }
11264 :
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  On success fill *OUT with base, index, displacement,
   scale and segment (address space), after canonicalizing several
   special cases that the x86 ModR/M/SIB encoding requires.  */

bool
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return false;
	}
      else if (GET_CODE (addr) == AND)
	{
	  rtx mask = XEXP (addr, 1);
	  rtx shift_val;

	  if (const_32bit_mask (mask, DImode)
	      /* For ASHIFT inside AND, combine will not generate
		 canonical zero-extend. Merge mask for AND and shift_count
		 to check if it is canonical zero-extend.  */
	      || (CONST_INT_P (mask)
		  && GET_CODE (XEXP (addr, 0)) == ASHIFT
		  && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
		  && ((UINTVAL (mask)
		       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
		      == HOST_WIDE_INT_UC (0xffffffff))))
	    {
	      addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
	      if (addr == NULL_RTX)
		return false;

	      if (CONST_INT_P (addr))
		return false;
	    }
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return false;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
	base = addr;
      else
	return false;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (left-nested) PLUS chain into ADDENDS, rejecting
	 addresses with more than four addends, then classify each
	 addend as index*scale, segment unspec, base/index reg, or
	 displacement.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return false;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return false;
      addends[n] = op;

      /* Walk the addends innermost-first.  */
      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return false;
	      scale = INTVAL (tmp);
	      /* Only shifts by 0..3 map onto SIB scales 1/2/4/8.  */
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return false;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return false;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == ADDR_SPACE_GENERIC)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return false;
	      break;

	    case SUBREG:
	      if (!REG_P (SUBREG_REG (op)))
		return false;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return false;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return false;
	      disp = op;
	      break;

	    default:
	      return false;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return false;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return false;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* The index must be a (possibly SUBREG-wrapped) register.  */
  if (index)
    {
      if (REG_P (index))
	;
      else if (SUBREG_P (index)
	       && REG_P (SUBREG_REG (index)))
	;
      else
	return false;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return false;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
	  || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
	  || REGNO (base_reg) == BP_REG
	  || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return true;
}
11514 :
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires to two regs - that would mean more pseudos with longer
   lifetimes.

   Cost starts at 1 and is incremented for each base or index register
   used (pseudo or non-register expression), except when that register
   is the PIC offset table register.  A further K6-specific penalty is
   added for slow-to-decode addressing modes.  */
static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  /* Look through SUBREGs so the REGNO checks below see the real regs.  */
  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.base)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.index)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since it's predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case  may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_CPU_P (K6)
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
11579 :
11580 : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
11581 :
11582 : bool
11583 1182116 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
11584 : unsigned int align,
11585 : enum by_pieces_operation op,
11586 : bool speed_p)
11587 : {
11588 : /* Return true when we are currently expanding memcpy/memset epilogue
11589 : with move_by_pieces or store_by_pieces. */
11590 1182116 : if (cfun->machine->by_pieces_in_use)
11591 : return true;
11592 :
11593 1180010 : return default_use_by_pieces_infrastructure_p (size, align, op,
11594 1180010 : speed_p);
11595 : }
11596 :
11597 : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11598 : this is used for to form addresses to local data when -fPIC is in
11599 : use. */
11600 :
11601 : static bool
11602 0 : darwin_local_data_pic (rtx disp)
11603 : {
11604 0 : return (GET_CODE (disp) == UNSPEC
11605 0 : && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11606 : }
11607 :
11608 : /* True if the function symbol operand X should be loaded from GOT.
11609 : If CALL_P is true, X is a call operand.
11610 :
11611 : NB: -mno-direct-extern-access doesn't force load from GOT for
11612 : call.
11613 :
11614 : NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11615 : statements, since a PIC register could not be available at the
11616 : call site. */
11617 :
11618 : bool
11619 1851435129 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
11620 : {
11621 96351031 : return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11622 : && !TARGET_PECOFF && !TARGET_MACHO
11623 1848566787 : && (!flag_pic || this_is_asm_operands)
11624 1828197842 : && ix86_cmodel != CM_LARGE
11625 1828191813 : && ix86_cmodel != CM_LARGE_PIC
11626 1828191812 : && SYMBOL_REF_P (x)
11627 1828191810 : && ((!call_p
11628 1822756886 : && (!ix86_direct_extern_access
11629 1822754616 : || (SYMBOL_REF_DECL (x)
11630 1641398617 : && lookup_attribute ("nodirect_extern_access",
11631 1641398617 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11632 1828189086 : || (SYMBOL_REF_FUNCTION_P (x)
11633 689521307 : && (!flag_plt
11634 689516896 : || (SYMBOL_REF_DECL (x)
11635 689516896 : && lookup_attribute ("noplt",
11636 689516896 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11637 1851442663 : && !SYMBOL_REF_LOCAL_P (x));
11638 : }
11639 :
11640 : /* Determine if a given RTX is a valid constant. We already know this
11641 : satisfies CONSTANT_P. */
11642 :
11643 : static bool
11644 1557466768 : ix86_legitimate_constant_p (machine_mode mode, rtx x)
11645 : {
11646 1557466768 : switch (GET_CODE (x))
11647 : {
11648 138469406 : case CONST:
11649 138469406 : x = XEXP (x, 0);
11650 :
11651 138469406 : if (GET_CODE (x) == PLUS)
11652 : {
11653 138352943 : if (!CONST_INT_P (XEXP (x, 1)))
11654 : return false;
11655 138352943 : x = XEXP (x, 0);
11656 : }
11657 :
11658 138469406 : if (TARGET_MACHO && darwin_local_data_pic (x))
11659 : return true;
11660 :
11661 : /* Only some unspecs are valid as "constants". */
11662 138469406 : if (GET_CODE (x) == UNSPEC)
11663 493535 : switch (XINT (x, 1))
11664 : {
11665 21077 : case UNSPEC_GOT:
11666 21077 : case UNSPEC_GOTOFF:
11667 21077 : case UNSPEC_PLTOFF:
11668 21077 : return TARGET_64BIT;
11669 472095 : case UNSPEC_TPOFF:
11670 472095 : case UNSPEC_NTPOFF:
11671 472095 : x = XVECEXP (x, 0, 0);
11672 472095 : return (SYMBOL_REF_P (x)
11673 472095 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11674 275 : case UNSPEC_DTPOFF:
11675 275 : x = XVECEXP (x, 0, 0);
11676 275 : return (SYMBOL_REF_P (x)
11677 275 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11678 0 : case UNSPEC_SECREL32:
11679 0 : x = XVECEXP (x, 0, 0);
11680 0 : return SYMBOL_REF_P (x);
11681 : default:
11682 : return false;
11683 : }
11684 :
11685 : /* We must have drilled down to a symbol. */
11686 137975871 : if (LABEL_REF_P (x))
11687 : return true;
11688 137970503 : if (!SYMBOL_REF_P (x))
11689 : return false;
11690 : /* FALLTHRU */
11691 :
11692 927946124 : case SYMBOL_REF:
11693 : /* TLS symbols are never valid. */
11694 927946124 : if (SYMBOL_REF_TLS_MODEL (x))
11695 : return false;
11696 :
11697 : /* DLLIMPORT symbols are never valid. */
11698 927842462 : if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11699 : && SYMBOL_REF_DLLIMPORT_P (x))
11700 : return false;
11701 :
11702 : #if TARGET_MACHO
11703 : /* mdynamic-no-pic */
11704 : if (MACHO_DYNAMIC_NO_PIC_P)
11705 : return machopic_symbol_defined_p (x);
11706 : #endif
11707 :
11708 : /* External function address should be loaded
11709 : via the GOT slot to avoid PLT. */
11710 927842462 : if (ix86_force_load_from_GOT_p (x))
11711 : return false;
11712 :
11713 : break;
11714 :
11715 608000257 : CASE_CONST_SCALAR_INT:
11716 608000257 : if (ix86_endbr_immediate_operand (x, VOIDmode))
11717 : return false;
11718 :
11719 608000056 : switch (mode)
11720 : {
11721 1461191 : case E_TImode:
11722 1461191 : if (TARGET_64BIT)
11723 : return true;
11724 : /* FALLTHRU */
11725 25719 : case E_OImode:
11726 25719 : case E_XImode:
11727 25719 : if (!standard_sse_constant_p (x, mode)
11728 42642 : && GET_MODE_SIZE (TARGET_AVX512F
11729 : ? XImode
11730 : : (TARGET_AVX
11731 : ? OImode
11732 : : (TARGET_SSE2
11733 16923 : ? TImode : DImode))) < GET_MODE_SIZE (mode))
11734 : return false;
11735 : default:
11736 : break;
11737 : }
11738 : break;
11739 :
11740 8675090 : case CONST_VECTOR:
11741 8675090 : if (!standard_sse_constant_p (x, mode))
11742 : return false;
11743 : break;
11744 :
11745 7672047 : case CONST_DOUBLE:
11746 7672047 : if (mode == E_BFmode)
11747 : return false;
11748 :
11749 : default:
11750 : break;
11751 : }
11752 :
11753 : /* Otherwise we handle everything else in the move patterns. */
11754 : return true;
11755 : }
11756 :
11757 : /* Determine if it's legal to put X into the constant pool. This
11758 : is not possible for the address of thread-local symbols, which
11759 : is checked above. */
11760 :
11761 : static bool
11762 61674797 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11763 : {
11764 : /* We can put any immediate constant in memory. */
11765 61674797 : switch (GET_CODE (x))
11766 : {
11767 : CASE_CONST_ANY:
11768 : return false;
11769 :
11770 1798665 : default:
11771 1798665 : break;
11772 : }
11773 :
11774 1798665 : return !ix86_legitimate_constant_p (mode, x);
11775 : }
11776 :
11777 : /* Return a unique alias set for the GOT. */
11778 :
11779 : alias_set_type
11780 188822 : ix86_GOT_alias_set (void)
11781 : {
11782 188822 : static alias_set_type set = -1;
11783 188822 : if (set == -1)
11784 2966 : set = new_alias_set ();
11785 188822 : return set;
11786 : }
11787 :
11788 : /* Nonzero if the constant value X is a legitimate general operand
11789 : when generating PIC code. It is given that flag_pic is on and
11790 : that X satisfies CONSTANT_P. */
11791 :
11792 : bool
11793 126471771 : legitimate_pic_operand_p (rtx x)
11794 : {
11795 126471771 : rtx inner;
11796 :
11797 126471771 : switch (GET_CODE (x))
11798 : {
11799 2514255 : case CONST:
11800 2514255 : inner = XEXP (x, 0);
11801 2514255 : if (GET_CODE (inner) == PLUS
11802 357596 : && CONST_INT_P (XEXP (inner, 1)))
11803 357596 : inner = XEXP (inner, 0);
11804 :
11805 : /* Only some unspecs are valid as "constants". */
11806 2514255 : if (GET_CODE (inner) == UNSPEC)
11807 2264627 : switch (XINT (inner, 1))
11808 : {
11809 2203964 : case UNSPEC_GOT:
11810 2203964 : case UNSPEC_GOTOFF:
11811 2203964 : case UNSPEC_PLTOFF:
11812 2203964 : return TARGET_64BIT;
11813 0 : case UNSPEC_TPOFF:
11814 0 : x = XVECEXP (inner, 0, 0);
11815 0 : return (SYMBOL_REF_P (x)
11816 0 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11817 0 : case UNSPEC_SECREL32:
11818 0 : x = XVECEXP (inner, 0, 0);
11819 0 : return SYMBOL_REF_P (x);
11820 0 : case UNSPEC_MACHOPIC_OFFSET:
11821 0 : return legitimate_pic_address_disp_p (x);
11822 : default:
11823 : return false;
11824 : }
11825 : /* FALLTHRU */
11826 :
11827 7011984 : case SYMBOL_REF:
11828 7011984 : case LABEL_REF:
11829 7011984 : return legitimate_pic_address_disp_p (x);
11830 :
11831 : default:
11832 : return true;
11833 : }
11834 : }
11835 :
11836 : /* Determine if a given CONST RTX is a valid memory displacement
11837 : in PIC mode. */
11838 :
11839 : bool
11840 65569063 : legitimate_pic_address_disp_p (rtx disp)
11841 : {
11842 65569063 : bool saw_plus;
11843 :
11844 : /* In 64bit mode we can allow direct addresses of symbols and labels
11845 : when they are not dynamic symbols. */
11846 65569063 : if (TARGET_64BIT)
11847 : {
11848 40364014 : rtx op0 = disp, op1;
11849 :
11850 40364014 : switch (GET_CODE (disp))
11851 : {
11852 : case LABEL_REF:
11853 : return true;
11854 :
11855 10967101 : case CONST:
11856 10967101 : if (GET_CODE (XEXP (disp, 0)) != PLUS)
11857 : break;
11858 1173060 : op0 = XEXP (XEXP (disp, 0), 0);
11859 1173060 : op1 = XEXP (XEXP (disp, 0), 1);
11860 1173060 : if (!CONST_INT_P (op1))
11861 : break;
11862 1173060 : if (GET_CODE (op0) == UNSPEC
11863 296 : && (XINT (op0, 1) == UNSPEC_DTPOFF
11864 296 : || XINT (op0, 1) == UNSPEC_NTPOFF)
11865 1173356 : && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11866 : return true;
11867 1172764 : if (INTVAL (op1) >= 16*1024*1024
11868 1172764 : || INTVAL (op1) < -16*1024*1024)
11869 : break;
11870 1172676 : if (LABEL_REF_P (op0))
11871 : return true;
11872 1172676 : if (GET_CODE (op0) == CONST
11873 0 : && GET_CODE (XEXP (op0, 0)) == UNSPEC
11874 0 : && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11875 : return true;
11876 1172676 : if (GET_CODE (op0) == UNSPEC
11877 0 : && XINT (op0, 1) == UNSPEC_PCREL)
11878 : return true;
11879 1172676 : if (!SYMBOL_REF_P (op0))
11880 : break;
11881 : /* FALLTHRU */
11882 :
11883 30346743 : case SYMBOL_REF:
11884 : /* TLS references should always be enclosed in UNSPEC.
11885 : The dllimported symbol needs always to be resolved. */
11886 30346743 : if (SYMBOL_REF_TLS_MODEL (op0)
11887 : || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11888 : return false;
11889 :
11890 30191212 : if (TARGET_PECOFF)
11891 : {
11892 : #if TARGET_PECOFF
11893 : if (is_imported_p (op0))
11894 : return true;
11895 : #endif
11896 :
11897 : if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11898 : break;
11899 :
11900 : /* Non-external-weak function symbols need to be resolved only
11901 : for the large model. Non-external symbols don't need to be
11902 : resolved for large and medium models. For the small model,
11903 : we don't need to resolve anything here. */
11904 : if ((ix86_cmodel != CM_LARGE_PIC
11905 : && SYMBOL_REF_FUNCTION_P (op0)
11906 : && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11907 : || !SYMBOL_REF_EXTERNAL_P (op0)
11908 : || ix86_cmodel == CM_SMALL_PIC)
11909 : return true;
11910 : }
11911 30191212 : else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11912 30191208 : && (SYMBOL_REF_LOCAL_P (op0)
11913 18418178 : || ((ix86_direct_extern_access
11914 36665050 : && !(SYMBOL_REF_DECL (op0)
11915 18247035 : && lookup_attribute ("nodirect_extern_access",
11916 18247035 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11917 : && HAVE_LD_PIE_COPYRELOC
11918 18417852 : && flag_pie
11919 34047 : && !SYMBOL_REF_WEAK (op0)
11920 33659 : && !SYMBOL_REF_FUNCTION_P (op0)))
11921 41967975 : && ix86_cmodel != CM_LARGE_PIC)
11922 : return true;
11923 : break;
11924 :
11925 : default:
11926 : break;
11927 : }
11928 : }
11929 53417491 : if (GET_CODE (disp) != CONST)
11930 : return false;
11931 15017204 : disp = XEXP (disp, 0);
11932 :
11933 15017204 : if (TARGET_64BIT)
11934 : {
11935 : /* We are unsafe to allow PLUS expressions. This limit allowed distance
11936 : of GOT tables. We should not need these anyway. */
11937 9846434 : if (GET_CODE (disp) != UNSPEC
11938 9794041 : || (XINT (disp, 1) != UNSPEC_GOTPCREL
11939 9794041 : && XINT (disp, 1) != UNSPEC_GOTOFF
11940 : && XINT (disp, 1) != UNSPEC_PCREL
11941 : && XINT (disp, 1) != UNSPEC_PLTOFF))
11942 : return false;
11943 :
11944 9794041 : if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11945 9794041 : && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
11946 : return false;
11947 : return true;
11948 : }
11949 :
11950 5170770 : saw_plus = false;
11951 5170770 : if (GET_CODE (disp) == PLUS)
11952 : {
11953 588497 : if (!CONST_INT_P (XEXP (disp, 1)))
11954 : return false;
11955 588497 : disp = XEXP (disp, 0);
11956 588497 : saw_plus = true;
11957 : }
11958 :
11959 5170770 : if (TARGET_MACHO && darwin_local_data_pic (disp))
11960 : return true;
11961 :
11962 5170770 : if (GET_CODE (disp) != UNSPEC)
11963 : return false;
11964 :
11965 5005592 : switch (XINT (disp, 1))
11966 : {
11967 2268339 : case UNSPEC_GOT:
11968 2268339 : if (saw_plus)
11969 : return false;
11970 : /* We need to check for both symbols and labels because VxWorks loads
11971 : text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11972 : details. */
11973 2268338 : return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11974 2268338 : || LABEL_REF_P (XVECEXP (disp, 0, 0)));
11975 2737253 : case UNSPEC_GOTOFF:
11976 : /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11977 : While ABI specify also 32bit relocation but we don't produce it in
11978 : small PIC model at all. */
11979 2737253 : if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11980 2737253 : || LABEL_REF_P (XVECEXP (disp, 0, 0)))
11981 : && !TARGET_64BIT)
11982 5474506 : return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11983 : return false;
11984 0 : case UNSPEC_GOTTPOFF:
11985 0 : case UNSPEC_GOTNTPOFF:
11986 0 : case UNSPEC_INDNTPOFF:
11987 0 : if (saw_plus)
11988 : return false;
11989 0 : disp = XVECEXP (disp, 0, 0);
11990 0 : return (SYMBOL_REF_P (disp)
11991 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11992 0 : case UNSPEC_NTPOFF:
11993 0 : disp = XVECEXP (disp, 0, 0);
11994 0 : return (SYMBOL_REF_P (disp)
11995 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11996 0 : case UNSPEC_DTPOFF:
11997 0 : disp = XVECEXP (disp, 0, 0);
11998 0 : return (SYMBOL_REF_P (disp)
11999 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12000 0 : case UNSPEC_SECREL32:
12001 0 : disp = XVECEXP (disp, 0, 0);
12002 0 : return SYMBOL_REF_P (disp);
12003 : }
12004 :
12005 : return false;
12006 : }
12007 :
12008 : /* Determine if op is suitable RTX for an address register.
12009 : Return naked register if a register or a register subreg is
12010 : found, otherwise return NULL_RTX. */
12011 :
12012 : static rtx
12013 1372167860 : ix86_validate_address_register (rtx op)
12014 : {
12015 1372167860 : machine_mode mode = GET_MODE (op);
12016 :
12017 : /* Only SImode or DImode registers can form the address. */
12018 1372167860 : if (mode != SImode && mode != DImode)
12019 : return NULL_RTX;
12020 :
12021 1372160990 : if (REG_P (op))
12022 : return op;
12023 702145 : else if (SUBREG_P (op))
12024 : {
12025 702145 : rtx reg = SUBREG_REG (op);
12026 :
12027 702145 : if (!REG_P (reg))
12028 : return NULL_RTX;
12029 :
12030 702145 : mode = GET_MODE (reg);
12031 :
12032 : /* Don't allow SUBREGs that span more than a word. It can
12033 : lead to spill failures when the register is one word out
12034 : of a two word structure. */
12035 1450521 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12036 : return NULL_RTX;
12037 :
12038 : /* Allow only SUBREGs of non-eliminable hard registers. */
12039 243091 : if (register_no_elim_operand (reg, mode))
12040 : return reg;
12041 : }
12042 :
12043 : /* Op is not a register. */
12044 : return NULL_RTX;
12045 : }
12046 :
12047 : /* Determine which memory address register set insn can use. */
12048 :
12049 : static enum attr_addr
12050 255637521 : ix86_memory_address_reg_class (rtx_insn* insn)
12051 : {
12052 : /* LRA can do some initialization with NULL insn,
12053 : return maximum register class in this case. */
12054 255637521 : enum attr_addr addr_rclass = ADDR_GPR32;
12055 :
12056 255637521 : if (!insn)
12057 : return addr_rclass;
12058 :
12059 72694531 : if (asm_noperands (PATTERN (insn)) >= 0
12060 72694531 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)
12061 75172 : return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
12062 :
12063 : /* Return maximum register class for unrecognized instructions. */
12064 72656945 : if (INSN_CODE (insn) < 0)
12065 : return addr_rclass;
12066 :
12067 : /* Try to recognize the insn before calling get_attr_addr.
12068 : Save current recog_data and current alternative. */
12069 72656945 : struct recog_data_d saved_recog_data = recog_data;
12070 72656945 : int saved_alternative = which_alternative;
12071 :
12072 : /* Update recog_data for processing of alternatives. */
12073 72656945 : extract_insn_cached (insn);
12074 :
12075 : /* If current alternative is not set, loop throught enabled
12076 : alternatives and get the most limited register class. */
12077 72656945 : if (saved_alternative == -1)
12078 : {
12079 72656945 : alternative_mask enabled = get_enabled_alternatives (insn);
12080 :
12081 1253468539 : for (int i = 0; i < recog_data.n_alternatives; i++)
12082 : {
12083 1180811594 : if (!TEST_BIT (enabled, i))
12084 348983185 : continue;
12085 :
12086 831828409 : which_alternative = i;
12087 831828409 : addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
12088 : }
12089 : }
12090 : else
12091 : {
12092 0 : which_alternative = saved_alternative;
12093 0 : addr_rclass = get_attr_addr (insn);
12094 : }
12095 :
12096 72656945 : recog_data = saved_recog_data;
12097 72656945 : which_alternative = saved_alternative;
12098 :
12099 72656945 : return addr_rclass;
12100 : }
12101 :
12102 : /* Return memory address register class insn can use. */
12103 :
12104 : enum reg_class
12105 214899889 : ix86_insn_base_reg_class (rtx_insn* insn)
12106 : {
12107 214899889 : switch (ix86_memory_address_reg_class (insn))
12108 : {
12109 : case ADDR_GPR8:
12110 : return LEGACY_GENERAL_REGS;
12111 : case ADDR_GPR16:
12112 : return GENERAL_GPR16;
12113 : case ADDR_GPR32:
12114 : break;
12115 0 : default:
12116 0 : gcc_unreachable ();
12117 : }
12118 :
12119 : return BASE_REG_CLASS;
12120 : }
12121 :
12122 : bool
12123 1285106 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
12124 : {
12125 1285106 : switch (ix86_memory_address_reg_class (insn))
12126 : {
12127 0 : case ADDR_GPR8:
12128 0 : return LEGACY_INT_REGNO_P (regno);
12129 0 : case ADDR_GPR16:
12130 0 : return GENERAL_GPR16_REGNO_P (regno);
12131 1285106 : case ADDR_GPR32:
12132 1285106 : break;
12133 0 : default:
12134 0 : gcc_unreachable ();
12135 : }
12136 :
12137 1285106 : return GENERAL_REGNO_P (regno);
12138 : }
12139 :
12140 : enum reg_class
12141 39452526 : ix86_insn_index_reg_class (rtx_insn* insn)
12142 : {
12143 39452526 : switch (ix86_memory_address_reg_class (insn))
12144 : {
12145 : case ADDR_GPR8:
12146 : return LEGACY_INDEX_REGS;
12147 : case ADDR_GPR16:
12148 : return INDEX_GPR16;
12149 : case ADDR_GPR32:
12150 : break;
12151 0 : default:
12152 0 : gcc_unreachable ();
12153 : }
12154 :
12155 : return INDEX_REG_CLASS;
12156 : }
12157 :
12158 : /* Recognizes RTL expressions that are valid memory addresses for an
12159 : instruction. The MODE argument is the machine mode for the MEM
12160 : expression that wants to use this address.
12161 :
12162 : It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12163 : convert common non-canonical forms to canonical form so that they will
12164 : be recognized. */
12165 :
12166 : static bool
12167 2247915803 : ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
12168 : code_helper = ERROR_MARK)
12169 : {
12170 2247915803 : struct ix86_address parts;
12171 2247915803 : rtx base, index, disp;
12172 2247915803 : HOST_WIDE_INT scale;
12173 2247915803 : addr_space_t seg;
12174 :
12175 2247915803 : if (ix86_decompose_address (addr, &parts) == 0)
12176 : /* Decomposition failed. */
12177 : return false;
12178 :
12179 2236255338 : base = parts.base;
12180 2236255338 : index = parts.index;
12181 2236255338 : disp = parts.disp;
12182 2236255338 : scale = parts.scale;
12183 2236255338 : seg = parts.seg;
12184 :
12185 : /* Validate base register. */
12186 2236255338 : if (base)
12187 : {
12188 1284458516 : rtx reg = ix86_validate_address_register (base);
12189 :
12190 1284458516 : if (reg == NULL_RTX)
12191 : return false;
12192 :
12193 1284034555 : unsigned int regno = REGNO (reg);
12194 1284034555 : if ((strict && !REGNO_OK_FOR_BASE_P (regno))
12195 1279569519 : || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
12196 : /* Base is not valid. */
12197 : return false;
12198 : }
12199 :
12200 : /* Validate index register. */
12201 2234447044 : if (index)
12202 : {
12203 87709344 : rtx reg = ix86_validate_address_register (index);
12204 :
12205 87709344 : if (reg == NULL_RTX)
12206 : return false;
12207 :
12208 87667205 : unsigned int regno = REGNO (reg);
12209 87667205 : if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
12210 87658695 : || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
12211 : /* Index is not valid. */
12212 : return false;
12213 : }
12214 :
12215 : /* Index and base should have the same mode. */
12216 2234402933 : if (base && index
12217 78147280 : && GET_MODE (base) != GET_MODE (index))
12218 : return false;
12219 :
12220 : /* Address override works only on the (%reg) part of %fs:(%reg). */
12221 2234072610 : if (seg != ADDR_SPACE_GENERIC
12222 2234072610 : && ((base && GET_MODE (base) != word_mode)
12223 339242 : || (index && GET_MODE (index) != word_mode)))
12224 : return false;
12225 :
12226 : /* Validate scale factor. */
12227 2234072581 : if (scale != 1)
12228 : {
12229 39801265 : if (!index)
12230 : /* Scale without index. */
12231 : return false;
12232 :
12233 39801265 : if (scale != 2 && scale != 4 && scale != 8)
12234 : /* Scale is not a valid multiplier. */
12235 : return false;
12236 : }
12237 :
12238 : /* Validate displacement. */
12239 2230918999 : if (disp)
12240 : {
12241 2004435042 : if (ix86_endbr_immediate_operand (disp, VOIDmode))
12242 : return false;
12243 :
12244 2004434999 : if (GET_CODE (disp) == CONST
12245 149004757 : && GET_CODE (XEXP (disp, 0)) == UNSPEC
12246 15447027 : && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12247 15447027 : switch (XINT (XEXP (disp, 0), 1))
12248 : {
12249 : /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
12250 : when used. While ABI specify also 32bit relocations, we
12251 : don't produce them at all and use IP relative instead.
12252 : Allow GOT in 32bit mode for both PIC and non-PIC if symbol
12253 : should be loaded via GOT. */
12254 2268397 : case UNSPEC_GOT:
12255 2268397 : if (!TARGET_64BIT
12256 2268397 : && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12257 0 : goto is_legitimate_pic;
12258 : /* FALLTHRU */
12259 4582476 : case UNSPEC_GOTOFF:
12260 4582476 : gcc_assert (flag_pic);
12261 4582476 : if (!TARGET_64BIT)
12262 4582273 : goto is_legitimate_pic;
12263 :
12264 : /* 64bit address unspec. */
12265 : return false;
12266 :
12267 9794013 : case UNSPEC_GOTPCREL:
12268 9794013 : if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12269 2534 : goto is_legitimate_pic;
12270 : /* FALLTHRU */
12271 9791479 : case UNSPEC_PCREL:
12272 9791479 : gcc_assert (flag_pic);
12273 9791479 : goto is_legitimate_pic;
12274 :
12275 : case UNSPEC_GOTTPOFF:
12276 : case UNSPEC_GOTNTPOFF:
12277 : case UNSPEC_INDNTPOFF:
12278 : case UNSPEC_NTPOFF:
12279 : case UNSPEC_DTPOFF:
12280 : case UNSPEC_SECREL32:
12281 : break;
12282 :
12283 : default:
12284 : /* Invalid address unspec. */
12285 : return false;
12286 : }
12287 :
12288 1262490481 : else if (SYMBOLIC_CONST (disp)
12289 2122545702 : && (flag_pic
12290 : #if TARGET_MACHO
12291 : || (MACHOPIC_INDIRECT
12292 : && !machopic_operand_p (disp))
12293 : #endif
12294 : ))
12295 : {
12296 :
12297 58395748 : is_legitimate_pic:
12298 58395748 : if (TARGET_64BIT && (index || base))
12299 : {
12300 : /* foo@dtpoff(%rX) is ok. */
12301 37035 : if (GET_CODE (disp) != CONST
12302 7024 : || GET_CODE (XEXP (disp, 0)) != PLUS
12303 7024 : || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12304 4637 : || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12305 4637 : || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12306 4637 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
12307 6 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
12308 : /* Non-constant pic memory reference. */
12309 : return false;
12310 : }
12311 58358713 : else if ((!TARGET_MACHO || flag_pic)
12312 58358713 : && ! legitimate_pic_address_disp_p (disp))
12313 : /* Displacement is an invalid pic construct. */
12314 : return false;
12315 : #if TARGET_MACHO
12316 : else if (MACHO_DYNAMIC_NO_PIC_P
12317 : && !ix86_legitimate_constant_p (Pmode, disp))
12318 : /* displacment must be referenced via non_lazy_pointer */
12319 : return false;
12320 : #endif
12321 :
12322 : /* This code used to verify that a symbolic pic displacement
12323 : includes the pic_offset_table_rtx register.
12324 :
12325 : While this is good idea, unfortunately these constructs may
12326 : be created by "adds using lea" optimization for incorrect
12327 : code like:
12328 :
12329 : int a;
12330 : int foo(int i)
12331 : {
12332 : return *(&a+i);
12333 : }
12334 :
12335 : This code is nonsensical, but results in addressing
12336 : GOT table with pic_offset_table_rtx base. We can't
12337 : just refuse it easily, since it gets matched by
12338 : "addsi3" pattern, that later gets split to lea in the
12339 : case output register differs from input. While this
12340 : can be handled by separate addsi pattern for this case
12341 : that never results in lea, this seems to be easier and
12342 : correct fix for crash to disable this test. */
12343 : }
12344 1944968510 : else if (!LABEL_REF_P (disp)
12345 1944814449 : && !CONST_INT_P (disp)
12346 873132928 : && (GET_CODE (disp) != CONST
12347 135000357 : || !ix86_legitimate_constant_p (Pmode, disp))
12348 2686066420 : && (!SYMBOL_REF_P (disp)
12349 748981427 : || !ix86_legitimate_constant_p (Pmode, disp)))
12350 : /* Displacement is not constant. */
12351 57329467 : return false;
12352 1887639043 : else if (TARGET_64BIT
12353 1887639043 : && !x86_64_immediate_operand (disp, VOIDmode))
12354 : /* Displacement is out of range. */
12355 : return false;
12356 : /* In x32 mode, constant addresses are sign extended to 64bit, so
12357 : we have to prevent addresses from 0x80000000 to 0xffffffff. */
12358 45179 : else if (TARGET_X32 && !(index || base)
12359 17348 : && CONST_INT_P (disp)
12360 1887113402 : && val_signbit_known_set_p (SImode, INTVAL (disp)))
12361 : return false;
12362 : }
12363 :
12364 : /* Everything looks valid. */
12365 : return true;
12366 : }
12367 :
12368 : /* Determine if a given RTX is a valid constant address. */
12369 :
12370 : bool
12371 2794367619 : constant_address_p (rtx x)
12372 : {
12373 2874748447 : return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12374 : }
12375 :
12376 :
12377 : /* Return a legitimate reference for ORIG (an address) using the
12378 : register REG. If REG is 0, a new pseudo is generated.
12379 :
12380 : There are two types of references that must be handled:
12381 :
12382 : 1. Global data references must load the address from the GOT, via
12383 : the PIC reg. An insn is emitted to do this load, and the reg is
12384 : returned.
12385 :
12386 : 2. Static data references, constant pool addresses, and code labels
12387 : compute the address as an offset from the GOT, whose base is in
12388 : the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12389 : differentiate them from global data objects. The returned
12390 : address is the PIC reg + an unspec constant.
12391 :
12392 : TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12393 : reg also appears in the address. */
12394 :
12395 : rtx
12396 399144 : legitimize_pic_address (rtx orig, rtx reg)
12397 : {
12398 399144 : rtx addr = orig;
12399 399144 : rtx new_rtx = orig;
12400 :
12401 : #if TARGET_MACHO
12402 : if (TARGET_MACHO && !TARGET_64BIT)
12403 : {
12404 : if (reg == 0)
12405 : reg = gen_reg_rtx (Pmode);
12406 : /* Use the generic Mach-O PIC machinery. */
12407 : return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12408 : }
12409 : #endif
12410 :
12411 399144 : if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12412 : {
12413 : #if TARGET_PECOFF
12414 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12415 : if (tmp)
12416 : return tmp;
12417 : #endif
12418 : }
12419 :
12420 399144 : if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12421 : new_rtx = addr;
12422 302601 : else if ((!TARGET_64BIT
12423 101823 : || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
12424 : && !TARGET_PECOFF
12425 503476 : && gotoff_operand (addr, Pmode))
12426 : {
12427 : /* This symbol may be referenced via a displacement
12428 : from the PIC base address (@GOTOFF). */
12429 97362 : if (GET_CODE (addr) == CONST)
12430 3047 : addr = XEXP (addr, 0);
12431 :
12432 97362 : if (GET_CODE (addr) == PLUS)
12433 : {
12434 6094 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12435 : UNSPEC_GOTOFF);
12436 6094 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12437 : }
12438 : else
12439 188601 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12440 :
12441 194695 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12442 :
12443 97362 : if (TARGET_64BIT)
12444 29 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12445 :
12446 97362 : if (reg != 0)
12447 : {
12448 3 : gcc_assert (REG_P (reg));
12449 3 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12450 : new_rtx, reg, 1, OPTAB_DIRECT);
12451 : }
12452 : else
12453 194692 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12454 : }
12455 383262 : else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
12456 : /* We can't always use @GOTOFF for text labels
12457 : on VxWorks, see gotoff_operand. */
12458 205239 : || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
12459 : {
12460 : #if TARGET_PECOFF
12461 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12462 : if (tmp)
12463 : return tmp;
12464 : #endif
12465 :
12466 : /* For x64 PE-COFF there is no GOT table,
12467 : so we use address directly. */
12468 178020 : if (TARGET_64BIT && TARGET_PECOFF)
12469 : {
12470 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12471 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12472 : }
12473 178020 : else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12474 : {
12475 94573 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
12476 : UNSPEC_GOTPCREL);
12477 94573 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12478 94573 : new_rtx = gen_const_mem (Pmode, new_rtx);
12479 94570 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12480 : }
12481 : else
12482 : {
12483 : /* This symbol must be referenced via a load
12484 : from the Global Offset Table (@GOT). */
12485 166877 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12486 166877 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12487 :
12488 83450 : if (TARGET_64BIT)
12489 23 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12490 :
12491 83450 : if (reg != 0)
12492 : {
12493 0 : gcc_assert (REG_P (reg));
12494 0 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12495 : new_rtx, reg, 1, OPTAB_DIRECT);
12496 : }
12497 : else
12498 166877 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12499 :
12500 166877 : new_rtx = gen_const_mem (Pmode, new_rtx);
12501 83450 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12502 : }
12503 :
12504 261450 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12505 : }
12506 : else
12507 : {
12508 27219 : if (CONST_INT_P (addr)
12509 27219 : && !x86_64_immediate_operand (addr, VOIDmode))
12510 8 : new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
12511 27211 : else if (GET_CODE (addr) == CONST)
12512 : {
12513 16717 : addr = XEXP (addr, 0);
12514 :
12515 : /* We must match stuff we generate before. Assume the only
12516 : unspecs that can get here are ours. Not that we could do
12517 : anything with them anyway.... */
12518 16717 : if (GET_CODE (addr) == UNSPEC
12519 8963 : || (GET_CODE (addr) == PLUS
12520 8963 : && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12521 : return orig;
12522 6839 : gcc_assert (GET_CODE (addr) == PLUS);
12523 : }
12524 :
12525 17341 : if (GET_CODE (addr) == PLUS)
12526 : {
12527 8671 : rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12528 :
12529 : /* Check first to see if this is a constant
12530 : offset from a @GOTOFF symbol reference. */
12531 8671 : if (!TARGET_PECOFF
12532 13741 : && gotoff_operand (op0, Pmode)
12533 8671 : && CONST_INT_P (op1))
12534 : {
12535 4 : if (!TARGET_64BIT)
12536 : {
12537 0 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12538 : UNSPEC_GOTOFF);
12539 0 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12540 0 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12541 :
12542 0 : if (reg != 0)
12543 : {
12544 0 : gcc_assert (REG_P (reg));
12545 0 : new_rtx = expand_simple_binop (Pmode, PLUS,
12546 : pic_offset_table_rtx,
12547 : new_rtx, reg, 1,
12548 : OPTAB_DIRECT);
12549 : }
12550 : else
12551 0 : new_rtx
12552 0 : = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12553 : }
12554 : else
12555 : {
12556 4 : if (INTVAL (op1) < -16*1024*1024
12557 4 : || INTVAL (op1) >= 16*1024*1024)
12558 : {
12559 4 : if (!x86_64_immediate_operand (op1, Pmode))
12560 4 : op1 = force_reg (Pmode, op1);
12561 :
12562 4 : new_rtx
12563 4 : = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12564 : }
12565 : }
12566 : }
12567 : else
12568 : {
12569 8667 : rtx base = legitimize_pic_address (op0, reg);
12570 8667 : machine_mode mode = GET_MODE (base);
12571 8667 : new_rtx
12572 8667 : = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12573 :
12574 8667 : if (CONST_INT_P (new_rtx))
12575 : {
12576 6827 : if (INTVAL (new_rtx) < -16*1024*1024
12577 6827 : || INTVAL (new_rtx) >= 16*1024*1024)
12578 : {
12579 0 : if (!x86_64_immediate_operand (new_rtx, mode))
12580 0 : new_rtx = force_reg (mode, new_rtx);
12581 :
12582 0 : new_rtx
12583 0 : = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12584 : }
12585 : else
12586 6827 : new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12587 : }
12588 : else
12589 : {
12590 : /* For %rip addressing, we have to use
12591 : just disp32, not base nor index. */
12592 1840 : if (TARGET_64BIT
12593 100 : && (SYMBOL_REF_P (base)
12594 100 : || LABEL_REF_P (base)))
12595 7 : base = force_reg (mode, base);
12596 1840 : if (GET_CODE (new_rtx) == PLUS
12597 1719 : && CONSTANT_P (XEXP (new_rtx, 1)))
12598 : {
12599 1715 : base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12600 1715 : new_rtx = XEXP (new_rtx, 1);
12601 : }
12602 1840 : new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12603 : }
12604 : }
12605 : }
12606 : }
12607 : return new_rtx;
12608 : }
12609 :
12610 : /* Load the thread pointer. If TO_REG is true, force it into a register. */
12611 :
12612 : static rtx
12613 24398 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
12614 : {
12615 24398 : rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12616 :
12617 24398 : if (GET_MODE (tp) != tp_mode)
12618 : {
12619 11 : gcc_assert (GET_MODE (tp) == SImode);
12620 11 : gcc_assert (tp_mode == DImode);
12621 :
12622 11 : tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12623 : }
12624 :
12625 24398 : if (to_reg)
12626 8110 : tp = copy_to_mode_reg (tp_mode, tp);
12627 :
12628 24398 : return tp;
12629 : }
12630 :
12631 : /* Construct the SYMBOL_REF for the _tls_index symbol. */
12632 :
12633 : static GTY(()) rtx ix86_tls_index_symbol;
12634 :
12635 : static rtx
12636 0 : ix86_tls_index (void)
12637 : {
12638 0 : if (!ix86_tls_index_symbol)
12639 0 : ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12640 :
12641 0 : if (flag_pic)
12642 0 : return gen_rtx_CONST (Pmode,
12643 : gen_rtx_UNSPEC (Pmode,
12644 : gen_rtvec (1, ix86_tls_index_symbol),
12645 : UNSPEC_PCREL));
12646 : else
12647 0 : return ix86_tls_index_symbol;
12648 : }
12649 :
12650 : /* Construct the SYMBOL_REF for the tls_get_addr function. */
12651 :
12652 : static GTY(()) rtx ix86_tls_symbol;
12653 :
rtx
ix86_tls_get_addr (void)
{
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    {
      /* __tls_get_addr doesn't preserve vector registers.  When a
	 function with no_caller_saved_registers attribute calls
	 __tls_get_addr, YMM and ZMM registers will be clobbered.
	 Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
		  " with the %<no_caller_saved_registers%> attribute"));
      else
	error (cfun->machine->func_type == TYPE_EXCEPTION
	       ? G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " exception service routine")
	       : G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " interrupt service routine"));
      /* Don't issue the same error twice.  */
      cfun->machine->func_type = TYPE_NORMAL;
      cfun->machine->call_saved_registers
	= TYPE_DEFAULT_CALL_SAVED_REGISTERS;
    }

  /* Lazily create the SYMBOL_REF.  32-bit GNU TLS calls
     ___tls_get_addr (note the third leading underscore).  */
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  /* In the large PIC code model the call target is addressed via
     @PLTOFF relative to the GOT pointer.  */
  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
12698 :
12699 : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12700 :
12701 : static GTY(()) rtx ix86_tls_module_base_symbol;
12702 :
12703 : rtx
12704 87 : ix86_tls_module_base (void)
12705 : {
12706 87 : if (!ix86_tls_module_base_symbol)
12707 : {
12708 10 : ix86_tls_module_base_symbol
12709 10 : = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12710 :
12711 10 : SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12712 10 : |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12713 : }
12714 :
12715 87 : return ix86_tls_module_base_symbol;
12716 : }
12717 :
12718 : /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12719 : false if we expect this to be used for a memory address and true if
12720 : we expect to load the address into a register. */
12721 :
rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
	ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
			       gen_rtx_MULT (Pmode, ind,
					     GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
			   gen_rtx_UNSPEC (SImode,
					   gen_rtvec (1, x),
					   UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    /* Global dynamic: compute the address with a call to
       __tls_get_addr, or via the GNU2 TLS-descriptor patterns.  */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
	      insns = end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      /* The call computes a pure value; let it be CSEd.  */
	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    /* Local dynamic: obtain the module base once, then add
       x@dtpoff to it.  */
    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
	      insns = end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    /* Initial exec: load the offset from the GOT and add the
       thread pointer to it.  */
    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    /* Local exec: the thread pointer plus a link-time constant
       offset (@tpoff / @ntpoff).  */
    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
12980 :
12981 : /* Return true if the TLS address requires insn using integer registers.
12982 : It's used to prevent KMOV/VMOV in TLS code sequences which require integer
12983 : MOV instructions, refer to PR103275. */
12984 : bool
12985 15221359 : ix86_gpr_tls_address_pattern_p (rtx mem)
12986 : {
12987 15221359 : gcc_assert (MEM_P (mem));
12988 :
12989 15221359 : rtx addr = XEXP (mem, 0);
12990 15221359 : subrtx_var_iterator::array_type array;
12991 53026351 : FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12992 : {
12993 37812527 : rtx op = *iter;
12994 37812527 : if (GET_CODE (op) == UNSPEC)
12995 201658 : switch (XINT (op, 1))
12996 : {
12997 : case UNSPEC_GOTNTPOFF:
12998 7535 : return true;
12999 0 : case UNSPEC_TPOFF:
13000 0 : if (!TARGET_64BIT)
13001 : return true;
13002 : break;
13003 : default:
13004 : break;
13005 : }
13006 : }
13007 :
13008 15213824 : return false;
13009 15221359 : }
13010 :
13011 : /* Return true if OP refers to a TLS address. */
bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      /* NB: this deliberately shadows the parameter; it refers to the
	 current sub-rtx of the walk.  */
      rtx op = *iter;
      if (MEM_P (op))
	{
	  /* A TLS access is identified by an UNSPEC_TP term (possibly
	     zero-extended) somewhere in the PLUS chain of the address.  */
	  rtx *x = &XEXP (op, 0);
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    return true;
		}
	      x = &XEXP (*x, 0);
	    }

	  /* The address has been fully examined above; don't walk
	     into the MEM again.  */
	  iter.skip_subrtxes ();
	}
    }

  return false;
}
13043 :
13044 : /* Rewrite *LOC so that it refers to a default TLS address space. */
static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      /* NB: this deliberately shadows the parameter; it points at the
	 current sub-rtx of the walk.  */
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  /* Work on a local copy of the address; *X points into ADDR,
	     so editing *X edits ADDR.  */
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      /* NB: Since address override only applies to the
			 (reg32) part in fs:(reg32), return if address
			 override is used.  */
		      if (Pmode != word_mode
			  && REG_P (XEXP (*x, 1 - i)))
			return;

		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Drop the UNSPEC_TP term, then rebuild the MEM
			 in the default TLS address space.  */
		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}
13090 :
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to the default TLS address space.  */
13093 : rtx
13094 17981 : ix86_rewrite_tls_address (rtx pattern)
13095 : {
13096 17981 : pattern = copy_insn (pattern);
13097 17981 : ix86_rewrite_tls_address_1 (&pattern);
13098 17981 : return pattern;
13099 : }
13100 :
13101 : /* Try machine-dependent ways of modifying an illegitimate address
13102 : to be legitimate. If we find one, return the new, valid address.
13103 : This macro is used in only one place: `memory_address' in explow.cc.
13104 :
13105 : OLDX is the address as it was before break_out_memory_refs was called.
13106 : In some cases it is useful to look at this to decide what needs to be done.
13107 :
13108 : It is always safe for this macro to do nothing. It exists to recognize
13109 : opportunities to optimize the output.
13110 :
13111 : For the 80386, we handle X+REG by loading X into a register R and
13112 : using R+REG. R will go in a general reg and indexing will be used.
13113 : However, if REG is a broken-out memory address or multiplication,
13114 : nothing needs to be done because REG can certainly go in a general reg.
13115 :
13116 : When -fpic is used, special handling is needed for symbolic references.
13117 : See comments by legitimize_pic_address in i386.cc for details. */
13118 :
static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbols (and TLS symbol + constant offset) take their own
     legitimization path.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  /* PE-COFF dllimport references are handled by
     legitimize_pe_coff_symbol.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
        return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* As a last resort, force the non-register operand into a
	 fresh register.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
13307 :
13308 : /* Print an integer constant expression in assembler syntax. Addition
13309 : and subtraction are the only arithmetic that may appear in these
13310 : expressions. FILE is the stdio stream to write to, X is the rtx, and
13311 : CODE is the operand print code from the output string. */
13312 :
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* 'P' requests a PLT-qualified call target.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    /* Plain integer constants print as-is.  */
    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    /* Print the wrapped operand, then the relocation suffix that
       corresponds to the particular GOT/TLS reference kind.  */
    case UNSPEC:
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
	case UNSPEC_SECREL32:
	  fputs ("@secrel32", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
13467 :
13468 : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13469 : We need to emit DTP-relative relocations. */
13470 :
13471 : static void ATTRIBUTE_UNUSED
13472 668 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13473 : {
13474 668 : fputs (ASM_LONG, file);
13475 668 : output_addr_const (file, x);
13476 : #if TARGET_WIN32_TLS
13477 : fputs ("@secrel32", file);
13478 : #else
13479 668 : fputs ("@dtpoff", file);
13480 : #endif
13481 668 : switch (size)
13482 : {
13483 : case 4:
13484 : break;
13485 549 : case 8:
13486 549 : fputs (", 0", file);
13487 549 : break;
13488 0 : default:
13489 0 : gcc_unreachable ();
13490 : }
13491 668 : }
13492 :
13493 : /* Return true if X is a representation of the PIC register. This copes
13494 : with calls from ix86_find_base_term, where the register might have
13495 : been replaced by a cselib value. */
13496 :
13497 : static bool
13498 26896306 : ix86_pic_register_p (rtx x)
13499 : {
13500 26896306 : if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13501 748622 : return (pic_offset_table_rtx
13502 748622 : && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13503 26147684 : else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13504 : return true;
13505 26144692 : else if (!REG_P (x))
13506 : return false;
13507 25533062 : else if (pic_offset_table_rtx)
13508 : {
13509 25513177 : if (REGNO (x) == REGNO (pic_offset_table_rtx))
13510 : return true;
13511 407858 : if (HARD_REGISTER_P (x)
13512 386388 : && !HARD_REGISTER_P (pic_offset_table_rtx)
13513 794246 : && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13514 : return true;
13515 : return false;
13516 : }
13517 : else
13518 19885 : return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13519 : }
13520 :
13521 : /* Helper function for ix86_delegitimize_address.
13522 : Attempt to delegitimize TLS local-exec accesses. */
13523 :
static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Segment-based TLS addresses only arise with direct seg refs.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  /* Work on the address inside a MEM, if we got one.  */
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* The address must decompose with the TLS segment override and a
     CONST displacement that wraps the tpoff unspec.  */
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Skip over an optional constant offset:
     (plus (unspec ...) (const_int ...)).  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Re-attach the constant offset skipped above to the bare symbol.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild the index*scale and base parts of the address around the
     delegitimized symbol.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* Preserve the MEM wrapper (keeping its attributes) when the input
     was a memory reference.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
13563 :
13564 : /* In the name of slightly smaller debug output, and to cater to
13565 : general assembler lossage, recognize PIC+GOTOFF and turn it back
13566 : into a direct symbol reference.
13567 :
13568 : On Darwin, this is necessary to avoid a crash, because Darwin
13569 : has a different PIC label for each routine but the DWARF debugging
13570 : information is not associated with any particular routine, so it's
13571 : necessary to remove references to the PIC label from RTL stored by
13572 : the DWARF output code.
13573 :
13574 : This helper is used in the normal ix86_delegitimize_address
13575 : entrypoint (e.g. used in the target delegitimization hook) and
13576 : in ix86_find_base_term. As compile time memory optimization, we
13577 : avoid allocating rtxes that will not change anything on the outcome
13578 : of the callers (find_base_value and find_base_term). */
13579 :
static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  /* Work on the address inside a MEM, if we got one.  */
  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec [sym] PCREL) off)) -> off + sym.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* Bare @GOTPCREL (only meaningful inside a MEM) or @PCREL
	 references delegitimize to the wrapped symbol directly.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  /* Narrow the symbol to the MEM's mode when they disagree.  */
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  /* From here on we expect pic_reg-relative forms: (plus ... (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither operand is the PIC register: treat the whole PLUS
	     as an addend to be compensated for below.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Strip the CONST wrapper and any trailing constant offset.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Only specific UNSPEC/context combinations are delegitimizable.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_VAROFF)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Narrow the result to the MEM's mode when they disagree.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
13731 :
13732 : /* The normal instantiation of the above template. */
13733 :
static rtx
ix86_delegitimize_address (rtx x)
{
  /* Full delegitimization (base_term_p == false): build the complete
     delegitimized expression, including CONST-wrapped offsets.  */
  return ix86_delegitimize_address_1 (x, false);
}
13739 :
13740 : /* If X is a machine specific address (i.e. a symbol or label being
13741 : referenced as a displacement from the GOT implemented using an
13742 : UNSPEC), then return the base term. Otherwise return X. */
13743 :
13744 : rtx
13745 6675936890 : ix86_find_base_term (rtx x)
13746 : {
13747 6675936890 : rtx term;
13748 :
13749 6675936890 : if (TARGET_64BIT)
13750 : {
13751 3473730152 : if (GET_CODE (x) != CONST)
13752 : return x;
13753 45069016 : term = XEXP (x, 0);
13754 45069016 : if (GET_CODE (term) == PLUS
13755 45054143 : && CONST_INT_P (XEXP (term, 1)))
13756 45054143 : term = XEXP (term, 0);
13757 45069016 : if (GET_CODE (term) != UNSPEC
13758 40579 : || (XINT (term, 1) != UNSPEC_GOTPCREL
13759 40579 : && XINT (term, 1) != UNSPEC_PCREL))
13760 : return x;
13761 :
13762 0 : return XVECEXP (term, 0, 0);
13763 : }
13764 :
13765 3202206738 : return ix86_delegitimize_address_1 (x, true);
13766 : }
13767 :
13768 : /* Return true if X shouldn't be emitted into the debug info.
13769 : Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13770 : symbol easily into the .debug_info section, so we need not to
13771 : delegitimize, but instead assemble as @gotoff.
13772 : Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13773 : assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
13774 :
13775 : static bool
13776 1891873 : ix86_const_not_ok_for_debug_p (rtx x)
13777 : {
13778 1891873 : if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13779 : return true;
13780 :
13781 1891853 : if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13782 0 : return true;
13783 :
13784 : return false;
13785 : }
13786 :
/* Output to FILE the assembler condition suffix (e.g. "e", "ne", "a")
   for rtx comparison CODE evaluated in flags mode MODE.  If REVERSE,
   emit the suffix for the inverse condition.  FP selects the spelling
   used by fcmov/x87-style mnemonics where it differs from the integer
   setcc/cmov spelling.  */
static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* FP comparisons are first mapped to the equivalent integer
     condition on the flag bits, then handled as CCmode.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the single-flag CC modes, EQ tests that one flag directly.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      /* NE is the negated form of each single-flag test.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	/* When only SF is meaningful, LT reduces to a sign test.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	/* Likewise, GE reduces to "sign not set" in these modes.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
13936 :
13937 : /* Print the name of register X to FILE based on its machine mode and number.
13938 : If CODE is 'w', pretend the mode is HImode.
13939 : If CODE is 'b', pretend the mode is QImode.
13940 : If CODE is 'k', pretend the mode is SImode.
13941 : If CODE is 'q', pretend the mode is DImode.
13942 : If CODE is 'x', pretend the mode is V4SFmode.
13943 : If CODE is 't', pretend the mode is V8SFmode.
13944 : If CODE is 'g', pretend the mode is V16SFmode.
13945 : If CODE is 'h', pretend the reg is the 'high' byte register.
13946 : If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13947 : If CODE is 'd', duplicate the operand for AVX instruction.
13948 : If CODE is 'V', print naked full integer register name without %.
13949 : */
13950 :
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes register names with '%'; 'V' asks for the
     naked name, so suppress the prefix there.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  /* pc_rtx prints as the 64-bit instruction pointer.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  /* 'y' spells the x87 stack top as "st(0)" rather than "st".  */
  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Map the modifier letter to the size (in bytes) of the name to
     print; 0 means a "high" byte register, otherwise fall back to
     the operand's own mode size.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* Internal-only registers must never reach assembler output.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' always prints the full-width integer register name.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get an 'r' (64-bit) or 'e' (32-bit)
	 prefix, e.g. rax/eax.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* 'h': high-byte registers (ah, bh, ch, dh).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* 256/512-bit vectors: replace the leading 'x' of "xmmN" with
	 'y' or 'z' by printing that letter and skipping one char.  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  /* 'd' with AVX duplicates the register as a second operand.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
14101 :
14102 : /* Meaning of CODE:
14103 : L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14104 : C -- print opcode suffix for set/cmov insn.
14105 : c -- like C, but print reversed condition
14106 : F,f -- likewise, but for floating-point.
14107 : O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14108 : otherwise nothing
14109 : R -- print embedded rounding and sae.
14110 : r -- print only sae.
14111 : z -- print the opcode suffix for the size of the current operand.
14112 : Z -- likewise, with special suffixes for x87 instructions.
14113 : * -- print a star (in certain assembler syntax)
14114 : A -- print an absolute memory reference.
14115 : E -- print address with DImode register names if TARGET_64BIT.
14116 : w -- print the operand as if it's a "word" (HImode) even if it isn't.
14117 : s -- print a shift double count, followed by the assemblers argument
14118 : delimiter.
14119 : b -- print the QImode name of the register for the indicated operand.
14120 : %b0 would print %al if operands[0] is reg 0.
14121 : w -- likewise, print the HImode name of the register.
14122 : k -- likewise, print the SImode name of the register.
14123 : q -- likewise, print the DImode name of the register.
14124 : x -- likewise, print the V4SFmode name of the register.
14125 : t -- likewise, print the V8SFmode name of the register.
14126 : g -- likewise, print the V16SFmode name of the register.
14127 : h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14128 : y -- print "st(0)" instead of "st" as a register.
14129 : d -- print duplicated register operand for AVX instruction.
14130 : D -- print condition for SSE cmp instruction.
14131 : P -- if PIC, print an @PLT suffix. For -fno-plt, load function
14132 : address from GOT.
14133 : p -- print raw symbol name.
14134 : X -- don't print any sort of PIC '@' suffix for a symbol.
14135 : & -- print some in-use local-dynamic symbol name.
14136 : H -- print a memory address offset by 8; used for sse high-parts
14137 : Y -- print condition for XOP pcom* instruction.
14138 : V -- print naked full integer register name without %.
14139 : v -- print segment override prefix
14140 : + -- print a branch hint as 'cs' or 'ds' prefix
14141 : ; -- print a semicolon (after prefixes due to bug in older gas).
14142 : ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14143 : ^ -- print addr32 prefix if Pmode != word_mode
14144 : M -- print addr32 prefix for TARGET_X32 with VSIB address.
14145 : ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
14146 : N -- print maskz if it's constant 0 operand.
14147 : G -- print embedded flag for ccmp/ctest.
14148 : */
14149 :
14150 : void
14151 176823180 : ix86_print_operand (FILE *file, rtx x, int code)
14152 : {
14153 177013623 : if (code)
14154 : {
14155 62206711 : switch (code)
14156 : {
14157 190439 : case 'A':
14158 190439 : switch (ASSEMBLER_DIALECT)
14159 : {
14160 190439 : case ASM_ATT:
14161 190439 : putc ('*', file);
14162 190439 : break;
14163 :
14164 0 : case ASM_INTEL:
14165 : /* Intel syntax. For absolute addresses, registers should not
14166 : be surrounded by braces. */
14167 0 : if (!REG_P (x))
14168 : {
14169 0 : putc ('[', file);
14170 0 : ix86_print_operand (file, x, 0);
14171 0 : putc (']', file);
14172 0 : return;
14173 : }
14174 : break;
14175 :
14176 0 : default:
14177 0 : gcc_unreachable ();
14178 : }
14179 :
14180 190439 : ix86_print_operand (file, x, 0);
14181 190439 : return;
14182 :
14183 3562162 : case 'E':
14184 : /* Wrap address in an UNSPEC to declare special handling. */
14185 3562162 : if (TARGET_64BIT)
14186 3071765 : x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14187 :
14188 3562162 : output_address (VOIDmode, x);
14189 3562162 : return;
14190 :
14191 0 : case 'L':
14192 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14193 0 : putc ('l', file);
14194 0 : return;
14195 :
14196 0 : case 'W':
14197 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14198 0 : putc ('w', file);
14199 0 : return;
14200 :
14201 0 : case 'B':
14202 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14203 0 : putc ('b', file);
14204 0 : return;
14205 :
14206 0 : case 'Q':
14207 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14208 0 : putc ('l', file);
14209 0 : return;
14210 :
14211 0 : case 'S':
14212 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14213 0 : putc ('s', file);
14214 0 : return;
14215 :
14216 0 : case 'T':
14217 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14218 0 : putc ('t', file);
14219 0 : return;
14220 :
14221 : case 'O':
14222 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14223 : if (ASSEMBLER_DIALECT != ASM_ATT)
14224 : return;
14225 :
14226 : switch (GET_MODE_SIZE (GET_MODE (x)))
14227 : {
14228 : case 2:
14229 : putc ('w', file);
14230 : break;
14231 :
14232 : case 4:
14233 : putc ('l', file);
14234 : break;
14235 :
14236 : case 8:
14237 : putc ('q', file);
14238 : break;
14239 :
14240 : default:
14241 : output_operand_lossage ("invalid operand size for operand "
14242 : "code 'O'");
14243 : return;
14244 : }
14245 :
14246 : putc ('.', file);
14247 : #endif
14248 : return;
14249 :
14250 38012 : case 'z':
14251 38012 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14252 : {
14253 : /* Opcodes don't get size suffixes if using Intel opcodes. */
14254 38010 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14255 : return;
14256 :
14257 76020 : switch (GET_MODE_SIZE (GET_MODE (x)))
14258 : {
14259 6 : case 1:
14260 6 : putc ('b', file);
14261 6 : return;
14262 :
14263 6 : case 2:
14264 6 : putc ('w', file);
14265 6 : return;
14266 :
14267 37516 : case 4:
14268 37516 : putc ('l', file);
14269 37516 : return;
14270 :
14271 482 : case 8:
14272 482 : putc ('q', file);
14273 482 : return;
14274 :
14275 0 : default:
14276 0 : output_operand_lossage ("invalid operand size for operand "
14277 : "code 'z'");
14278 0 : return;
14279 : }
14280 : }
14281 :
14282 2 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14283 : {
14284 1 : if (this_is_asm_operands)
14285 1 : warning_for_asm (this_is_asm_operands,
14286 : "non-integer operand used with operand code %<z%>");
14287 : else
14288 0 : warning (0, "non-integer operand used with operand code %<z%>");
14289 : }
14290 : /* FALLTHRU */
14291 :
14292 381955 : case 'Z':
14293 : /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14294 381955 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14295 : return;
14296 :
14297 381955 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14298 : {
14299 29236 : switch (GET_MODE_SIZE (GET_MODE (x)))
14300 : {
14301 3501 : case 2:
14302 : #ifdef HAVE_AS_IX86_FILDS
14303 3501 : putc ('s', file);
14304 : #endif
14305 3501 : return;
14306 :
14307 3941 : case 4:
14308 3941 : putc ('l', file);
14309 3941 : return;
14310 :
14311 7176 : case 8:
14312 : #ifdef HAVE_AS_IX86_FILDQ
14313 7176 : putc ('q', file);
14314 : #else
14315 : fputs ("ll", file);
14316 : #endif
14317 7176 : return;
14318 :
14319 : default:
14320 : break;
14321 : }
14322 : }
14323 367337 : else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14324 : {
14325 : /* 387 opcodes don't get size suffixes
14326 : if the operands are registers. */
14327 367335 : if (STACK_REG_P (x))
14328 : return;
14329 :
14330 689904 : switch (GET_MODE_SIZE (GET_MODE (x)))
14331 : {
14332 23110 : case 4:
14333 23110 : putc ('s', file);
14334 23110 : return;
14335 :
14336 32751 : case 8:
14337 32751 : putc ('l', file);
14338 32751 : return;
14339 :
14340 289089 : case 12:
14341 289089 : case 16:
14342 289089 : putc ('t', file);
14343 289089 : return;
14344 :
14345 : default:
14346 : break;
14347 : }
14348 : }
14349 : else
14350 : {
14351 2 : output_operand_lossage ("invalid operand type used with "
14352 : "operand code '%c'", code);
14353 2 : return;
14354 : }
14355 :
14356 2 : output_operand_lossage ("invalid operand size for operand code '%c'",
14357 : code);
14358 2 : return;
14359 :
14360 : case 'd':
14361 : case 'b':
14362 : case 'w':
14363 : case 'k':
14364 : case 'q':
14365 : case 'h':
14366 : case 't':
14367 : case 'g':
14368 : case 'y':
14369 : case 'x':
14370 : case 'X':
14371 : case 'P':
14372 : case 'p':
14373 : case 'V':
14374 : break;
14375 :
14376 0 : case 's':
14377 0 : if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14378 : {
14379 0 : ix86_print_operand (file, x, 0);
14380 0 : fputs (", ", file);
14381 : }
14382 0 : return;
14383 :
14384 494 : case 'Y':
14385 494 : switch (GET_CODE (x))
14386 : {
14387 182 : case NE:
14388 182 : fputs ("neq", file);
14389 182 : break;
14390 32 : case EQ:
14391 32 : fputs ("eq", file);
14392 32 : break;
14393 64 : case GE:
14394 64 : case GEU:
14395 64 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14396 64 : break;
14397 40 : case GT:
14398 40 : case GTU:
14399 40 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14400 40 : break;
14401 64 : case LE:
14402 64 : case LEU:
14403 64 : fputs ("le", file);
14404 64 : break;
14405 112 : case LT:
14406 112 : case LTU:
14407 112 : fputs ("lt", file);
14408 112 : break;
14409 0 : case UNORDERED:
14410 0 : fputs ("unord", file);
14411 0 : break;
14412 0 : case ORDERED:
14413 0 : fputs ("ord", file);
14414 0 : break;
14415 0 : case UNEQ:
14416 0 : fputs ("ueq", file);
14417 0 : break;
14418 0 : case UNGE:
14419 0 : fputs ("nlt", file);
14420 0 : break;
14421 0 : case UNGT:
14422 0 : fputs ("nle", file);
14423 0 : break;
14424 0 : case UNLE:
14425 0 : fputs ("ule", file);
14426 0 : break;
14427 0 : case UNLT:
14428 0 : fputs ("ult", file);
14429 0 : break;
14430 0 : case LTGT:
14431 0 : fputs ("une", file);
14432 0 : break;
14433 0 : default:
14434 0 : output_operand_lossage ("operand is not a condition code, "
14435 : "invalid operand code 'Y'");
14436 0 : return;
14437 : }
14438 494 : return;
14439 :
14440 9312 : case 'D':
14441 : /* Little bit of braindamage here. The SSE compare instructions
14442 : does use completely different names for the comparisons that the
14443 : fp conditional moves. */
14444 9312 : switch (GET_CODE (x))
14445 : {
14446 3 : case UNEQ:
14447 3 : if (TARGET_AVX)
14448 : {
14449 3 : fputs ("eq_us", file);
14450 3 : break;
14451 : }
14452 : /* FALLTHRU */
14453 4626 : case EQ:
14454 4626 : fputs ("eq", file);
14455 4626 : break;
14456 0 : case UNLT:
14457 0 : if (TARGET_AVX)
14458 : {
14459 0 : fputs ("nge", file);
14460 0 : break;
14461 : }
14462 : /* FALLTHRU */
14463 1626 : case LT:
14464 1626 : fputs ("lt", file);
14465 1626 : break;
14466 0 : case UNLE:
14467 0 : if (TARGET_AVX)
14468 : {
14469 0 : fputs ("ngt", file);
14470 0 : break;
14471 : }
14472 : /* FALLTHRU */
14473 795 : case LE:
14474 795 : fputs ("le", file);
14475 795 : break;
14476 95 : case UNORDERED:
14477 95 : fputs ("unord", file);
14478 95 : break;
14479 24 : case LTGT:
14480 24 : if (TARGET_AVX)
14481 : {
14482 24 : fputs ("neq_oq", file);
14483 24 : break;
14484 : }
14485 : /* FALLTHRU */
14486 887 : case NE:
14487 887 : fputs ("neq", file);
14488 887 : break;
14489 0 : case GE:
14490 0 : if (TARGET_AVX)
14491 : {
14492 0 : fputs ("ge", file);
14493 0 : break;
14494 : }
14495 : /* FALLTHRU */
14496 403 : case UNGE:
14497 403 : fputs ("nlt", file);
14498 403 : break;
14499 0 : case GT:
14500 0 : if (TARGET_AVX)
14501 : {
14502 0 : fputs ("gt", file);
14503 0 : break;
14504 : }
14505 : /* FALLTHRU */
14506 770 : case UNGT:
14507 770 : fputs ("nle", file);
14508 770 : break;
14509 83 : case ORDERED:
14510 83 : fputs ("ord", file);
14511 83 : break;
14512 0 : default:
14513 0 : output_operand_lossage ("operand is not a condition code, "
14514 : "invalid operand code 'D'");
14515 0 : return;
14516 : }
14517 9312 : return;
14518 :
14519 7145002 : case 'F':
14520 7145002 : case 'f':
14521 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14522 : if (ASSEMBLER_DIALECT == ASM_ATT)
14523 : putc ('.', file);
14524 : gcc_fallthrough ();
14525 : #endif
14526 :
14527 7145002 : case 'C':
14528 7145002 : case 'c':
14529 7145002 : if (!COMPARISON_P (x))
14530 : {
14531 0 : output_operand_lossage ("operand is not a condition code, "
14532 : "invalid operand code '%c'", code);
14533 0 : return;
14534 : }
14535 7145002 : put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14536 7145002 : code == 'c' || code == 'f',
14537 7145002 : code == 'F' || code == 'f',
14538 : file);
14539 7145002 : return;
14540 :
14541 21 : case 'G':
14542 21 : {
14543 21 : int dfv = INTVAL (x);
14544 21 : const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14545 21 : fputs (dfv_suffix, file);
14546 : }
14547 21 : return;
14548 :
14549 1301 : case 'H':
14550 1301 : if (!offsettable_memref_p (x))
14551 : {
14552 1 : output_operand_lossage ("operand is not an offsettable memory "
14553 : "reference, invalid operand code 'H'");
14554 1 : return;
14555 : }
14556 : /* It doesn't actually matter what mode we use here, as we're
14557 : only going to use this for printing. */
14558 1300 : x = adjust_address_nv (x, DImode, 8);
14559 : /* Output 'qword ptr' for intel assembler dialect. */
14560 1300 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14561 0 : code = 'q';
14562 : break;
14563 :
14564 75627 : case 'K':
14565 75627 : if (!CONST_INT_P (x))
14566 : {
14567 1 : output_operand_lossage ("operand is not an integer, invalid "
14568 : "operand code 'K'");
14569 1 : return;
14570 : }
14571 :
14572 75626 : if (INTVAL (x) & IX86_HLE_ACQUIRE)
14573 : #ifdef HAVE_AS_IX86_HLE
14574 22 : fputs ("xacquire ", file);
14575 : #else
14576 : fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14577 : #endif
14578 75604 : else if (INTVAL (x) & IX86_HLE_RELEASE)
14579 : #ifdef HAVE_AS_IX86_HLE
14580 24 : fputs ("xrelease ", file);
14581 : #else
14582 : fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14583 : #endif
14584 : /* We do not want to print value of the operand. */
14585 75626 : return;
14586 :
14587 43035 : case 'N':
14588 43035 : if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14589 15481 : fputs ("{z}", file);
14590 43035 : return;
14591 :
14592 4013 : case 'r':
14593 4013 : if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14594 : {
14595 2 : output_operand_lossage ("operand is not a specific integer, "
14596 : "invalid operand code 'r'");
14597 2 : return;
14598 : }
14599 :
14600 4011 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14601 1 : fputs (", ", file);
14602 :
14603 4011 : fputs ("{sae}", file);
14604 :
14605 4011 : if (ASSEMBLER_DIALECT == ASM_ATT)
14606 4010 : fputs (", ", file);
14607 :
14608 4011 : return;
14609 :
14610 5993 : case 'R':
14611 5993 : if (!CONST_INT_P (x))
14612 : {
14613 1 : output_operand_lossage ("operand is not an integer, invalid "
14614 : "operand code 'R'");
14615 1 : return;
14616 : }
14617 :
14618 5992 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14619 6 : fputs (", ", file);
14620 :
14621 5992 : switch (INTVAL (x))
14622 : {
14623 5177 : case ROUND_NEAREST_INT | ROUND_SAE:
14624 5177 : fputs ("{rn-sae}", file);
14625 5177 : break;
14626 637 : case ROUND_NEG_INF | ROUND_SAE:
14627 637 : fputs ("{rd-sae}", file);
14628 637 : break;
14629 56 : case ROUND_POS_INF | ROUND_SAE:
14630 56 : fputs ("{ru-sae}", file);
14631 56 : break;
14632 121 : case ROUND_ZERO | ROUND_SAE:
14633 121 : fputs ("{rz-sae}", file);
14634 121 : break;
14635 1 : default:
14636 1 : output_operand_lossage ("operand is not a specific integer, "
14637 : "invalid operand code 'R'");
14638 : }
14639 :
14640 5992 : if (ASSEMBLER_DIALECT == ASM_ATT)
14641 5986 : fputs (", ", file);
14642 :
14643 5992 : return;
14644 :
14645 10450 : case 'v':
14646 10450 : if (MEM_P (x))
14647 : {
14648 10567 : switch (MEM_ADDR_SPACE (x))
14649 : {
14650 : case ADDR_SPACE_GENERIC:
14651 : break;
14652 0 : case ADDR_SPACE_SEG_FS:
14653 0 : fputs ("fs ", file);
14654 0 : break;
14655 0 : case ADDR_SPACE_SEG_GS:
14656 0 : fputs ("gs ", file);
14657 0 : break;
14658 0 : default:
14659 0 : gcc_unreachable ();
14660 : }
14661 : }
14662 : else
14663 0 : output_operand_lossage ("operand is not a memory reference, "
14664 : "invalid operand code 'v'");
14665 10450 : return;
14666 :
14667 0 : case '*':
14668 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14669 0 : putc ('*', file);
14670 0 : return;
14671 :
14672 202 : case '&':
14673 202 : {
14674 202 : const char *name = get_some_local_dynamic_name ();
14675 202 : if (name == NULL)
14676 1 : output_operand_lossage ("'%%&' used without any "
14677 : "local dynamic TLS references");
14678 : else
14679 201 : assemble_name (file, name);
14680 202 : return;
14681 : }
14682 :
14683 6511196 : case '+':
14684 6511196 : {
14685 6511196 : rtx x;
14686 :
14687 6511196 : if (!optimize
14688 5098077 : || optimize_function_for_size_p (cfun)
14689 11418405 : || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14690 4907209 : && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14691 6511196 : return;
14692 :
14693 0 : x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14694 0 : if (x)
14695 : {
14696 0 : int pred_val = profile_probability::from_reg_br_prob_note
14697 0 : (XINT (x, 0)).to_reg_br_prob_base ();
14698 :
14699 0 : bool taken = pred_val > REG_BR_PROB_BASE / 2;
14700 : /* We use 3e (DS) prefix for taken branches and
14701 : 2e (CS) prefix for not taken branches. */
14702 0 : if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14703 0 : fputs ("ds ; ", file);
14704 0 : else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14705 0 : fputs ("cs ; ", file);
14706 : }
14707 0 : return;
14708 : }
14709 :
14710 : case ';':
14711 : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14712 : putc (';', file);
14713 : #endif
14714 : return;
14715 :
14716 3722 : case '~':
14717 3722 : putc (TARGET_AVX2 ? 'i' : 'f', file);
14718 3722 : return;
14719 :
14720 1675 : case 'M':
14721 1675 : if (TARGET_X32)
14722 : {
14723 : /* NB: 32-bit indices in VSIB address are sign-extended
14724 : to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
14725 : sign-extended to 0xfffffffff7fa3010 which is invalid
14726 : address. Add addr32 prefix if there is no base
14727 : register nor symbol. */
14728 40 : bool ok;
14729 40 : struct ix86_address parts;
14730 40 : ok = ix86_decompose_address (x, &parts);
14731 40 : gcc_assert (ok && parts.index == NULL_RTX);
14732 40 : if (parts.base == NULL_RTX
14733 40 : && (parts.disp == NULL_RTX
14734 34 : || !symbolic_operand (parts.disp,
14735 34 : GET_MODE (parts.disp))))
14736 34 : fputs ("addr32 ", file);
14737 : }
14738 1675 : return;
14739 :
14740 22171 : case '^':
14741 25342 : if (Pmode != word_mode)
14742 0 : fputs ("addr32 ", file);
14743 22171 : return;
14744 :
14745 14876739 : case '!':
14746 14876739 : if (ix86_notrack_prefixed_insn_p (current_output_insn))
14747 3777 : fputs ("notrack ", file);
14748 14876739 : return;
14749 :
14750 1 : default:
14751 1 : output_operand_lossage ("invalid operand code '%c'", code);
14752 : }
14753 : }
14754 :
14755 143742837 : if (REG_P (x))
14756 85618799 : print_reg (x, code, file);
14757 :
14758 58124038 : else if (MEM_P (x))
14759 : {
14760 33358604 : rtx addr = XEXP (x, 0);
14761 :
14762 : /* No `byte ptr' prefix for call instructions ... */
14763 33358604 : if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14764 : {
14765 325 : machine_mode mode = GET_MODE (x);
14766 325 : const char *size;
14767 :
14768 : /* Check for explicit size override codes. */
14769 325 : if (code == 'b')
14770 : size = "BYTE";
14771 : else if (code == 'w')
14772 : size = "WORD";
14773 : else if (code == 'k')
14774 : size = "DWORD";
14775 : else if (code == 'q')
14776 : size = "QWORD";
14777 : else if (code == 'x')
14778 : size = "XMMWORD";
14779 : else if (code == 't')
14780 : size = "YMMWORD";
14781 : else if (code == 'g')
14782 : size = "ZMMWORD";
14783 238 : else if (mode == BLKmode)
14784 : /* ... or BLKmode operands, when not overridden. */
14785 : size = NULL;
14786 : else
14787 472 : switch (GET_MODE_SIZE (mode))
14788 : {
14789 : case 1: size = "BYTE"; break;
14790 : case 2: size = "WORD"; break;
14791 : case 4: size = "DWORD"; break;
14792 : case 8: size = "QWORD"; break;
14793 : case 12: size = "TBYTE"; break;
14794 7 : case 16:
14795 7 : if (mode == XFmode)
14796 : size = "TBYTE";
14797 : else
14798 : size = "XMMWORD";
14799 : break;
14800 : case 32: size = "YMMWORD"; break;
14801 : case 64: size = "ZMMWORD"; break;
14802 0 : default:
14803 0 : gcc_unreachable ();
14804 : }
14805 : if (size)
14806 : {
14807 323 : fputs (size, file);
14808 323 : fputs (" PTR ", file);
14809 : }
14810 : }
14811 :
14812 33358604 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14813 0 : output_operand_lossage ("invalid constraints for operand");
14814 : else
14815 33358604 : ix86_print_operand_address_as
14816 34080913 : (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14817 : }
14818 :
14819 24765434 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14820 : {
14821 762 : long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14822 762 : REAL_MODE_FORMAT (HFmode));
14823 762 : if (ASSEMBLER_DIALECT == ASM_ATT)
14824 762 : putc ('$', file);
14825 762 : fprintf (file, "0x%04x", (unsigned int) l);
14826 762 : }
14827 :
14828 24764672 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14829 : {
14830 22121 : long l;
14831 :
14832 22121 : REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14833 :
14834 22121 : if (ASSEMBLER_DIALECT == ASM_ATT)
14835 22121 : putc ('$', file);
14836 : /* Sign extend 32bit SFmode immediate to 8 bytes. */
14837 22121 : if (code == 'q')
14838 327 : fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14839 : (unsigned long long) (int) l);
14840 : else
14841 21794 : fprintf (file, "0x%08x", (unsigned int) l);
14842 : }
14843 :
14844 24742551 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14845 : {
14846 3699 : long l[2];
14847 :
14848 3699 : REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14849 :
14850 3699 : if (ASSEMBLER_DIALECT == ASM_ATT)
14851 3699 : putc ('$', file);
14852 3699 : fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14853 3699 : }
14854 :
14855 : /* These float cases don't actually occur as immediate operands. */
14856 24738852 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14857 : {
14858 0 : char dstr[30];
14859 :
14860 0 : real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14861 0 : fputs (dstr, file);
14862 0 : }
14863 :
14864 : /* Print bcst_mem_operand. */
14865 24738852 : else if (GET_CODE (x) == VEC_DUPLICATE)
14866 : {
14867 313 : machine_mode vmode = GET_MODE (x);
14868 : /* Must be bcst_memory_operand. */
14869 313 : gcc_assert (bcst_mem_operand (x, vmode));
14870 :
14871 313 : rtx mem = XEXP (x,0);
14872 313 : ix86_print_operand (file, mem, 0);
14873 :
14874 313 : switch (vmode)
14875 : {
14876 28 : case E_V2DImode:
14877 28 : case E_V2DFmode:
14878 28 : fputs ("{1to2}", file);
14879 28 : break;
14880 74 : case E_V4SImode:
14881 74 : case E_V4SFmode:
14882 74 : case E_V4DImode:
14883 74 : case E_V4DFmode:
14884 74 : fputs ("{1to4}", file);
14885 74 : break;
14886 93 : case E_V8SImode:
14887 93 : case E_V8SFmode:
14888 93 : case E_V8DFmode:
14889 93 : case E_V8DImode:
14890 93 : case E_V8HFmode:
14891 93 : fputs ("{1to8}", file);
14892 93 : break;
14893 110 : case E_V16SFmode:
14894 110 : case E_V16SImode:
14895 110 : case E_V16HFmode:
14896 110 : fputs ("{1to16}", file);
14897 110 : break;
14898 8 : case E_V32HFmode:
14899 8 : fputs ("{1to32}", file);
14900 8 : break;
14901 0 : default:
14902 0 : gcc_unreachable ();
14903 : }
14904 : }
14905 :
14906 : else
14907 : {
14908 : /* We have patterns that allow zero sets of memory, for instance.
14909 : In 64-bit mode, we should probably support all 8-byte vectors,
14910 : since we can in fact encode that into an immediate. */
14911 24738539 : if (CONST_VECTOR_P (x))
14912 : {
14913 118 : if (x != CONST0_RTX (GET_MODE (x)))
14914 2 : output_operand_lossage ("invalid vector immediate");
14915 118 : x = const0_rtx;
14916 : }
14917 :
14918 24738539 : if (code == 'P')
14919 : {
14920 5932153 : if (ix86_force_load_from_GOT_p (x, true))
14921 : {
14922 : /* For inline assembly statement, load function address
14923 : from GOT with 'P' operand modifier to avoid PLT. */
14924 4 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14925 : (TARGET_64BIT
14926 : ? UNSPEC_GOTPCREL
14927 : : UNSPEC_GOT));
14928 4 : x = gen_rtx_CONST (Pmode, x);
14929 4 : x = gen_const_mem (Pmode, x);
14930 4 : ix86_print_operand (file, x, 'A');
14931 4 : return;
14932 : }
14933 : }
14934 18806386 : else if (code != 'p')
14935 : {
14936 18806277 : if (CONST_INT_P (x))
14937 : {
14938 15531574 : if (ASSEMBLER_DIALECT == ASM_ATT)
14939 15531348 : putc ('$', file);
14940 : }
14941 3274703 : else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
14942 9391 : || LABEL_REF_P (x))
14943 : {
14944 3274701 : if (ASSEMBLER_DIALECT == ASM_ATT)
14945 3274677 : putc ('$', file);
14946 : else
14947 24 : fputs ("OFFSET FLAT:", file);
14948 : }
14949 : }
14950 24738535 : if (CONST_INT_P (x))
14951 15531660 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14952 9206875 : else if (flag_pic || MACHOPIC_INDIRECT)
14953 531902 : output_pic_addr_const (file, x, code);
14954 : else
14955 8674973 : output_addr_const (file, x);
14956 : }
14957 : }
14958 :
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true iff CODE
   is one of the punctuation operand modifiers handled by
   ix86_print_operand ('*', '+', '&', ';', '~', '^' and '!').  */
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;
    default:
      return false;
    }
}
14965 :
/* Print a memory operand whose address is ADDR to FILE.  AS is the
   address space requested by the caller; RAW, when true, suppresses
   both the segment-override prefix and the RIP-relative shortening
   (callers pass RAW for the 'p'/'P' operand modifiers).  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
                               addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  /* True when ADDR is a VSIB (vector index) address; the index is then
     always printed as a vector register (code 0).  */
  bool vsib = false;
  /* Size-override letter passed to print_reg: 'q' forces 64-bit
     register names, 'k' forces 32-bit names (addr32).  */
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      /* Unwrap the VSIB unspec: operand 0 is the scalar part, operand 1
         the vector index register, operand 2 the scale.  */
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      /* LEA addresses always use 64-bit registers.  */
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Either the caller or the decomposed address may carry a segment,
     but never both.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('%', file);

      switch (as)
        {
        case ADDR_SPACE_SEG_FS:
          fputs ("fs:", file);
          break;
        case ADDR_SPACE_SEG_GS:
          fputs ("gs:", file);
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      /* Look through a CONST (PLUS symbol offset) wrapper.  */
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS
          && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
        symbol = XEXP (XEXP (disp, 0), 0);

      /* Synthesize %rip as the base register; TLS symbols are excluded
         since their relocations need the absolute form.  */
      if (LABEL_REF_P (symbol)
          || (SYMBOL_REF_P (symbol)
              && SYMBOL_REF_TLS_MODEL (symbol) == 0))
        base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
               && GET_CODE (XEXP (disp, 0)) == UNSPEC
               && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
                   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
               && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
        output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
        {
          if (flag_checking)
            {
              gcc_assert (TARGET_64BIT);
              switch (GET_CODE (addr))
                {
                case SUBREG:
                  gcc_assert (GET_MODE (addr) == SImode);
                  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
                  break;
                case ZERO_EXTEND:
                case AND:
                  gcc_assert (GET_MODE (addr) == DImode);
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          gcc_assert (!code);
          code = 'k';
        }
      else if (code == 0
               && TARGET_X32
               && disp
               && CONST_INT_P (disp)
               && INTVAL (disp) < -16*1024*1024)
        {
          /* X32 runs in 64-bit mode, where displacement, DISP, in
             address DISP(%r64), is encoded as 32-bit immediate sign-
             extended from 32-bit to 64-bit.  For -0x40000300(%r64),
             address is %r64 + 0xffffffffbffffd00.  When %r64 <
             0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
             which is invalid for x32.  The correct address is %r64
             - 0x40000300 == 0xf7ffdd64.  To properly encode
             -0x40000300(%r64) for x32, we zero-extend negative
             displacement by forcing addr32 prefix which truncates
             0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
             zero-extend all negative displacements, including -1(%rsp).
             However, for small negative displacements, sign-extension
             won't cause overflow.  We only zero-extend negative
             displacements if they < -16*1024*1024, which is also used
             to check legitimate address displacements for PIC.  */
          code = 'k';
        }

      /* Since the upper 32 bits of RSP are always zero for x32,
         we can encode %esp as %rsp to avoid 0x67 prefix if
         there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
          && !index && base && REG_P (base) && REGNO (base) == SP_REG)
        code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          /* AT&T syntax: disp(base,index,scale).  */
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (LABEL_REF_P (disp))
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, code, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          /* Intel syntax: [base+index*scale+offset].  */
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (LABEL_REF_P (disp))
                output_asm_label (disp);
              else if (CONST_INT_P (disp))
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, code, file);
              if (offset)
                {
                  /* Negative offsets carry their own sign.  */
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
15197 :
15198 : static void
15199 3564438 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
15200 : {
15201 3564438 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
15202 1 : output_operand_lossage ("invalid constraints for operand");
15203 : else
15204 3564437 : ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
15205 3564438 : }
15206 :
15207 : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15208 :
/* Print the relocation suffix for UNSPEC wrappers (TLS and GOT
   references) that output_addr_const cannot handle itself.  Returns
   true when X was recognized and printed, false otherwise.  */
static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  /* Only UNSPEC wrappers are handled here.  */
  if (GET_CODE (x) != UNSPEC)
    return false;

  /* OP is the symbol wrapped by the unspec.  */
  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs ("@gotoff", file);
      break;
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit and 32-bit spellings of the same relocation differ.  */
      if (TARGET_64BIT)
        fputs ("@tpoff", file);
      else
        fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs (ASSEMBLER_DIALECT == ASM_ATT ?
               "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
        fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
    case UNSPEC_SECREL32:
      output_addr_const (file, op);
      fputs ("@secrel32", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      /* Darwin PIC: emit "symbol - <function base>".  */
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    default:
      /* Unknown unspec: let the caller report it.  */
      return false;
    }

  return true;
}
15274 :
15275 :
15276 : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15277 : MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15278 : is the expression of the binary operation. The output may either be
15279 : emitted here, or returned to the caller, like all output_* functions.
15280 :
15281 : There is no guarantee that the operands are the same mode, as they
15282 : might be within FLOAT or FLOAT_EXTEND expressions. */
15283 :
15284 : #ifndef SYSV386_COMPAT
15285 : /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15286 : wants to fix the assemblers because that causes incompatibility
15287 : with gcc. No-one wants to fix gcc because that causes
15288 : incompatibility with assemblers... You can use the option of
15289 : -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15290 : #define SYSV386_COMPAT 1
15291 : #endif
15292 :
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  /* Result template is built up in a static buffer, so the returned
     string is only valid until the next call (not reentrant).  */
  static char buf[40];
  const char *p;
  /* SSE form is used when any operand lives in an SSE register.  */
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Mnemonic prefix: "%v" for SSE/AVX, "fi" for integer-operand 387
     ops, plain "f" otherwise.  */
  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (buf, p);

  /* Append the operation name.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (buf, p);

  if (is_sse)
    {
      /* SSE scalar form: append the element-size suffix and the
         (two- or three-operand) operand template.  */
      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
      strcat (buf, p);

      if (TARGET_AVX)
        p = "\t{%2, %1, %0|%0, %1, %2}";
      else
        p = "\t{%2, %0|%0, %2}";

      strcat (buf, p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
          && ((REG_P (operands[1])
               && REGNO (operands[0]) == REGNO (operands[1])
               && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
              || (REG_P (operands[2])
                  && REGNO (operands[0]) == REGNO (operands[2])
                  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
          && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
        ; /* ok */
      else
        gcc_unreachable ();
    }

  /* 387 form: pick the operand template, taking commutativity, memory
     operands and dying stack registers (which imply a pop) into
     account.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so the destination matches
         operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops: a memory operand on either side selects
         the reversed ('r') form as needed.  */
      if (MEM_P (operands[1]))
        {
          p = "r%Z1\t%1";
          break;
        }

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
15482 :
15483 : /* Return needed mode for entity in optimize_mode_switching pass. */
15484 :
15485 : static int
15486 1654 : ix86_dirflag_mode_needed (rtx_insn *insn)
15487 : {
15488 1654 : if (CALL_P (insn))
15489 : {
15490 339 : if (cfun->machine->func_type == TYPE_NORMAL)
15491 : return X86_DIRFLAG_ANY;
15492 : else
15493 : /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15494 339 : return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15495 : }
15496 :
15497 1315 : if (recog_memoized (insn) < 0)
15498 : return X86_DIRFLAG_ANY;
15499 :
15500 1313 : if (get_attr_type (insn) == TYPE_STR)
15501 : {
15502 : /* Emit cld instruction if stringops are used in the function. */
15503 1 : if (cfun->machine->func_type == TYPE_NORMAL)
15504 0 : return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15505 : else
15506 : return X86_DIRFLAG_RESET;
15507 : }
15508 :
15509 : return X86_DIRFLAG_ANY;
15510 : }
15511 :
15512 : /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15513 :
static bool
ix86_check_avx_upper_register (const_rtx exp)
{
  /* Walk every sub-rtx of EXP: construct_container may return a parallel
     with expr_list which contains the real reg and mode.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
    {
      const_rtx x = *iter;
      /* A hit is an SSE register, excluding the extended (EVEX-only)
	 ones, used in a mode wider than 128 bits, i.e. with live
	 upper halves.  */
      if (SSE_REG_P (x)
	  && !EXT_REX_SSE_REG_P (x)
	  && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
	return true;
    }

  return false;
}
15531 :
15532 : /* Check if a 256bit or 512bit AVX register is referenced in stores. */
15533 :
15534 : static void
15535 52036 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15536 : {
15537 52036 : if (SSE_REG_P (dest)
15538 12854 : && !EXT_REX_SSE_REG_P (dest)
15539 77744 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15540 : {
15541 759 : bool *used = (bool *) data;
15542 759 : *used = true;
15543 : }
15544 52036 : }
15545 :
15546 : /* Return needed mode for entity in optimize_mode_switching pass. */
15547 :
static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  /* Debug insns must not influence mode switching decisions.  */
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* Should be safe to issue a vzeroupper before a sibling
	     call.  Also there is no mode_exit for a sibling call, so
	     a vzeroupper could otherwise be missing for it.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
	  && !EXT_REX_SSE_REG_P (dest)
	  && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  /* A read of upper AVX state on the source side also makes
	     the insn require DIRTY mode (see the comment below).  */
	  if (ix86_check_avx_upper_register (src))
	    return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving the optimal
     insertion point above an eventual read from a 256bit or 512bit
     register.  */
  if (ix86_check_avx_upper_register (PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
15633 :
15634 : /* Return mode that i387 must be switched into
15635 : prior to the execution of insn. */
15636 :
15637 : static int
15638 412857 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
15639 : {
15640 412857 : enum attr_i387_cw mode;
15641 :
15642 : /* The mode UNINITIALIZED is used to store control word after a
15643 : function call or ASM pattern. The mode ANY specify that function
15644 : has no requirements on the control word and make no changes in the
15645 : bits we are interested in. */
15646 :
15647 412857 : if (CALL_P (insn)
15648 412857 : || (NONJUMP_INSN_P (insn)
15649 337983 : && (asm_noperands (PATTERN (insn)) >= 0
15650 337930 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15651 14354 : return I387_CW_UNINITIALIZED;
15652 :
15653 398503 : if (recog_memoized (insn) < 0)
15654 : return I387_CW_ANY;
15655 :
15656 397565 : mode = get_attr_i387_cw (insn);
15657 :
15658 397565 : switch (entity)
15659 : {
15660 0 : case I387_ROUNDEVEN:
15661 0 : if (mode == I387_CW_ROUNDEVEN)
15662 : return mode;
15663 : break;
15664 :
15665 391853 : case I387_TRUNC:
15666 391853 : if (mode == I387_CW_TRUNC)
15667 : return mode;
15668 : break;
15669 :
15670 4378 : case I387_FLOOR:
15671 4378 : if (mode == I387_CW_FLOOR)
15672 : return mode;
15673 : break;
15674 :
15675 1334 : case I387_CEIL:
15676 1334 : if (mode == I387_CW_CEIL)
15677 : return mode;
15678 : break;
15679 :
15680 0 : default:
15681 0 : gcc_unreachable ();
15682 : }
15683 :
15684 : return I387_CW_ANY;
15685 : }
15686 :
15687 : /* Return mode that entity must be switched into
15688 : prior to the execution of insn. */
15689 :
15690 : static int
15691 2510479 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15692 : {
15693 2510479 : switch (entity)
15694 : {
15695 1654 : case X86_DIRFLAG:
15696 1654 : return ix86_dirflag_mode_needed (insn);
15697 2095968 : case AVX_U128:
15698 2095968 : return ix86_avx_u128_mode_needed (insn);
15699 412857 : case I387_ROUNDEVEN:
15700 412857 : case I387_TRUNC:
15701 412857 : case I387_FLOOR:
15702 412857 : case I387_CEIL:
15703 412857 : return ix86_i387_mode_needed (entity, insn);
15704 0 : default:
15705 0 : gcc_unreachable ();
15706 : }
15707 : return 0;
15708 : }
15709 :
15710 : /* Calculate mode of upper 128bit AVX registers after the insn. */
15711 :
static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* vzeroupper and vzeroall explicitly leave the upper halves clean.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function doesn't clobber any sse registers, or clobbers
	 only the 128-bit parts, then vzeroupper isn't issued before the
	 function exit, and the state is ANY, not CLEAN, after the
	 function.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				      abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
15748 :
15749 : /* Return the mode that an insn results in. */
15750 :
15751 : static int
15752 2509638 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15753 : {
15754 2509638 : switch (entity)
15755 : {
15756 : case X86_DIRFLAG:
15757 : return mode;
15758 2095968 : case AVX_U128:
15759 2095968 : return ix86_avx_u128_mode_after (mode, insn);
15760 : case I387_ROUNDEVEN:
15761 : case I387_TRUNC:
15762 : case I387_FLOOR:
15763 : case I387_CEIL:
15764 : return mode;
15765 0 : default:
15766 0 : gcc_unreachable ();
15767 : }
15768 : }
15769 :
15770 : static int
15771 120 : ix86_dirflag_mode_entry (void)
15772 : {
15773 : /* For TARGET_CLD or in the interrupt handler we can't assume
15774 : direction flag state at function entry. */
15775 120 : if (TARGET_CLD
15776 118 : || cfun->machine->func_type != TYPE_NORMAL)
15777 120 : return X86_DIRFLAG_ANY;
15778 :
15779 : return X86_DIRFLAG_RESET;
15780 : }
15781 :
15782 : static int
15783 122971 : ix86_avx_u128_mode_entry (void)
15784 : {
15785 122971 : tree arg;
15786 :
15787 : /* Entry mode is set to AVX_U128_DIRTY if there are
15788 : 256bit or 512bit modes used in function arguments. */
15789 310402 : for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15790 187431 : arg = TREE_CHAIN (arg))
15791 : {
15792 221404 : rtx incoming = DECL_INCOMING_RTL (arg);
15793 :
15794 221404 : if (incoming && ix86_check_avx_upper_register (incoming))
15795 : return AVX_U128_DIRTY;
15796 : }
15797 :
15798 : return AVX_U128_CLEAN;
15799 : }
15800 :
15801 : /* Return a mode that ENTITY is assumed to be
15802 : switched to at function entry. */
15803 :
15804 : static int
15805 75836 : ix86_mode_entry (int entity)
15806 : {
15807 75836 : switch (entity)
15808 : {
15809 120 : case X86_DIRFLAG:
15810 120 : return ix86_dirflag_mode_entry ();
15811 74567 : case AVX_U128:
15812 74567 : return ix86_avx_u128_mode_entry ();
15813 : case I387_ROUNDEVEN:
15814 : case I387_TRUNC:
15815 : case I387_FLOOR:
15816 : case I387_CEIL:
15817 : return I387_CW_ANY;
15818 0 : default:
15819 0 : gcc_unreachable ();
15820 : }
15821 : }
15822 :
15823 : static int
15824 73318 : ix86_avx_u128_mode_exit (void)
15825 : {
15826 73318 : rtx reg = crtl->return_rtx;
15827 :
15828 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15829 : or 512 bit modes used in the function return register. */
15830 73318 : if (reg && ix86_check_avx_upper_register (reg))
15831 : return AVX_U128_DIRTY;
15832 :
15833 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15834 : modes used in function arguments, otherwise return AVX_U128_CLEAN.
15835 : */
15836 48404 : return ix86_avx_u128_mode_entry ();
15837 : }
15838 :
15839 : /* Return a mode that ENTITY is assumed to be
15840 : switched to at function exit. */
15841 :
15842 : static int
15843 74442 : ix86_mode_exit (int entity)
15844 : {
15845 74442 : switch (entity)
15846 : {
15847 : case X86_DIRFLAG:
15848 : return X86_DIRFLAG_ANY;
15849 73318 : case AVX_U128:
15850 73318 : return ix86_avx_u128_mode_exit ();
15851 1090 : case I387_ROUNDEVEN:
15852 1090 : case I387_TRUNC:
15853 1090 : case I387_FLOOR:
15854 1090 : case I387_CEIL:
15855 1090 : return I387_CW_ANY;
15856 0 : default:
15857 0 : gcc_unreachable ();
15858 : }
15859 : }
15860 :
static int
ix86_mode_priority (int, int n)
{
  /* Mode-switching priority hook: the priority of mode N is simply N
     for every entity.  */
  return n;
}
15866 :
15867 : /* Output code to initialize control word copies used by trunc?f?i and
15868 : rounding patterns. CURRENT_MODE is set to current control word,
15869 : while NEW_MODE is set to new control word. */
15870 :
static void
emit_i387_cw_initialization (int mode)
{
  /* Scratch slot that receives the current control word via fnstcw.  */
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current control word and copy it into a pseudo so the
     rounding-control bits can be rewritten.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  /* Mask 0x0c00 covers the rounding-control (RC) field of the x87
     control word: 0x0000 = nearest, 0x0400 = down, 0x0800 = up,
     0x0c00 = truncate.  */
  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Store the modified control word in its dedicated stack slot; the
     fldcw that performs the actual mode switch loads it from there.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
15921 :
15922 : /* Generate one or more insns to set ENTITY to MODE. */
15923 :
static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      /* Clear the direction flag when a reset is required.  */
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      /* Zero the upper halves of the AVX registers on a transition to
	 the CLEAN state.  */
      if (mode == AVX_U128_CLEAN)
	ix86_expand_avx_vzeroupper ();
      break;
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      /* Materialize the stack slot holding the required control word;
	 ANY and UNINITIALIZED require no code.  */
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
15950 :
15951 : /* Output code for INSN to convert a float to a signed int. OPERANDS
15952 : are the insn operands. The output may be [HSD]Imode and the input
15953 : operand may be [SDX]Fmode. */
15954 :
const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  /* Whether st(0) dies in this insn; if so the popping form is used.  */
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  /* fisttp needs no control-word switching at all.  */
  if (fisttp)
    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  /* Switch to the required rounding mode (operand 3 holds the new
     control word)...  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  /* Skip the leading 'p' (popping suffix) of the template unless the
     value must be popped, i.e. when st(0) dies or DImode forces the
     popping fistp.  */
  p = "p%Z0\t%0";
  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  /* ... and restore the original control word from operand 2.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);

  return "";
}
15993 :
15994 : /* Output code for x87 ffreep insn. The OPNO argument, which may only
15995 : have the values zero or one, indicates the ffreep insn's operand
15996 : from the OPERANDS array. */
15997 :
static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      /* The assembler doesn't understand the ffreep mnemonic, so emit
	 the raw encoding as data instead.  NOTE(review): this writes
	 the two opcode bytes as the 16-bit word 0xc<regno>df -- i.e.
	 byte-swapped for a little-endian .short; confirm against the
	 instruction encoding.  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  /* Fall back to the popping store fstp when ffreep is not wanted.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
16020 :
16021 :
16022 : /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16023 : should be used. UNORDERED_P is true when fucom should be used. */
16024 :
const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* For fcomi-style compares the FP operands start at index 0; for
     fnstsw-style compares operand 0 receives the status word and the
     FP operands start at index 1.  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      /* Compare directly into EFLAGS; append the popping 'p' suffix
	 only when st(0) dies (the + !stack_top_dies skips it).  */
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Comparison against constant zero uses ftst.  */
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  /* Integer operand: compare with ficom.  */
	  gcc_assert (!unordered_p);
	  p = "ficom";
	}
      else
	p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      /* Popping variant when st(0) dies, as above.  */
      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  /* Emit the compare, then copy the FPU status word into operand 0.  */
  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
}
16086 :
16087 : void
16088 112773 : ix86_output_addr_vec_elt (FILE *file, int value)
16089 : {
16090 112773 : const char *directive = ASM_LONG;
16091 :
16092 : #ifdef ASM_QUAD
16093 112773 : if (TARGET_LP64)
16094 101134 : directive = ASM_QUAD;
16095 : #else
16096 : gcc_assert (!TARGET_64BIT);
16097 : #endif
16098 :
16099 112773 : fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16100 112773 : }
16101 :
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
    /* Plain label difference: L<value> - L<rel>.  */
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O: express the entry relative to the function's picbase
	 label.  */
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    /* Assembler lacks @GOTOFF in data sections; spell the GOT-relative
       offset out by hand.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
16131 :
16132 : #define LEA_MAX_STALL (3)
16133 : #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16134 :
16135 : /* Increase given DISTANCE in half-cycles according to
16136 : dependencies between PREV and NEXT instructions.
16137 : Add 1 half-cycle if there is no dependency and
16138 :    go to next cycle if there is some dependency.  */
16139 :
static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  /* No insn pair to analyze: conservatively treat as dependent, i.e.
     round DISTANCE up to an even value and add a full cycle.  */
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  /* Without DF use/def information a dependency cannot be proven;
     assume independent insns and advance half a cycle.  */
  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  /* NEXT depends on PREV when it uses a (non-artificial) register that
     PREV defines; then go to the next full cycle.  */
  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
	return distance + (distance & 1) + 2;

  /* Independent insns: half a cycle.  */
  return distance + 1;
}
16159 :
16160 : /* Function checks if instruction INSN defines register number
16161 : REGNO1 or REGNO2. */
16162 :
16163 : bool
16164 2073 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
16165 : rtx_insn *insn)
16166 : {
16167 2073 : df_ref def;
16168 :
16169 3739 : FOR_EACH_INSN_DEF (def, insn)
16170 2070 : if (DF_REF_REG_DEF_P (def)
16171 2070 : && !DF_REF_IS_ARTIFICIAL (def)
16172 2070 : && (regno1 == DF_REF_REGNO (def)
16173 1682 : || regno2 == DF_REF_REGNO (def)))
16174 : return true;
16175 :
16176 : return false;
16177 : }
16178 :
16179 : /* Function checks if instruction INSN uses register number
16180 : REGNO as a part of address expression. */
16181 :
16182 : static bool
16183 1182 : insn_uses_reg_mem (unsigned int regno, rtx insn)
16184 : {
16185 1182 : df_ref use;
16186 :
16187 2475 : FOR_EACH_INSN_USE (use, insn)
16188 1384 : if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
16189 : return true;
16190 :
16191 : return false;
16192 : }
16193 :
16194 : /* Search backward for non-agu definition of register number REGNO1
16195 : or register number REGNO2 in basic block starting from instruction
16196 : START up to head of basic block or instruction INSN.
16197 :
16198 : Function puts true value into *FOUND var if definition was found
16199 : and false otherwise.
16200 :
16201 : Distance in half-cycles between START and found instruction or head
16202 : of BB is added to DISTANCE and returned. */
16203 :
static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to the
	 distance and can carry a definition.  */
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      /* A defining lea is itself an AGU definition, not the
		 non-AGU one we are searching for; scan past it.  */
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      /* Stop at the head of the basic block.  */
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
16242 :
16243 : /* Search backward for non-agu definition of register number REGNO1
16244 : or register number REGNO2 in INSN's basic block until
16245 : 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16246 : 2. Reach neighbor BBs boundary, or
16247 : 3. Reach agu definition.
16248 : Returns the distance between the non-agu definition point and INSN.
16249 : If no definition point, returns -1. */
16250 :
static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First scan backwards within INSN's own basic block.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop edge lets us wrap around and continue the scan
	 from the end of the same block.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  /* Otherwise scan every predecessor block and keep the
	     shortest distance among those where a definition was
	     found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found)
    return -1;

  /* Convert half-cycles to full cycles.  */
  return distance >> 1;
}
16313 :
16314 : /* Return the distance in half-cycles between INSN and the next
16315 : insn that uses register number REGNO in memory address added
16316 : to DISTANCE. Return -1 if REGNO0 is set.
16317 :
16318 : Put true value into *FOUND if register usage was found and
16319 : false otherwise.
16320 : Put true value into *REDEFINED if register redefinition was
16321 : found and false otherwise. */
16322 :
static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns are considered.  */
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      /* Stop at the end of the basic block.  */
      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
16378 :
16379 : /* Return the distance between INSN and the next insn that uses
16380 : register number REGNO0 in memory address. Return -1 if no such
16381 : a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16382 :
static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First scan forward within INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop edge lets us wrap around and continue the scan
	 from the head of the same block.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  /* Otherwise scan every successor block and keep the shortest
	     distance among those where a use was found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  /* Convert half-cycles to full cycles.  */
  return distance >> 1;
}
16445 :
16446 : /* Define this macro to tune LEA priority vs ADD, it take effect when
16447 : there is a dilemma of choosing LEA or ADD
16448 : Negative value: ADD is more preferred than LEA
16449 : Zero: Neutral
16450 : Positive value: LEA is more preferred than ADD. */
16451 : #define IX86_LEA_PRIORITY 0
16452 :
16453 : /* Return true if usage of lea INSN has performance advantage
16454 : over a sequence of instructions. Instructions sequence has
16455 : SPLIT_COST cycles higher latency than lea latency. */
16456 :
static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* On Bonnell, weigh the distance to the closest non-AGU definition
     of the sources against the distance to the closest AGU use of the
     destination.  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in a memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
16514 :
16515 : /* Return true if we need to split op0 = op1 + op2 into a sequence of
16516 : move and add to avoid AGU stalls. */
16517 :
16518 : bool
16519 9115323 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16520 : {
16521 9115323 : unsigned int regno0, regno1, regno2;
16522 :
16523 : /* Check if we need to optimize. */
16524 9115323 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16525 9114508 : return false;
16526 :
16527 815 : regno0 = true_regnum (operands[0]);
16528 815 : regno1 = true_regnum (operands[1]);
16529 815 : regno2 = true_regnum (operands[2]);
16530 :
16531 : /* We need to split only adds with non destructive
16532 : destination operand. */
16533 815 : if (regno0 == regno1 || regno0 == regno2)
16534 : return false;
16535 : else
16536 245 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16537 : }
16538 :
16539 : /* Return true if we should emit lea instruction instead of mov
16540 : instruction. */
16541 :
16542 : bool
16543 29543582 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16544 : {
16545 29543582 : unsigned int regno0, regno1;
16546 :
16547 : /* Check if we need to optimize. */
16548 29543582 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16549 29541296 : return false;
16550 :
16551 : /* Use lea for reg to reg moves only. */
16552 2286 : if (!REG_P (operands[0]) || !REG_P (operands[1]))
16553 : return false;
16554 :
16555 464 : regno0 = true_regnum (operands[0]);
16556 464 : regno1 = true_regnum (operands[1]);
16557 :
16558 464 : return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16559 : }
16560 :
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  /* regno1/regno2 track the base and index registers; they stay
     INVALID_REGNUM when the corresponding component is absent.  */
  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
16663 :
16664 : /* Return true if it is ok to optimize an ADD operation to LEA
16665 : operation to avoid flag register consumation. For most processors,
16666 : ADD is faster than LEA. For the processors like BONNELL, if the
16667 : destination register of LEA holds an actual address which will be
16668 : used soon, LEA is better and otherwise ADD is better. */
16669 :
16670 : bool
16671 9172942 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16672 : {
16673 9172942 : unsigned int regno0 = true_regnum (operands[0]);
16674 9172942 : unsigned int regno1 = true_regnum (operands[1]);
16675 9172942 : unsigned int regno2 = true_regnum (operands[2]);
16676 :
16677 : /* If a = b + c, (a!=b && a!=c), must use lea form. */
16678 9172942 : if (regno0 != regno1 && regno0 != regno2)
16679 : return true;
16680 :
16681 7130270 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16682 7129691 : return false;
16683 :
16684 579 : return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16685 : }
16686 :
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  Both arguments are insn patterns; PARALLELs are
   searched recursively.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Try each element of the PARALLEL as the defining SET.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Try each element of the PARALLEL as the using SET.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
16755 :
16756 : /* Return true if destination reg of SET_INSN is shift count of
16757 : USE_INSN. */
16758 :
16759 : bool
16760 25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16761 : {
16762 25 : return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16763 25 : PATTERN (use_insn));
16764 : }
16765 :
16766 : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16767 : are ok, keeping in mind the possible movddup alternative. */
16768 :
16769 : bool
16770 89963 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16771 : {
16772 89963 : if (MEM_P (operands[0]))
16773 2041 : return rtx_equal_p (operands[0], operands[1 + high]);
16774 87922 : if (MEM_P (operands[1]) && MEM_P (operands[2]))
16775 951 : return false;
16776 : return true;
16777 : }
16778 :
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  Otherwise only element 0 gets VALUE and the remaining
   elements are zero.  Returns a CONST_VECTOR rtx of MODE.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      /* Only the replicated form is expected for integer element
	 modes.  */
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* Element 0 always receives VALUE.  */
      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
16839 :
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  Returns the mask forced into a
   register.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Select the integer mode IMODE matching one element's width and
     the vector mode for the final constant (VOIDmode for the scalar
     TImode/TFmode case).  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build a value with just the element's sign bit set, or with
     everything but the sign bit when INVERT.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  /* Scalar (TImode/TFmode) case: no vector to build.  */
  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
16917 :
/* Return HOST_WIDE_INT for const vector OP in MODE.  The vector must
   fit in one word; elements are packed into the result in increasing
   bit position order.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  /* The whole vector must fit into a single HOST_WIDE_INT word.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      /* Integer elements: insert each element's value at its bit
	 offset.  */
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    case E_V1SImode:
    case E_V1DImode:
      /* Single-element vector: the lone element is the result.  */
      op = CONST_VECTOR_ELT (op, 0);
      return INTVAL (op);
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      /* FP elements: convert each to its target bit pattern first,
	 then pack like the integer case.  */
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
16970 :
16971 32 : int ix86_get_flags_cc (rtx_code code)
16972 : {
16973 32 : switch (code)
16974 : {
16975 : case NE: return X86_CCNE;
16976 : case EQ: return X86_CCE;
16977 : case GE: return X86_CCNL;
16978 : case GT: return X86_CCNLE;
16979 : case LE: return X86_CCLE;
16980 : case LT: return X86_CCL;
16981 : case GEU: return X86_CCNB;
16982 : case GTU: return X86_CCNBE;
16983 : case LEU: return X86_CCBE;
16984 : case LTU: return X86_CCB;
16985 : default: return -1;
16986 : }
16987 : }
16988 :
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      /* CCNOmode also satisfies a CCmode request when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
      /* The fall-through chain below encodes the partial order of the
	 general CC modes: each mode rejects the requests it cannot
	 satisfy, then falls through to the weaker modes' checks.  */
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      /* These special modes only match an exact request.  */
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
17046 :
/* Return the CC mode that a comparison CODE of OP0 with OP1 needs:
   the least constrained flags mode that still captures the tested
   condition.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All scalar FP comparisons use the FP flags mode.  Decimal float
     is never expected here.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      /* Similarly for the comparison of addcarry/subborrow pattern.  */
      else if (code == LTU
	       && GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == PLUS
	       && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
	       && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      /* CCmode should be used in all other cases.  */
      return CCmode;
    }
}
17140 :
17141 : /* Return TRUE or FALSE depending on whether the ptest instruction
17142 : INSN has source and destination with suitable matching CC modes. */
17143 :
17144 : bool
17145 94451 : ix86_match_ptest_ccmode (rtx insn)
17146 : {
17147 94451 : rtx set, src;
17148 94451 : machine_mode set_mode;
17149 :
17150 94451 : set = PATTERN (insn);
17151 94451 : gcc_assert (GET_CODE (set) == SET);
17152 94451 : src = SET_SRC (set);
17153 94451 : gcc_assert (GET_CODE (src) == UNSPEC
17154 : && XINT (src, 1) == UNSPEC_PTEST);
17155 :
17156 94451 : set_mode = GET_MODE (src);
17157 94451 : if (set_mode != CCZmode
17158 : && set_mode != CCCmode
17159 : && set_mode != CCmode)
17160 : return false;
17161 94451 : return GET_MODE (SET_DEST (set)) == set_mode;
17162 : }
17163 :
17164 : /* Return the fixed registers used for condition codes. */
17165 :
17166 : static bool
17167 18822845 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17168 : {
17169 18822845 : *p1 = FLAGS_REG;
17170 18822845 : *p2 = INVALID_REGNUM;
17171 18822845 : return true;
17172 : }
17173 :
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC is the meet of CCGC and CCGOC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  /* CCNO is the meet of CCNO and CCGOC.  */
  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZ is weaker than the general modes: the other mode wins.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  /* Any remaining pair of integer CC modes falls back to
	     plain CCmode.  */
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
17241 :
17242 : /* Return strategy to use for floating-point. We assume that fcomi is always
17243 : preferrable where available, since that is also true when looking at size
17244 : (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17245 :
17246 : enum ix86_fpcmp_strategy
17247 5531039 : ix86_fp_comparison_strategy (enum rtx_code)
17248 : {
17249 : /* Do fcomi/sahf based test when profitable. */
17250 :
17251 5531039 : if (TARGET_CMOVE)
17252 : return IX86_FPCMP_COMI;
17253 :
17254 0 : if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
17255 0 : return IX86_FPCMP_SAHF;
17256 :
17257 : return IX86_FPCMP_ARITH;
17258 : }
17259 :
17260 : /* Convert comparison codes we use to represent FP comparison to integer
17261 : code that will result in proper branch. Return UNKNOWN if no such code
17262 : is available. */
17263 :
17264 : enum rtx_code
17265 580954 : ix86_fp_compare_code_to_integer (enum rtx_code code)
17266 : {
17267 580954 : switch (code)
17268 : {
17269 : case GT:
17270 : return GTU;
17271 18009 : case GE:
17272 18009 : return GEU;
17273 : case ORDERED:
17274 : case UNORDERED:
17275 : return code;
17276 118749 : case UNEQ:
17277 118749 : return EQ;
17278 17430 : case UNLT:
17279 17430 : return LTU;
17280 31130 : case UNLE:
17281 31130 : return LEU;
17282 113340 : case LTGT:
17283 113340 : return NE;
17284 681 : case EQ:
17285 681 : case NE:
17286 681 : if (TARGET_AVX10_2)
17287 : return code;
17288 : /* FALLTHRU. */
17289 221 : default:
17290 221 : return UNKNOWN;
17291 : }
17292 : }
17293 :
17294 : /* Zero extend possibly SImode EXP to Pmode register. */
17295 : rtx
17296 46575 : ix86_zero_extend_to_Pmode (rtx exp)
17297 : {
17298 58437 : return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
17299 : }
17300 :
17301 : /* Return true if the function is called via PLT. */
17302 :
17303 : bool
17304 1000178 : ix86_call_use_plt_p (rtx call_op)
17305 : {
17306 1000178 : if (SYMBOL_REF_LOCAL_P (call_op))
17307 : {
17308 199464 : if (SYMBOL_REF_DECL (call_op)
17309 199464 : && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
17310 : {
17311 : /* NB: All ifunc functions must be called via PLT. */
17312 116304 : cgraph_node *node
17313 116304 : = cgraph_node::get (SYMBOL_REF_DECL (call_op));
17314 116304 : if (node && node->ifunc_resolver)
17315 : return true;
17316 : }
17317 199444 : return false;
17318 : }
17319 : return true;
17320 : }
17321 :
17322 : /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
17323 : the PLT entry will be used as the function address for local IFUNC
17324 : functions. When the PIC register is needed for PLT call, indirect
17325 : call via the PLT entry will fail since the PIC register may not be
17326 : set up properly for indirect call. In this case, we should return
17327 : false. */
17328 :
17329 : static bool
17330 768688329 : ix86_ifunc_ref_local_ok (void)
17331 : {
17332 768688329 : return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
17333 : }
17334 :
17335 : /* Return true if the function being called was marked with attribute
17336 : "noplt" or using -fno-plt and we are compiling for non-PIC. We need
17337 : to handle the non-PIC case in the backend because there is no easy
17338 : interface for the front-end to force non-PLT calls to use the GOT.
17339 : This is currently used only with 64-bit or 32-bit GOT32X ELF targets
17340 : to call the function marked "noplt" indirectly. */
17341 :
17342 : bool
17343 5916374 : ix86_nopic_noplt_attribute_p (rtx call_op)
17344 : {
17345 5421862 : if (flag_pic || ix86_cmodel == CM_LARGE
17346 : || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17347 : || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17348 11338236 : || SYMBOL_REF_LOCAL_P (call_op))
17349 : return false;
17350 :
17351 3800964 : tree symbol_decl = SYMBOL_REF_DECL (call_op);
17352 :
17353 3800964 : if (!flag_plt
17354 3800964 : || (symbol_decl != NULL_TREE
17355 3800932 : && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
17356 34 : return true;
17357 :
17358 : return false;
17359 : }
17360 :
17361 : /* Helper to output the jmp/call. */
17362 : static void
17363 33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
17364 : {
17365 33 : if (thunk_name != NULL)
17366 : {
17367 22 : if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17368 1 : && ix86_indirect_branch_cs_prefix)
17369 1 : fprintf (asm_out_file, "\tcs\n");
17370 22 : fprintf (asm_out_file, "\tjmp\t");
17371 22 : assemble_name (asm_out_file, thunk_name);
17372 22 : putc ('\n', asm_out_file);
17373 22 : if ((ix86_harden_sls & harden_sls_indirect_jmp))
17374 2 : fputs ("\tint3\n", asm_out_file);
17375 : }
17376 : else
17377 11 : output_indirect_thunk (regno);
17378 33 : }
17379 :
17380 : /* Output indirect branch via a call and return thunk. CALL_OP is a
17381 : register which contains the branch target. XASM is the assembly
17382 : template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17383 : A normal call is converted to:
17384 :
17385 : call __x86_indirect_thunk_reg
17386 :
17387 : and a tail call is converted to:
17388 :
17389 : jmp __x86_indirect_thunk_reg
17390 : */
17391 :
17392 : static void
17393 50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17394 : {
17395 50 : char thunk_name_buf[32];
17396 50 : char *thunk_name;
17397 50 : enum indirect_thunk_prefix need_prefix
17398 50 : = indirect_thunk_need_prefix (current_output_insn);
17399 50 : int regno = REGNO (call_op);
17400 :
17401 50 : if (cfun->machine->indirect_branch_type
17402 50 : != indirect_branch_thunk_inline)
17403 : {
17404 39 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17405 16 : SET_HARD_REG_BIT (indirect_thunks_used, regno);
17406 :
17407 39 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17408 39 : thunk_name = thunk_name_buf;
17409 : }
17410 : else
17411 : thunk_name = NULL;
17412 :
17413 50 : if (sibcall_p)
17414 27 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17415 : else
17416 : {
17417 23 : if (thunk_name != NULL)
17418 : {
17419 17 : if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
17420 1 : && ix86_indirect_branch_cs_prefix)
17421 1 : fprintf (asm_out_file, "\tcs\n");
17422 17 : fprintf (asm_out_file, "\tcall\t");
17423 17 : assemble_name (asm_out_file, thunk_name);
17424 17 : putc ('\n', asm_out_file);
17425 17 : return;
17426 : }
17427 :
17428 6 : char indirectlabel1[32];
17429 6 : char indirectlabel2[32];
17430 :
17431 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17432 : INDIRECT_LABEL,
17433 : indirectlabelno++);
17434 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17435 : INDIRECT_LABEL,
17436 : indirectlabelno++);
17437 :
17438 : /* Jump. */
17439 6 : fputs ("\tjmp\t", asm_out_file);
17440 6 : assemble_name_raw (asm_out_file, indirectlabel2);
17441 6 : fputc ('\n', asm_out_file);
17442 :
17443 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17444 :
17445 6 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17446 :
17447 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17448 :
17449 : /* Call. */
17450 6 : fputs ("\tcall\t", asm_out_file);
17451 6 : assemble_name_raw (asm_out_file, indirectlabel1);
17452 6 : fputc ('\n', asm_out_file);
17453 : }
17454 : }
17455 :
/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	jmp L2
   L1:
	push CALL_OP
	jmp __x86_indirect_thunk
   L2:
	call L1

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */

static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  /* regno == -1 selects the register-less __x86_indirect_thunk,
     which pops its target from the stack.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    /* NULL means the thunk body is emitted inline.  */
    thunk_name = NULL;

  /* Build the "push{q|l} CALL_OP" template that places the branch
     target on top of the stack for the thunk.  */
  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
17568 :
17569 : /* Output indirect branch via a call and return thunk. CALL_OP is
17570 : the branch target. XASM is the assembly template for CALL_OP.
17571 : Branch is a tail call if SIBCALL_P is true. */
17572 :
17573 : static void
17574 50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
17575 : bool sibcall_p)
17576 : {
17577 50 : if (REG_P (call_op))
17578 50 : ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17579 : else
17580 0 : ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17581 50 : }
17582 :
17583 : /* Output indirect jump. CALL_OP is the jump target. */
17584 :
17585 : const char *
17586 7633 : ix86_output_indirect_jmp (rtx call_op)
17587 : {
17588 7633 : if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17589 : {
17590 : /* We can't have red-zone since "call" in the indirect thunk
17591 : pushes the return address onto stack, destroying red-zone. */
17592 4 : if (ix86_red_zone_used)
17593 0 : gcc_unreachable ();
17594 :
17595 4 : ix86_output_indirect_branch (call_op, "%0", true);
17596 : }
17597 : else
17598 7629 : output_asm_insn ("%!jmp\t%A0", &call_op);
17599 7633 : return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17600 : }
17601 :
17602 : /* Output return instrumentation for current function if needed. */
17603 :
17604 : static void
17605 1708718 : output_return_instrumentation (void)
17606 : {
17607 1708718 : if (ix86_instrument_return != instrument_return_none
17608 6 : && flag_fentry
17609 1708724 : && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17610 : {
17611 5 : if (ix86_flag_record_return)
17612 5 : fprintf (asm_out_file, "1:\n");
17613 5 : switch (ix86_instrument_return)
17614 : {
17615 2 : case instrument_return_call:
17616 2 : fprintf (asm_out_file, "\tcall\t__return__\n");
17617 2 : break;
17618 3 : case instrument_return_nop5:
17619 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17620 3 : fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17621 3 : break;
17622 : case instrument_return_none:
17623 : break;
17624 : }
17625 :
17626 5 : if (ix86_flag_record_return)
17627 : {
17628 5 : fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17629 5 : fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17630 5 : fprintf (asm_out_file, "\t.previous\n");
17631 : }
17632 : }
17633 1708718 : }
17634 :
17635 : /* Output function return. CALL_OP is the jump target. Add a REP
17636 : prefix to RET if LONG_P is true and function return is kept. */
17637 :
17638 : const char *
17639 1578236 : ix86_output_function_return (bool long_p)
17640 : {
17641 1578236 : output_return_instrumentation ();
17642 :
17643 1578236 : if (cfun->machine->function_return_type != indirect_branch_keep)
17644 : {
17645 18 : char thunk_name[32];
17646 18 : enum indirect_thunk_prefix need_prefix
17647 18 : = indirect_thunk_need_prefix (current_output_insn);
17648 :
17649 18 : if (cfun->machine->function_return_type
17650 18 : != indirect_branch_thunk_inline)
17651 : {
17652 13 : bool need_thunk = (cfun->machine->function_return_type
17653 : == indirect_branch_thunk);
17654 13 : indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17655 : true);
17656 13 : indirect_return_needed |= need_thunk;
17657 13 : fprintf (asm_out_file, "\tjmp\t");
17658 13 : assemble_name (asm_out_file, thunk_name);
17659 13 : putc ('\n', asm_out_file);
17660 : }
17661 : else
17662 5 : output_indirect_thunk (INVALID_REGNUM);
17663 :
17664 18 : return "";
17665 : }
17666 :
17667 3155947 : output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17668 1578218 : return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17669 : }
17670 :
17671 : /* Output indirect function return. RET_OP is the function return
17672 : target. */
17673 :
17674 : const char *
17675 17 : ix86_output_indirect_function_return (rtx ret_op)
17676 : {
17677 17 : if (cfun->machine->function_return_type != indirect_branch_keep)
17678 : {
17679 0 : char thunk_name[32];
17680 0 : enum indirect_thunk_prefix need_prefix
17681 0 : = indirect_thunk_need_prefix (current_output_insn);
17682 0 : unsigned int regno = REGNO (ret_op);
17683 0 : gcc_assert (regno == CX_REG);
17684 :
17685 0 : if (cfun->machine->function_return_type
17686 0 : != indirect_branch_thunk_inline)
17687 : {
17688 0 : bool need_thunk = (cfun->machine->function_return_type
17689 : == indirect_branch_thunk);
17690 0 : indirect_thunk_name (thunk_name, regno, need_prefix, true);
17691 :
17692 0 : if (need_thunk)
17693 : {
17694 0 : indirect_return_via_cx = true;
17695 0 : SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17696 : }
17697 0 : fprintf (asm_out_file, "\tjmp\t");
17698 0 : assemble_name (asm_out_file, thunk_name);
17699 0 : putc ('\n', asm_out_file);
17700 : }
17701 : else
17702 0 : output_indirect_thunk (regno);
17703 : }
17704 : else
17705 : {
17706 17 : output_asm_insn ("%!jmp\t%A0", &ret_op);
17707 17 : if (ix86_harden_sls & harden_sls_indirect_jmp)
17708 1 : fputs ("\tint3\n", asm_out_file);
17709 : }
17710 17 : return "";
17711 : }
17712 :
/* Output the assembly for a call instruction.  CALL_OP is the call
   target: a symbol for direct calls, or a register/memory operand
   for indirect calls.  Returns trailing assembly to append after
   the branch: "int3", "nop" or "".  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* True when indirect branches must be routed through a thunk
     (-mindirect-branch=); never done on SEH targets.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      /* noplt: fetch the target from the GOT rather than
		 going through the PLT; this makes the branch
		 indirect.  */
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  /* Harden the indirect tail call against straight-line
	     speculation with a trailing int3.  */
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  /* noplt: call through the GOT entry, i.e. indirectly.  */
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
17856 :
17857 : /* Return a MEM corresponding to a stack slot with mode MODE.
17858 : Allocate a new slot if necessary.
17859 :
17860 : The RTL for a function can have several slots available: N is
17861 : which slot to use. */
17862 :
17863 : rtx
17864 22366 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17865 : {
17866 22366 : struct stack_local_entry *s;
17867 :
17868 22366 : gcc_assert (n < MAX_386_STACK_LOCALS);
17869 :
17870 33727 : for (s = ix86_stack_locals; s; s = s->next)
17871 31116 : if (s->mode == mode && s->n == n)
17872 19755 : return validize_mem (copy_rtx (s->rtl));
17873 :
17874 2611 : int align = 0;
17875 : /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17876 : alignment with -m32 -mpreferred-stack-boundary=2. */
17877 2611 : if (mode == DImode
17878 329 : && !TARGET_64BIT
17879 329 : && n == SLOT_FLOATxFDI_387
17880 2940 : && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17881 : align = 32;
17882 2611 : s = ggc_alloc<stack_local_entry> ();
17883 2611 : s->n = n;
17884 2611 : s->mode = mode;
17885 5222 : s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17886 :
17887 2611 : s->next = ix86_stack_locals;
17888 2611 : ix86_stack_locals = s;
17889 2611 : return validize_mem (copy_rtx (s->rtl));
17890 : }
17891 :
17892 : static void
17893 1480112 : ix86_instantiate_decls (void)
17894 : {
17895 1480112 : struct stack_local_entry *s;
17896 :
17897 1480112 : for (s = ix86_stack_locals; s; s = s->next)
17898 0 : if (s->rtl != NULL_RTX)
17899 0 : instantiate_decl_rtl (s->rtl);
17900 1480112 : }
17901 :
17902 : /* Check whether x86 address PARTS is a pc-relative address. */
17903 :
17904 : bool
17905 27330350 : ix86_rip_relative_addr_p (struct ix86_address *parts)
17906 : {
17907 27330350 : rtx base, index, disp;
17908 :
17909 27330350 : base = parts->base;
17910 27330350 : index = parts->index;
17911 27330350 : disp = parts->disp;
17912 :
17913 27330350 : if (disp && !base && !index)
17914 : {
17915 25572114 : if (TARGET_64BIT)
17916 : {
17917 23907639 : rtx symbol = disp;
17918 :
17919 23907639 : if (GET_CODE (disp) == CONST)
17920 7846182 : symbol = XEXP (disp, 0);
17921 23907639 : if (GET_CODE (symbol) == PLUS
17922 7330399 : && CONST_INT_P (XEXP (symbol, 1)))
17923 7330399 : symbol = XEXP (symbol, 0);
17924 :
17925 23907639 : if (LABEL_REF_P (symbol)
17926 23900063 : || (SYMBOL_REF_P (symbol)
17927 22632605 : && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17928 25175097 : || (GET_CODE (symbol) == UNSPEC
17929 534681 : && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17930 : || XINT (symbol, 1) == UNSPEC_PCREL
17931 : || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17932 23147380 : return true;
17933 : }
17934 : }
17935 : return false;
17936 : }
17937 :
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses encode no explicit address bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* One extra byte for a non-default segment.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* satisfies_constraint_K: displacement fits in a signed
	     8-bit disp8 when there is also a base register.  */
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
18037 :
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  Returns the
   number of immediate bytes in INSN's encoding (0 when it has no
   constant operand).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* At most one immediate operand is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    /* Truncate the value to the operand's mode before the
	       signed 8-bit range test.  */
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		/* Fits the sign-extended imm8 short form.  */
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
18097 :
/* Compute default value for "length_address" attribute: the number of
   address bytes of INSN's (first) memory operand, or of an LEA source,
   0 when the insn has no memory operand.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* For LEA the "address" is the SET_SRC expression itself.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Walk to this operand's constraint string for the
		 matched alternative.  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
18151 :
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true
   for insns in the 0f opcode map (the only ones eligible for the
   2-byte VEX form); HAS_VEX_W is true when the VEX.W bit is set.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  /* A memory operand without extended registers still allows the
     2-byte form; otherwise use whatever the register scan decided.  */
  return has_mem ? 2 + 1 : reg_only;
}
18205 :
18206 :
18207 : static bool
18208 : ix86_class_likely_spilled_p (reg_class_t);
18209 :
18210 : /* Returns true if lhs of insn is HW function argument register and set up
18211 : is_spilled to true if it is likely spilled HW register. */
18212 : static bool
18213 1145 : insn_is_function_arg (rtx insn, bool* is_spilled)
18214 : {
18215 1145 : rtx dst;
18216 :
18217 1145 : if (!NONDEBUG_INSN_P (insn))
18218 : return false;
18219 : /* Call instructions are not movable, ignore it. */
18220 1145 : if (CALL_P (insn))
18221 : return false;
18222 1071 : insn = PATTERN (insn);
18223 1071 : if (GET_CODE (insn) == PARALLEL)
18224 73 : insn = XVECEXP (insn, 0, 0);
18225 1071 : if (GET_CODE (insn) != SET)
18226 : return false;
18227 1071 : dst = SET_DEST (insn);
18228 975 : if (REG_P (dst) && HARD_REGISTER_P (dst)
18229 1940 : && ix86_function_arg_regno_p (REGNO (dst)))
18230 : {
18231 : /* Is it likely spilled HW register? */
18232 869 : if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
18233 869 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
18234 825 : *is_spilled = true;
18235 869 : return true;
18236 : }
18237 : return false;
18238 : }
18239 :
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      /* The first non-debug insn before the call is not an argument
	 set-up: there is no argument chain to protect.  */
      return NULL;
    }

  /* Walk backwards, chaining each preceding argument set-up insn to
     the one after it.  */
  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
18294 :
18295 : /* Add output or anti dependency from insn to first_arg to restrict its code
18296 : motion. */
18297 : static void
18298 2335 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
18299 : {
18300 2335 : rtx set;
18301 2335 : rtx tmp;
18302 :
18303 2335 : set = single_set (insn);
18304 2335 : if (!set)
18305 : return;
18306 1453 : tmp = SET_DEST (set);
18307 1453 : if (REG_P (tmp))
18308 : {
18309 : /* Add output dependency to the first function argument. */
18310 1258 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18311 1258 : return;
18312 : }
18313 : /* Add anti dependency. */
18314 195 : add_dependence (first_arg, insn, REG_DEP_ANTI);
18315 : }
18316 :
18317 : /* Avoid cross block motion of function argument through adding dependency
18318 : from the first non-jump instruction in bb. */
18319 : static void
18320 68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
18321 : {
18322 68 : rtx_insn *insn = BB_END (bb);
18323 :
18324 134 : while (insn)
18325 : {
18326 134 : if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
18327 : {
18328 67 : rtx set = single_set (insn);
18329 67 : if (set)
18330 : {
18331 67 : avoid_func_arg_motion (arg, insn);
18332 67 : return;
18333 : }
18334 : }
18335 67 : if (insn == BB_HEAD (bb))
18336 : return;
18337 66 : insn = PREV_INSN (insn);
18338 : }
18339 : }
18340 :
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  HEAD/TAIL delimit the insn
   sequence being scheduled.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  /* Only relevant before register allocation.  */
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    /* Resume the backward scan from the first argument insn.  */
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
18391 :
18392 : /* Hook for pre-reload schedule - set priority of moves from likely spilled
18393 : HW registers to maximum, to schedule them at soon as possible. These are
18394 : moves from function argument registers at the top of the function entry
18395 : and moves from function return value registers after call. */
18396 : static int
18397 108717909 : ix86_adjust_priority (rtx_insn *insn, int priority)
18398 : {
18399 108717909 : rtx set;
18400 :
18401 108717909 : if (reload_completed)
18402 : return priority;
18403 :
18404 14043 : if (!NONDEBUG_INSN_P (insn))
18405 : return priority;
18406 :
18407 12477 : set = single_set (insn);
18408 12477 : if (set)
18409 : {
18410 11905 : rtx tmp = SET_SRC (set);
18411 11905 : if (REG_P (tmp)
18412 2530 : && HARD_REGISTER_P (tmp)
18413 499 : && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18414 11905 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18415 448 : return current_sched_info->sched_max_insns_priority;
18416 : }
18417 :
18418 : return priority;
18419 : }
18420 :
18421 : /* Prepare for scheduling pass. */
18422 : static void
18423 965986 : ix86_sched_init_global (FILE *, int, int)
18424 : {
18425 : /* Install scheduling hooks for current CPU. Some of these hooks are used
18426 : in time-critical parts of the scheduler, so we only set them up when
18427 : they are actually used. */
18428 965986 : switch (ix86_tune)
18429 : {
18430 919451 : case PROCESSOR_CORE2:
18431 919451 : case PROCESSOR_NEHALEM:
18432 919451 : case PROCESSOR_SANDYBRIDGE:
18433 919451 : case PROCESSOR_HASWELL:
18434 919451 : case PROCESSOR_TREMONT:
18435 919451 : case PROCESSOR_ALDERLAKE:
18436 919451 : case PROCESSOR_GENERIC:
18437 : /* Do not perform multipass scheduling for pre-reload schedule
18438 : to save compile time. */
18439 919451 : if (reload_completed)
18440 : {
18441 918964 : ix86_core2i7_init_hooks ();
18442 918964 : break;
18443 : }
18444 : /* Fall through. */
18445 47022 : default:
18446 47022 : targetm.sched.dfa_post_advance_cycle = NULL;
18447 47022 : targetm.sched.first_cycle_multipass_init = NULL;
18448 47022 : targetm.sched.first_cycle_multipass_begin = NULL;
18449 47022 : targetm.sched.first_cycle_multipass_issue = NULL;
18450 47022 : targetm.sched.first_cycle_multipass_backtrack = NULL;
18451 47022 : targetm.sched.first_cycle_multipass_end = NULL;
18452 47022 : targetm.sched.first_cycle_multipass_fini = NULL;
18453 47022 : break;
18454 : }
18455 965986 : }
18456 :
18457 :
18458 : /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18459 :
18460 : static HOST_WIDE_INT
18461 721342 : ix86_static_rtx_alignment (machine_mode mode)
18462 : {
18463 721342 : if (mode == DFmode)
18464 : return 64;
18465 : if (ALIGN_MODE_128 (mode))
18466 156579 : return MAX (128, GET_MODE_ALIGNMENT (mode));
18467 479968 : return GET_MODE_ALIGNMENT (mode);
18468 : }
18469 :
18470 : /* Implement TARGET_CONSTANT_ALIGNMENT. */
18471 :
18472 : static HOST_WIDE_INT
18473 6871682 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18474 : {
18475 6871682 : if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18476 : || TREE_CODE (exp) == INTEGER_CST)
18477 : {
18478 366240 : machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18479 366240 : HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18480 366240 : return MAX (mode_align, align);
18481 : }
18482 6364192 : else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18483 9612932 : && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18484 : return BITS_PER_WORD;
18485 :
18486 : return align;
18487 : }
18488 :
18489 : /* Implement TARGET_EMPTY_RECORD_P. */
18490 :
18491 : static bool
18492 1442610527 : ix86_is_empty_record (const_tree type)
18493 : {
18494 1442610527 : if (!TARGET_64BIT)
18495 : return false;
18496 1411804127 : return default_is_empty_record (type);
18497 : }
18498 :
18499 : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
18500 :
18501 : static void
18502 15182377 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
18503 : {
18504 15182377 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
18505 :
18506 15182377 : if (!cum->warn_empty)
18507 : return;
18508 :
18509 12997267 : if (!TYPE_EMPTY_P (type))
18510 : return;
18511 :
18512 : /* Don't warn if the function isn't visible outside of the TU. */
18513 14649 : if (cum->decl && !TREE_PUBLIC (cum->decl))
18514 : return;
18515 :
18516 13189 : tree decl = cum->decl;
18517 13189 : if (!decl)
18518 : /* If we don't know the target, look at the current TU. */
18519 39 : decl = current_function_decl;
18520 :
18521 13189 : const_tree ctx = get_ultimate_context (decl);
18522 13189 : if (ctx == NULL_TREE
18523 26344 : || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
18524 : return;
18525 :
18526 : /* If the actual size of the type is zero, then there is no change
18527 : in how objects of this size are passed. */
18528 72 : if (int_size_in_bytes (type) == 0)
18529 : return;
18530 :
18531 66 : warning (OPT_Wabi, "empty class %qT parameter passing ABI "
18532 : "changes in %<-fabi-version=12%> (GCC 8)", type);
18533 :
18534 : /* Only warn once. */
18535 66 : cum->warn_empty = false;
18536 : }
18537 :
18538 : /* This hook returns name of multilib ABI. */
18539 :
18540 : static const char *
18541 3393039 : ix86_get_multilib_abi_name (void)
18542 : {
18543 3393039 : if (!(TARGET_64BIT_P (ix86_isa_flags)))
18544 : return "i386";
18545 3349083 : else if (TARGET_X32_P (ix86_isa_flags))
18546 : return "x32";
18547 : else
18548 3349083 : return "x86_64";
18549 : }
18550 :
18551 : /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18552 : the data type, and ALIGN is the alignment that the object would
18553 : ordinarily have. */
18554 :
18555 : static int
18556 0 : iamcu_alignment (tree type, int align)
18557 : {
18558 0 : machine_mode mode;
18559 :
18560 0 : if (align < 32 || TYPE_USER_ALIGN (type))
18561 : return align;
18562 :
18563 : /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18564 : bytes. */
18565 0 : type = strip_array_types (type);
18566 0 : if (TYPE_ATOMIC (type))
18567 : return align;
18568 :
18569 0 : mode = TYPE_MODE (type);
18570 0 : switch (GET_MODE_CLASS (mode))
18571 : {
18572 : case MODE_INT:
18573 : case MODE_COMPLEX_INT:
18574 : case MODE_COMPLEX_FLOAT:
18575 : case MODE_FLOAT:
18576 : case MODE_DECIMAL_FLOAT:
18577 : return 32;
18578 : default:
18579 : return align;
18580 : }
18581 : }
18582 :
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  OPT selects whether
   optimization-driven alignment increases (beyond strict ABI rules) are
   applied; it is forced off for -malign-data=abi below.  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data= policy: "abi" disables opt-driven increases, "compat"
     caps the cache-line bump at a word, "cacheline" keeps both.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Bump alignment of large constant-size aggregates up to the GCC 4.8
     compat value and then up to the cache-line-derived maximum.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      /* With OPT this is widened from arrays to all aggregates.  */
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  /* Per-type ladder: raise DFmode data to 64 bits and 128-bit-aligned
     modes to 128 bits, looking at the element type for arrays and the
     first field for records/unions.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
18683 :
18684 : /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18685 : static void
18686 31460724 : ix86_lower_local_decl_alignment (tree decl)
18687 : {
18688 31460724 : unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18689 31460724 : DECL_ALIGN (decl), true);
18690 31460724 : if (new_align < DECL_ALIGN (decl))
18691 0 : SET_DECL_ALIGN (decl, new_align);
18692 31460724 : }
18693 :
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  MAY_LOWER permits reducing the alignment (used by
   ix86_lower_local_decl_alignment).  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be a decl, a type, or NULL (caller-save stack slot).  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  /* Per-type ladder mirroring ix86_data_alignment: raise DFmode data
     to 64 bits and 128-bit-aligned modes to 128 bits.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
18807 :
18808 : /* Compute the minimum required alignment for dynamic stack realignment
18809 : purposes for a local variable, parameter or a stack slot. EXP is
18810 : the data type or decl itself, MODE is its mode and ALIGN is the
18811 : alignment that the object would ordinarily have. */
18812 :
18813 : unsigned int
18814 47686965 : ix86_minimum_alignment (tree exp, machine_mode mode,
18815 : unsigned int align)
18816 : {
18817 47686965 : tree type, decl;
18818 :
18819 47686965 : if (exp && DECL_P (exp))
18820 : {
18821 14966988 : type = TREE_TYPE (exp);
18822 14966988 : decl = exp;
18823 : }
18824 : else
18825 : {
18826 : type = exp;
18827 : decl = NULL;
18828 : }
18829 :
18830 47686965 : if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18831 : return align;
18832 :
18833 : /* Don't do dynamic stack realignment for long long objects with
18834 : -mpreferred-stack-boundary=2. */
18835 0 : if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18836 0 : && (!type || (!TYPE_USER_ALIGN (type)
18837 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18838 0 : && (!decl || !DECL_USER_ALIGN (decl)))
18839 : {
18840 0 : gcc_checking_assert (!TARGET_STV);
18841 : return 32;
18842 : }
18843 :
18844 : return align;
18845 : }
18846 :
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.
   FNDECL_OR_TYPE identifies the callee; INCOMING_P presumably selects
   the location as seen inside the callee (verify against callers).
   Returns a REG rtx, or a frame MEM for the 32-bit regparm-3 case.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      /* The calling convention decides which registers remain free.  */
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      /* Record (before reload only) that this function keeps its
		 static chain on the stack; the prologue uses this flag.  */
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
18923 :
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  The byte values stored below
   are x86 machine code, written little-endian into the trampoline.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  /* With -fcf-protection=branch the trampoline must begin with an
     ENDBR marker so indirect jumps to it are allowed.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* 0xbb41: movl $imm32, %r11d (2 opcode bytes + 4 imm).  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0xbb49: movabs $imm64, %r11 (2 opcode bytes + 8 imm).  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;   /* movl $imm32, %eax */
	    case CX_REG:
	      opcode = 0xb9; break;   /* movl $imm32, %ecx */
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;   /* pushl $imm32 */

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* 0xe9: jmp rel32 to the target function.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
19075 :
19076 : static bool
19077 53790351 : ix86_allocate_stack_slots_for_args (void)
19078 : {
19079 : /* Naked functions should not allocate stack slots for arguments. */
19080 53790351 : return !ix86_function_naked (current_function_decl);
19081 : }
19082 :
19083 : static bool
19084 38102474 : ix86_warn_func_return (tree decl)
19085 : {
19086 : /* Naked functions are implemented entirely in assembly, including the
19087 : return sequence, so suppress warnings about this. */
19088 38102474 : return !ix86_function_naked (decl);
19089 : }
19090 :
19091 : /* Return the shift count of a vector by scalar shift builtin second argument
19092 : ARG1. */
19093 : static tree
19094 14142 : ix86_vector_shift_count (tree arg1)
19095 : {
19096 14142 : if (tree_fits_uhwi_p (arg1))
19097 : return arg1;
19098 8316 : else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
19099 : {
19100 : /* The count argument is weird, passed in as various 128-bit
19101 : (or 64-bit) vectors, the low 64 bits from it are the count. */
19102 162 : unsigned char buf[16];
19103 162 : int len = native_encode_expr (arg1, buf, 16);
19104 162 : if (len == 0)
19105 162 : return NULL_TREE;
19106 162 : tree t = native_interpret_expr (uint64_type_node, buf, len);
19107 162 : if (t && tree_fits_uhwi_p (t))
19108 : return t;
19109 : }
19110 : return NULL_TREE;
19111 : }
19112 :
19113 : /* Return true if arg_mask is all ones, ELEMS is elements number of
19114 : corresponding vector. */
19115 : static bool
19116 25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
19117 : {
19118 25042 : if (TREE_CODE (arg_mask) != INTEGER_CST)
19119 : return false;
19120 :
19121 7462 : unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
19122 7462 : if (elems == HOST_BITS_PER_WIDE_INT)
19123 33 : return mask == HOST_WIDE_INT_M1U;
19124 7429 : if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
19125 2681 : return false;
19126 :
19127 : return true;
19128 : }
19129 :
19130 : static tree
19131 68159638 : ix86_fold_builtin (tree fndecl, int n_args,
19132 : tree *args, bool ignore ATTRIBUTE_UNUSED)
19133 : {
19134 68159638 : if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
19135 : {
19136 68159638 : enum ix86_builtins fn_code
19137 68159638 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19138 68159638 : enum rtx_code rcode;
19139 68159638 : bool is_vshift;
19140 68159638 : enum tree_code tcode;
19141 68159638 : bool is_scalar;
19142 68159638 : unsigned HOST_WIDE_INT mask;
19143 :
19144 68159638 : switch (fn_code)
19145 : {
19146 8764 : case IX86_BUILTIN_CPU_IS:
19147 8764 : case IX86_BUILTIN_CPU_SUPPORTS:
19148 8764 : gcc_assert (n_args == 1);
19149 8764 : return fold_builtin_cpu (fndecl, args);
19150 :
19151 24861 : case IX86_BUILTIN_NANQ:
19152 24861 : case IX86_BUILTIN_NANSQ:
19153 24861 : {
19154 24861 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19155 24861 : const char *str = c_getstr (*args);
19156 24861 : int quiet = fn_code == IX86_BUILTIN_NANQ;
19157 24861 : REAL_VALUE_TYPE real;
19158 :
19159 24861 : if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
19160 24861 : return build_real (type, real);
19161 0 : return NULL_TREE;
19162 : }
19163 :
19164 108 : case IX86_BUILTIN_INFQ:
19165 108 : case IX86_BUILTIN_HUGE_VALQ:
19166 108 : {
19167 108 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19168 108 : REAL_VALUE_TYPE inf;
19169 108 : real_inf (&inf);
19170 108 : return build_real (type, inf);
19171 : }
19172 :
19173 62447 : case IX86_BUILTIN_TZCNT16:
19174 62447 : case IX86_BUILTIN_CTZS:
19175 62447 : case IX86_BUILTIN_TZCNT32:
19176 62447 : case IX86_BUILTIN_TZCNT64:
19177 62447 : gcc_assert (n_args == 1);
19178 62447 : if (TREE_CODE (args[0]) == INTEGER_CST)
19179 : {
19180 45 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19181 45 : tree arg = args[0];
19182 45 : if (fn_code == IX86_BUILTIN_TZCNT16
19183 45 : || fn_code == IX86_BUILTIN_CTZS)
19184 3 : arg = fold_convert (short_unsigned_type_node, arg);
19185 45 : if (integer_zerop (arg))
19186 6 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19187 : else
19188 39 : return fold_const_call (CFN_CTZ, type, arg);
19189 : }
19190 : break;
19191 :
19192 51998 : case IX86_BUILTIN_LZCNT16:
19193 51998 : case IX86_BUILTIN_CLZS:
19194 51998 : case IX86_BUILTIN_LZCNT32:
19195 51998 : case IX86_BUILTIN_LZCNT64:
19196 51998 : gcc_assert (n_args == 1);
19197 51998 : if (TREE_CODE (args[0]) == INTEGER_CST)
19198 : {
19199 54 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19200 54 : tree arg = args[0];
19201 54 : if (fn_code == IX86_BUILTIN_LZCNT16
19202 54 : || fn_code == IX86_BUILTIN_CLZS)
19203 18 : arg = fold_convert (short_unsigned_type_node, arg);
19204 54 : if (integer_zerop (arg))
19205 3 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19206 : else
19207 51 : return fold_const_call (CFN_CLZ, type, arg);
19208 : }
19209 : break;
19210 :
19211 61231 : case IX86_BUILTIN_BEXTR32:
19212 61231 : case IX86_BUILTIN_BEXTR64:
19213 61231 : case IX86_BUILTIN_BEXTRI32:
19214 61231 : case IX86_BUILTIN_BEXTRI64:
19215 61231 : gcc_assert (n_args == 2);
19216 61231 : if (tree_fits_uhwi_p (args[1]))
19217 : {
19218 152 : unsigned HOST_WIDE_INT res = 0;
19219 152 : unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
19220 152 : unsigned int start = tree_to_uhwi (args[1]);
19221 152 : unsigned int len = (start & 0xff00) >> 8;
19222 152 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19223 152 : start &= 0xff;
19224 152 : if (start >= prec || len == 0)
19225 111 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19226 : args[0]);
19227 41 : else if (!tree_fits_uhwi_p (args[0]))
19228 : break;
19229 : else
19230 24 : res = tree_to_uhwi (args[0]) >> start;
19231 24 : if (len > prec)
19232 : len = prec;
19233 24 : if (len < HOST_BITS_PER_WIDE_INT)
19234 15 : res &= (HOST_WIDE_INT_1U << len) - 1;
19235 24 : return build_int_cstu (lhs_type, res);
19236 : }
19237 : break;
19238 :
19239 21034 : case IX86_BUILTIN_BZHI32:
19240 21034 : case IX86_BUILTIN_BZHI64:
19241 21034 : gcc_assert (n_args == 2);
19242 21034 : if (tree_fits_uhwi_p (args[1]))
19243 : {
19244 190 : unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
19245 190 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19246 190 : if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
19247 : return args[0];
19248 190 : if (idx == 0)
19249 52 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19250 : args[0]);
19251 138 : if (!tree_fits_uhwi_p (args[0]))
19252 : break;
19253 12 : unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
19254 12 : res &= ~(HOST_WIDE_INT_M1U << idx);
19255 12 : return build_int_cstu (lhs_type, res);
19256 : }
19257 : break;
19258 :
19259 20792 : case IX86_BUILTIN_PDEP32:
19260 20792 : case IX86_BUILTIN_PDEP64:
19261 20792 : gcc_assert (n_args == 2);
19262 20792 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19263 : {
19264 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19265 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19266 46 : unsigned HOST_WIDE_INT res = 0;
19267 46 : unsigned HOST_WIDE_INT m, k = 1;
19268 2990 : for (m = 1; m; m <<= 1)
19269 2944 : if ((mask & m) != 0)
19270 : {
19271 1440 : if ((src & k) != 0)
19272 789 : res |= m;
19273 1440 : k <<= 1;
19274 : }
19275 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19276 : }
19277 : break;
19278 :
19279 20794 : case IX86_BUILTIN_PEXT32:
19280 20794 : case IX86_BUILTIN_PEXT64:
19281 20794 : gcc_assert (n_args == 2);
19282 20794 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19283 : {
19284 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19285 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19286 46 : unsigned HOST_WIDE_INT res = 0;
19287 46 : unsigned HOST_WIDE_INT m, k = 1;
19288 2990 : for (m = 1; m; m <<= 1)
19289 2944 : if ((mask & m) != 0)
19290 : {
19291 2016 : if ((src & m) != 0)
19292 1063 : res |= k;
19293 2016 : k <<= 1;
19294 : }
19295 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19296 : }
19297 : break;
19298 :
19299 100430 : case IX86_BUILTIN_MOVMSKPS:
19300 100430 : case IX86_BUILTIN_PMOVMSKB:
19301 100430 : case IX86_BUILTIN_MOVMSKPD:
19302 100430 : case IX86_BUILTIN_PMOVMSKB128:
19303 100430 : case IX86_BUILTIN_MOVMSKPD256:
19304 100430 : case IX86_BUILTIN_MOVMSKPS256:
19305 100430 : case IX86_BUILTIN_PMOVMSKB256:
19306 100430 : gcc_assert (n_args == 1);
19307 100430 : if (TREE_CODE (args[0]) == VECTOR_CST)
19308 : {
19309 : HOST_WIDE_INT res = 0;
19310 1460 : for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
19311 : {
19312 1218 : tree e = VECTOR_CST_ELT (args[0], i);
19313 1218 : if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
19314 : {
19315 624 : if (wi::neg_p (wi::to_wide (e)))
19316 575 : res |= HOST_WIDE_INT_1 << i;
19317 : }
19318 594 : else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
19319 : {
19320 594 : if (TREE_REAL_CST (e).sign)
19321 505 : res |= HOST_WIDE_INT_1 << i;
19322 : }
19323 : else
19324 : return NULL_TREE;
19325 : }
19326 242 : return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
19327 : }
19328 : break;
19329 :
19330 659772 : case IX86_BUILTIN_PSLLD:
19331 659772 : case IX86_BUILTIN_PSLLD128:
19332 659772 : case IX86_BUILTIN_PSLLD128_MASK:
19333 659772 : case IX86_BUILTIN_PSLLD256:
19334 659772 : case IX86_BUILTIN_PSLLD256_MASK:
19335 659772 : case IX86_BUILTIN_PSLLD512:
19336 659772 : case IX86_BUILTIN_PSLLDI:
19337 659772 : case IX86_BUILTIN_PSLLDI128:
19338 659772 : case IX86_BUILTIN_PSLLDI128_MASK:
19339 659772 : case IX86_BUILTIN_PSLLDI256:
19340 659772 : case IX86_BUILTIN_PSLLDI256_MASK:
19341 659772 : case IX86_BUILTIN_PSLLDI512:
19342 659772 : case IX86_BUILTIN_PSLLQ:
19343 659772 : case IX86_BUILTIN_PSLLQ128:
19344 659772 : case IX86_BUILTIN_PSLLQ128_MASK:
19345 659772 : case IX86_BUILTIN_PSLLQ256:
19346 659772 : case IX86_BUILTIN_PSLLQ256_MASK:
19347 659772 : case IX86_BUILTIN_PSLLQ512:
19348 659772 : case IX86_BUILTIN_PSLLQI:
19349 659772 : case IX86_BUILTIN_PSLLQI128:
19350 659772 : case IX86_BUILTIN_PSLLQI128_MASK:
19351 659772 : case IX86_BUILTIN_PSLLQI256:
19352 659772 : case IX86_BUILTIN_PSLLQI256_MASK:
19353 659772 : case IX86_BUILTIN_PSLLQI512:
19354 659772 : case IX86_BUILTIN_PSLLW:
19355 659772 : case IX86_BUILTIN_PSLLW128:
19356 659772 : case IX86_BUILTIN_PSLLW128_MASK:
19357 659772 : case IX86_BUILTIN_PSLLW256:
19358 659772 : case IX86_BUILTIN_PSLLW256_MASK:
19359 659772 : case IX86_BUILTIN_PSLLW512_MASK:
19360 659772 : case IX86_BUILTIN_PSLLWI:
19361 659772 : case IX86_BUILTIN_PSLLWI128:
19362 659772 : case IX86_BUILTIN_PSLLWI128_MASK:
19363 659772 : case IX86_BUILTIN_PSLLWI256:
19364 659772 : case IX86_BUILTIN_PSLLWI256_MASK:
19365 659772 : case IX86_BUILTIN_PSLLWI512_MASK:
19366 659772 : rcode = ASHIFT;
19367 659772 : is_vshift = false;
19368 659772 : goto do_shift;
19369 601367 : case IX86_BUILTIN_PSRAD:
19370 601367 : case IX86_BUILTIN_PSRAD128:
19371 601367 : case IX86_BUILTIN_PSRAD128_MASK:
19372 601367 : case IX86_BUILTIN_PSRAD256:
19373 601367 : case IX86_BUILTIN_PSRAD256_MASK:
19374 601367 : case IX86_BUILTIN_PSRAD512:
19375 601367 : case IX86_BUILTIN_PSRADI:
19376 601367 : case IX86_BUILTIN_PSRADI128:
19377 601367 : case IX86_BUILTIN_PSRADI128_MASK:
19378 601367 : case IX86_BUILTIN_PSRADI256:
19379 601367 : case IX86_BUILTIN_PSRADI256_MASK:
19380 601367 : case IX86_BUILTIN_PSRADI512:
19381 601367 : case IX86_BUILTIN_PSRAQ128_MASK:
19382 601367 : case IX86_BUILTIN_PSRAQ256_MASK:
19383 601367 : case IX86_BUILTIN_PSRAQ512:
19384 601367 : case IX86_BUILTIN_PSRAQI128_MASK:
19385 601367 : case IX86_BUILTIN_PSRAQI256_MASK:
19386 601367 : case IX86_BUILTIN_PSRAQI512:
19387 601367 : case IX86_BUILTIN_PSRAW:
19388 601367 : case IX86_BUILTIN_PSRAW128:
19389 601367 : case IX86_BUILTIN_PSRAW128_MASK:
19390 601367 : case IX86_BUILTIN_PSRAW256:
19391 601367 : case IX86_BUILTIN_PSRAW256_MASK:
19392 601367 : case IX86_BUILTIN_PSRAW512:
19393 601367 : case IX86_BUILTIN_PSRAWI:
19394 601367 : case IX86_BUILTIN_PSRAWI128:
19395 601367 : case IX86_BUILTIN_PSRAWI128_MASK:
19396 601367 : case IX86_BUILTIN_PSRAWI256:
19397 601367 : case IX86_BUILTIN_PSRAWI256_MASK:
19398 601367 : case IX86_BUILTIN_PSRAWI512:
19399 601367 : rcode = ASHIFTRT;
19400 601367 : is_vshift = false;
19401 601367 : goto do_shift;
19402 633647 : case IX86_BUILTIN_PSRLD:
19403 633647 : case IX86_BUILTIN_PSRLD128:
19404 633647 : case IX86_BUILTIN_PSRLD128_MASK:
19405 633647 : case IX86_BUILTIN_PSRLD256:
19406 633647 : case IX86_BUILTIN_PSRLD256_MASK:
19407 633647 : case IX86_BUILTIN_PSRLD512:
19408 633647 : case IX86_BUILTIN_PSRLDI:
19409 633647 : case IX86_BUILTIN_PSRLDI128:
19410 633647 : case IX86_BUILTIN_PSRLDI128_MASK:
19411 633647 : case IX86_BUILTIN_PSRLDI256:
19412 633647 : case IX86_BUILTIN_PSRLDI256_MASK:
19413 633647 : case IX86_BUILTIN_PSRLDI512:
19414 633647 : case IX86_BUILTIN_PSRLQ:
19415 633647 : case IX86_BUILTIN_PSRLQ128:
19416 633647 : case IX86_BUILTIN_PSRLQ128_MASK:
19417 633647 : case IX86_BUILTIN_PSRLQ256:
19418 633647 : case IX86_BUILTIN_PSRLQ256_MASK:
19419 633647 : case IX86_BUILTIN_PSRLQ512:
19420 633647 : case IX86_BUILTIN_PSRLQI:
19421 633647 : case IX86_BUILTIN_PSRLQI128:
19422 633647 : case IX86_BUILTIN_PSRLQI128_MASK:
19423 633647 : case IX86_BUILTIN_PSRLQI256:
19424 633647 : case IX86_BUILTIN_PSRLQI256_MASK:
19425 633647 : case IX86_BUILTIN_PSRLQI512:
19426 633647 : case IX86_BUILTIN_PSRLW:
19427 633647 : case IX86_BUILTIN_PSRLW128:
19428 633647 : case IX86_BUILTIN_PSRLW128_MASK:
19429 633647 : case IX86_BUILTIN_PSRLW256:
19430 633647 : case IX86_BUILTIN_PSRLW256_MASK:
19431 633647 : case IX86_BUILTIN_PSRLW512:
19432 633647 : case IX86_BUILTIN_PSRLWI:
19433 633647 : case IX86_BUILTIN_PSRLWI128:
19434 633647 : case IX86_BUILTIN_PSRLWI128_MASK:
19435 633647 : case IX86_BUILTIN_PSRLWI256:
19436 633647 : case IX86_BUILTIN_PSRLWI256_MASK:
19437 633647 : case IX86_BUILTIN_PSRLWI512:
19438 633647 : rcode = LSHIFTRT;
19439 633647 : is_vshift = false;
19440 633647 : goto do_shift;
19441 276063 : case IX86_BUILTIN_PSLLVV16HI:
19442 276063 : case IX86_BUILTIN_PSLLVV16SI:
19443 276063 : case IX86_BUILTIN_PSLLVV2DI:
19444 276063 : case IX86_BUILTIN_PSLLVV2DI_MASK:
19445 276063 : case IX86_BUILTIN_PSLLVV32HI:
19446 276063 : case IX86_BUILTIN_PSLLVV4DI:
19447 276063 : case IX86_BUILTIN_PSLLVV4DI_MASK:
19448 276063 : case IX86_BUILTIN_PSLLVV4SI:
19449 276063 : case IX86_BUILTIN_PSLLVV4SI_MASK:
19450 276063 : case IX86_BUILTIN_PSLLVV8DI:
19451 276063 : case IX86_BUILTIN_PSLLVV8HI:
19452 276063 : case IX86_BUILTIN_PSLLVV8SI:
19453 276063 : case IX86_BUILTIN_PSLLVV8SI_MASK:
19454 276063 : rcode = ASHIFT;
19455 276063 : is_vshift = true;
19456 276063 : goto do_shift;
19457 275642 : case IX86_BUILTIN_PSRAVQ128:
19458 275642 : case IX86_BUILTIN_PSRAVQ256:
19459 275642 : case IX86_BUILTIN_PSRAVV16HI:
19460 275642 : case IX86_BUILTIN_PSRAVV16SI:
19461 275642 : case IX86_BUILTIN_PSRAVV32HI:
19462 275642 : case IX86_BUILTIN_PSRAVV4SI:
19463 275642 : case IX86_BUILTIN_PSRAVV4SI_MASK:
19464 275642 : case IX86_BUILTIN_PSRAVV8DI:
19465 275642 : case IX86_BUILTIN_PSRAVV8HI:
19466 275642 : case IX86_BUILTIN_PSRAVV8SI:
19467 275642 : case IX86_BUILTIN_PSRAVV8SI_MASK:
19468 275642 : rcode = ASHIFTRT;
19469 275642 : is_vshift = true;
19470 275642 : goto do_shift;
19471 276054 : case IX86_BUILTIN_PSRLVV16HI:
19472 276054 : case IX86_BUILTIN_PSRLVV16SI:
19473 276054 : case IX86_BUILTIN_PSRLVV2DI:
19474 276054 : case IX86_BUILTIN_PSRLVV2DI_MASK:
19475 276054 : case IX86_BUILTIN_PSRLVV32HI:
19476 276054 : case IX86_BUILTIN_PSRLVV4DI:
19477 276054 : case IX86_BUILTIN_PSRLVV4DI_MASK:
19478 276054 : case IX86_BUILTIN_PSRLVV4SI:
19479 276054 : case IX86_BUILTIN_PSRLVV4SI_MASK:
19480 276054 : case IX86_BUILTIN_PSRLVV8DI:
19481 276054 : case IX86_BUILTIN_PSRLVV8HI:
19482 276054 : case IX86_BUILTIN_PSRLVV8SI:
19483 276054 : case IX86_BUILTIN_PSRLVV8SI_MASK:
19484 276054 : rcode = LSHIFTRT;
19485 276054 : is_vshift = true;
19486 276054 : goto do_shift;
19487 :
19488 2722545 : do_shift:
19489 2722545 : gcc_assert (n_args >= 2);
19490 2722545 : if (TREE_CODE (args[0]) != VECTOR_CST)
19491 : break;
19492 927 : mask = HOST_WIDE_INT_M1U;
19493 927 : if (n_args > 2)
19494 : {
19495 : /* This is masked shift. */
19496 678 : if (!tree_fits_uhwi_p (args[n_args - 1])
19497 678 : || TREE_SIDE_EFFECTS (args[n_args - 2]))
19498 : break;
19499 678 : mask = tree_to_uhwi (args[n_args - 1]);
19500 678 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19501 678 : mask |= HOST_WIDE_INT_M1U << elems;
19502 678 : if (mask != HOST_WIDE_INT_M1U
19503 567 : && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
19504 : break;
19505 633 : if (mask == (HOST_WIDE_INT_M1U << elems))
19506 : return args[n_args - 2];
19507 : }
19508 879 : if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
19509 : break;
19510 879 : if (tree tem = (is_vshift ? integer_one_node
19511 879 : : ix86_vector_shift_count (args[1])))
19512 : {
19513 558 : unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
19514 558 : unsigned HOST_WIDE_INT prec
19515 558 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
19516 558 : if (count == 0 && mask == HOST_WIDE_INT_M1U)
19517 : return args[0];
19518 558 : if (count >= prec)
19519 : {
19520 72 : if (rcode == ASHIFTRT)
19521 27 : count = prec - 1;
19522 45 : else if (mask == HOST_WIDE_INT_M1U)
19523 3 : return build_zero_cst (TREE_TYPE (args[0]));
19524 : }
19525 555 : tree countt = NULL_TREE;
19526 555 : if (!is_vshift)
19527 : {
19528 377 : if (count >= prec)
19529 42 : countt = integer_zero_node;
19530 : else
19531 335 : countt = build_int_cst (integer_type_node, count);
19532 : }
19533 555 : tree_vector_builder builder;
19534 555 : if (mask != HOST_WIDE_INT_M1U || is_vshift)
19535 392 : builder.new_vector (TREE_TYPE (args[0]),
19536 784 : TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
19537 : 1);
19538 : else
19539 163 : builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
19540 : false);
19541 555 : unsigned int cnt = builder.encoded_nelts ();
19542 5967 : for (unsigned int i = 0; i < cnt; ++i)
19543 : {
19544 5412 : tree elt = VECTOR_CST_ELT (args[0], i);
19545 5412 : if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19546 0 : return NULL_TREE;
19547 5412 : tree type = TREE_TYPE (elt);
19548 5412 : if (rcode == LSHIFTRT)
19549 2040 : elt = fold_convert (unsigned_type_for (type), elt);
19550 5412 : if (is_vshift)
19551 : {
19552 1846 : countt = VECTOR_CST_ELT (args[1], i);
19553 1846 : if (TREE_CODE (countt) != INTEGER_CST
19554 1846 : || TREE_OVERFLOW (countt))
19555 : return NULL_TREE;
19556 1846 : if (wi::neg_p (wi::to_wide (countt))
19557 3610 : || wi::to_widest (countt) >= prec)
19558 : {
19559 325 : if (rcode == ASHIFTRT)
19560 108 : countt = build_int_cst (TREE_TYPE (countt),
19561 108 : prec - 1);
19562 : else
19563 : {
19564 217 : elt = build_zero_cst (TREE_TYPE (elt));
19565 217 : countt = build_zero_cst (TREE_TYPE (countt));
19566 : }
19567 : }
19568 : }
19569 3566 : else if (count >= prec)
19570 504 : elt = build_zero_cst (TREE_TYPE (elt));
19571 8950 : elt = const_binop (rcode == ASHIFT
19572 : ? LSHIFT_EXPR : RSHIFT_EXPR,
19573 5412 : TREE_TYPE (elt), elt, countt);
19574 5412 : if (!elt || TREE_CODE (elt) != INTEGER_CST)
19575 : return NULL_TREE;
19576 5412 : if (rcode == LSHIFTRT)
19577 2040 : elt = fold_convert (type, elt);
19578 5412 : if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19579 : {
19580 1566 : elt = VECTOR_CST_ELT (args[n_args - 2], i);
19581 1566 : if (TREE_CODE (elt) != INTEGER_CST
19582 1566 : || TREE_OVERFLOW (elt))
19583 : return NULL_TREE;
19584 : }
19585 5412 : builder.quick_push (elt);
19586 : }
19587 555 : return builder.build ();
19588 555 : }
19589 : break;
19590 :
19591 32724 : case IX86_BUILTIN_MINSS:
19592 32724 : case IX86_BUILTIN_MINSH_MASK:
19593 32724 : tcode = LT_EXPR;
19594 32724 : is_scalar = true;
19595 32724 : goto do_minmax;
19596 :
19597 32724 : case IX86_BUILTIN_MAXSS:
19598 32724 : case IX86_BUILTIN_MAXSH_MASK:
19599 32724 : tcode = GT_EXPR;
19600 32724 : is_scalar = true;
19601 32724 : goto do_minmax;
19602 :
19603 350642 : case IX86_BUILTIN_MINPS:
19604 350642 : case IX86_BUILTIN_MINPD:
19605 350642 : case IX86_BUILTIN_MINPS256:
19606 350642 : case IX86_BUILTIN_MINPD256:
19607 350642 : case IX86_BUILTIN_MINPS512:
19608 350642 : case IX86_BUILTIN_MINPD512:
19609 350642 : case IX86_BUILTIN_MINPS128_MASK:
19610 350642 : case IX86_BUILTIN_MINPD128_MASK:
19611 350642 : case IX86_BUILTIN_MINPS256_MASK:
19612 350642 : case IX86_BUILTIN_MINPD256_MASK:
19613 350642 : case IX86_BUILTIN_MINPH128_MASK:
19614 350642 : case IX86_BUILTIN_MINPH256_MASK:
19615 350642 : case IX86_BUILTIN_MINPH512_MASK:
19616 350642 : tcode = LT_EXPR;
19617 350642 : is_scalar = false;
19618 350642 : goto do_minmax;
19619 :
19620 : case IX86_BUILTIN_MAXPS:
19621 : case IX86_BUILTIN_MAXPD:
19622 : case IX86_BUILTIN_MAXPS256:
19623 : case IX86_BUILTIN_MAXPD256:
19624 : case IX86_BUILTIN_MAXPS512:
19625 : case IX86_BUILTIN_MAXPD512:
19626 : case IX86_BUILTIN_MAXPS128_MASK:
19627 : case IX86_BUILTIN_MAXPD128_MASK:
19628 : case IX86_BUILTIN_MAXPS256_MASK:
19629 : case IX86_BUILTIN_MAXPD256_MASK:
19630 : case IX86_BUILTIN_MAXPH128_MASK:
19631 : case IX86_BUILTIN_MAXPH256_MASK:
19632 : case IX86_BUILTIN_MAXPH512_MASK:
19633 : tcode = GT_EXPR;
19634 : is_scalar = false;
19635 766752 : do_minmax:
19636 766752 : gcc_assert (n_args >= 2);
19637 766752 : if (TREE_CODE (args[0]) != VECTOR_CST
19638 76 : || TREE_CODE (args[1]) != VECTOR_CST)
19639 : break;
19640 76 : mask = HOST_WIDE_INT_M1U;
19641 76 : if (n_args > 2)
19642 : {
19643 36 : gcc_assert (n_args >= 4);
19644 : /* This is masked minmax. */
19645 36 : if (TREE_CODE (args[3]) != INTEGER_CST
19646 36 : || TREE_SIDE_EFFECTS (args[2]))
19647 : break;
19648 36 : mask = TREE_INT_CST_LOW (args[3]);
19649 36 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19650 36 : mask |= HOST_WIDE_INT_M1U << elems;
19651 36 : if (mask != HOST_WIDE_INT_M1U
19652 32 : && TREE_CODE (args[2]) != VECTOR_CST)
19653 : break;
19654 36 : if (n_args >= 5)
19655 : {
19656 20 : if (!tree_fits_uhwi_p (args[4]))
19657 : break;
19658 20 : if (tree_to_uhwi (args[4]) != 4
19659 0 : && tree_to_uhwi (args[4]) != 8)
19660 : break;
19661 : }
19662 36 : if (mask == (HOST_WIDE_INT_M1U << elems))
19663 : return args[2];
19664 : }
19665 : /* Punt on NaNs, unless exceptions are disabled. */
19666 76 : if (HONOR_NANS (args[0])
19667 76 : && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19668 184 : for (int i = 0; i < 2; ++i)
19669 : {
19670 134 : unsigned count = vector_cst_encoded_nelts (args[i]);
19671 957 : for (unsigned j = 0; j < count; ++j)
19672 849 : if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19673 : return NULL_TREE;
19674 : }
19675 50 : {
19676 50 : tree res = const_binop (tcode,
19677 50 : truth_type_for (TREE_TYPE (args[0])),
19678 : args[0], args[1]);
19679 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19680 : break;
19681 50 : res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19682 : args[0], args[1]);
19683 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19684 : break;
19685 50 : if (mask != HOST_WIDE_INT_M1U)
19686 : {
19687 32 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19688 32 : vec_perm_builder sel (nelts, nelts, 1);
19689 328 : for (unsigned int i = 0; i < nelts; i++)
19690 296 : if (mask & (HOST_WIDE_INT_1U << i))
19691 160 : sel.quick_push (i);
19692 : else
19693 136 : sel.quick_push (nelts + i);
19694 32 : vec_perm_indices indices (sel, 2, nelts);
19695 32 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19696 : indices);
19697 32 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19698 : break;
19699 32 : }
19700 50 : if (is_scalar)
19701 : {
19702 10 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19703 10 : vec_perm_builder sel (nelts, nelts, 1);
19704 10 : sel.quick_push (0);
19705 40 : for (unsigned int i = 1; i < nelts; i++)
19706 30 : sel.quick_push (nelts + i);
19707 10 : vec_perm_indices indices (sel, 2, nelts);
19708 10 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19709 : indices);
19710 10 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19711 : break;
19712 10 : }
19713 50 : return res;
19714 : }
19715 :
19716 : default:
19717 : break;
19718 : }
19719 : }
19720 :
19721 : #ifdef SUBTARGET_FOLD_BUILTIN
19722 : return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19723 : #endif
19724 :
19725 : return NULL_TREE;
19726 : }
19727 :
19728 : /* Fold a MD builtin (use ix86_fold_builtin for folding into
19729 : constant) in GIMPLE. */
19730 :
19731 : bool
19732 1121435 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19733 : {
19734 1121435 : gimple *stmt = gsi_stmt (*gsi), *g;
19735 1121435 : gimple_seq stmts = NULL;
19736 1121435 : tree fndecl = gimple_call_fndecl (stmt);
19737 1121435 : gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19738 1121435 : int n_args = gimple_call_num_args (stmt);
19739 1121435 : enum ix86_builtins fn_code
19740 1121435 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19741 1121435 : tree decl = NULL_TREE;
19742 1121435 : tree arg0, arg1, arg2;
19743 1121435 : enum rtx_code rcode;
19744 1121435 : enum tree_code tcode;
19745 1121435 : unsigned HOST_WIDE_INT count;
19746 1121435 : bool is_vshift;
19747 1121435 : unsigned HOST_WIDE_INT elems;
19748 1121435 : location_t loc;
19749 :
19750 : /* Don't fold when there's isa mismatch. */
19751 1121435 : if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19752 : return false;
19753 :
19754 1121308 : switch (fn_code)
19755 : {
19756 288 : case IX86_BUILTIN_TZCNT32:
19757 288 : decl = builtin_decl_implicit (BUILT_IN_CTZ);
19758 288 : goto fold_tzcnt_lzcnt;
19759 :
19760 237 : case IX86_BUILTIN_TZCNT64:
19761 237 : decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19762 237 : goto fold_tzcnt_lzcnt;
19763 :
19764 215 : case IX86_BUILTIN_LZCNT32:
19765 215 : decl = builtin_decl_implicit (BUILT_IN_CLZ);
19766 215 : goto fold_tzcnt_lzcnt;
19767 :
19768 224 : case IX86_BUILTIN_LZCNT64:
19769 224 : decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19770 224 : goto fold_tzcnt_lzcnt;
19771 :
19772 964 : fold_tzcnt_lzcnt:
19773 964 : gcc_assert (n_args == 1);
19774 964 : arg0 = gimple_call_arg (stmt, 0);
19775 964 : if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19776 : {
19777 799 : int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19778 : /* If arg0 is provably non-zero, optimize into generic
19779 : __builtin_c[tl]z{,ll} function the middle-end handles
19780 : better. */
19781 799 : if (!expr_not_equal_to (arg0, wi::zero (prec)))
19782 : return false;
19783 :
19784 9 : loc = gimple_location (stmt);
19785 9 : g = gimple_build_call (decl, 1, arg0);
19786 9 : gimple_set_location (g, loc);
19787 9 : tree lhs = make_ssa_name (integer_type_node);
19788 9 : gimple_call_set_lhs (g, lhs);
19789 9 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
19790 9 : g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19791 9 : gimple_set_location (g, loc);
19792 9 : gsi_replace (gsi, g, false);
19793 9 : return true;
19794 : }
19795 : break;
19796 :
19797 491 : case IX86_BUILTIN_BZHI32:
19798 491 : case IX86_BUILTIN_BZHI64:
19799 491 : gcc_assert (n_args == 2);
19800 491 : arg1 = gimple_call_arg (stmt, 1);
19801 491 : if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19802 : {
19803 195 : unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19804 195 : arg0 = gimple_call_arg (stmt, 0);
19805 195 : if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19806 : break;
19807 31 : loc = gimple_location (stmt);
19808 31 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19809 31 : gimple_set_location (g, loc);
19810 31 : gsi_replace (gsi, g, false);
19811 31 : return true;
19812 : }
19813 : break;
19814 :
19815 502 : case IX86_BUILTIN_PDEP32:
19816 502 : case IX86_BUILTIN_PDEP64:
19817 502 : case IX86_BUILTIN_PEXT32:
19818 502 : case IX86_BUILTIN_PEXT64:
19819 502 : gcc_assert (n_args == 2);
19820 502 : arg1 = gimple_call_arg (stmt, 1);
19821 502 : if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19822 : {
19823 4 : loc = gimple_location (stmt);
19824 4 : arg0 = gimple_call_arg (stmt, 0);
19825 4 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19826 4 : gimple_set_location (g, loc);
19827 4 : gsi_replace (gsi, g, false);
19828 4 : return true;
19829 : }
19830 : break;
19831 :
19832 145 : case IX86_BUILTIN_PBLENDVB256:
19833 145 : case IX86_BUILTIN_BLENDVPS256:
19834 145 : case IX86_BUILTIN_BLENDVPD256:
19835 : /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclowered
19836 : to scalar operations and not combined back. */
19837 145 : if (!TARGET_AVX2)
19838 : break;
19839 :
19840 : /* FALLTHRU. */
19841 112 : case IX86_BUILTIN_BLENDVPD:
19842 : /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19843 : w/o sse4.2, it's veclowered to scalar operations and
19844 : not combined back. */
19845 112 : if (!TARGET_SSE4_2)
19846 : break;
19847 : /* FALLTHRU. */
19848 166 : case IX86_BUILTIN_PBLENDVB128:
19849 166 : case IX86_BUILTIN_BLENDVPS:
19850 166 : gcc_assert (n_args == 3);
19851 166 : arg0 = gimple_call_arg (stmt, 0);
19852 166 : arg1 = gimple_call_arg (stmt, 1);
19853 166 : arg2 = gimple_call_arg (stmt, 2);
19854 166 : if (gimple_call_lhs (stmt))
19855 : {
19856 166 : loc = gimple_location (stmt);
19857 166 : tree type = TREE_TYPE (arg2);
19858 166 : if (VECTOR_FLOAT_TYPE_P (type))
19859 : {
19860 73 : tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19861 73 : ? intSI_type_node : intDI_type_node;
19862 73 : type = get_same_sized_vectype (itype, type);
19863 : }
19864 : else
19865 93 : type = signed_type_for (type);
19866 166 : arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19867 166 : tree zero_vec = build_zero_cst (type);
19868 166 : tree cmp_type = truth_type_for (type);
19869 166 : tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19870 166 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19871 166 : g = gimple_build_assign (gimple_call_lhs (stmt),
19872 : VEC_COND_EXPR, cmp,
19873 : arg1, arg0);
19874 166 : gimple_set_location (g, loc);
19875 166 : gsi_replace (gsi, g, false);
19876 : }
19877 : else
19878 0 : gsi_replace (gsi, gimple_build_nop (), false);
19879 : return true;
19880 :
19881 :
19882 16 : case IX86_BUILTIN_PCMPEQB128:
19883 16 : case IX86_BUILTIN_PCMPEQW128:
19884 16 : case IX86_BUILTIN_PCMPEQD128:
19885 16 : case IX86_BUILTIN_PCMPEQQ:
19886 16 : case IX86_BUILTIN_PCMPEQB256:
19887 16 : case IX86_BUILTIN_PCMPEQW256:
19888 16 : case IX86_BUILTIN_PCMPEQD256:
19889 16 : case IX86_BUILTIN_PCMPEQQ256:
19890 16 : tcode = EQ_EXPR;
19891 16 : goto do_cmp;
19892 :
19893 : case IX86_BUILTIN_PCMPGTB128:
19894 : case IX86_BUILTIN_PCMPGTW128:
19895 : case IX86_BUILTIN_PCMPGTD128:
19896 : case IX86_BUILTIN_PCMPGTQ:
19897 : case IX86_BUILTIN_PCMPGTB256:
19898 : case IX86_BUILTIN_PCMPGTW256:
19899 : case IX86_BUILTIN_PCMPGTD256:
19900 : case IX86_BUILTIN_PCMPGTQ256:
19901 : tcode = GT_EXPR;
19902 :
19903 33 : do_cmp:
19904 33 : gcc_assert (n_args == 2);
19905 33 : arg0 = gimple_call_arg (stmt, 0);
19906 33 : arg1 = gimple_call_arg (stmt, 1);
19907 33 : if (gimple_call_lhs (stmt))
19908 : {
19909 32 : loc = gimple_location (stmt);
19910 32 : tree type = TREE_TYPE (arg0);
19911 32 : tree zero_vec = build_zero_cst (type);
19912 32 : tree minus_one_vec = build_minus_one_cst (type);
19913 32 : tree cmp_type = truth_type_for (type);
19914 32 : tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19915 32 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19916 32 : g = gimple_build_assign (gimple_call_lhs (stmt),
19917 : VEC_COND_EXPR, cmp,
19918 : minus_one_vec, zero_vec);
19919 32 : gimple_set_location (g, loc);
19920 32 : gsi_replace (gsi, g, false);
19921 : }
19922 : else
19923 1 : gsi_replace (gsi, gimple_build_nop (), false);
19924 : return true;
19925 :
19926 9297 : case IX86_BUILTIN_PSLLD:
19927 9297 : case IX86_BUILTIN_PSLLD128:
19928 9297 : case IX86_BUILTIN_PSLLD128_MASK:
19929 9297 : case IX86_BUILTIN_PSLLD256:
19930 9297 : case IX86_BUILTIN_PSLLD256_MASK:
19931 9297 : case IX86_BUILTIN_PSLLD512:
19932 9297 : case IX86_BUILTIN_PSLLDI:
19933 9297 : case IX86_BUILTIN_PSLLDI128:
19934 9297 : case IX86_BUILTIN_PSLLDI128_MASK:
19935 9297 : case IX86_BUILTIN_PSLLDI256:
19936 9297 : case IX86_BUILTIN_PSLLDI256_MASK:
19937 9297 : case IX86_BUILTIN_PSLLDI512:
19938 9297 : case IX86_BUILTIN_PSLLQ:
19939 9297 : case IX86_BUILTIN_PSLLQ128:
19940 9297 : case IX86_BUILTIN_PSLLQ128_MASK:
19941 9297 : case IX86_BUILTIN_PSLLQ256:
19942 9297 : case IX86_BUILTIN_PSLLQ256_MASK:
19943 9297 : case IX86_BUILTIN_PSLLQ512:
19944 9297 : case IX86_BUILTIN_PSLLQI:
19945 9297 : case IX86_BUILTIN_PSLLQI128:
19946 9297 : case IX86_BUILTIN_PSLLQI128_MASK:
19947 9297 : case IX86_BUILTIN_PSLLQI256:
19948 9297 : case IX86_BUILTIN_PSLLQI256_MASK:
19949 9297 : case IX86_BUILTIN_PSLLQI512:
19950 9297 : case IX86_BUILTIN_PSLLW:
19951 9297 : case IX86_BUILTIN_PSLLW128:
19952 9297 : case IX86_BUILTIN_PSLLW128_MASK:
19953 9297 : case IX86_BUILTIN_PSLLW256:
19954 9297 : case IX86_BUILTIN_PSLLW256_MASK:
19955 9297 : case IX86_BUILTIN_PSLLW512_MASK:
19956 9297 : case IX86_BUILTIN_PSLLWI:
19957 9297 : case IX86_BUILTIN_PSLLWI128:
19958 9297 : case IX86_BUILTIN_PSLLWI128_MASK:
19959 9297 : case IX86_BUILTIN_PSLLWI256:
19960 9297 : case IX86_BUILTIN_PSLLWI256_MASK:
19961 9297 : case IX86_BUILTIN_PSLLWI512_MASK:
19962 9297 : rcode = ASHIFT;
19963 9297 : is_vshift = false;
19964 9297 : goto do_shift;
19965 6495 : case IX86_BUILTIN_PSRAD:
19966 6495 : case IX86_BUILTIN_PSRAD128:
19967 6495 : case IX86_BUILTIN_PSRAD128_MASK:
19968 6495 : case IX86_BUILTIN_PSRAD256:
19969 6495 : case IX86_BUILTIN_PSRAD256_MASK:
19970 6495 : case IX86_BUILTIN_PSRAD512:
19971 6495 : case IX86_BUILTIN_PSRADI:
19972 6495 : case IX86_BUILTIN_PSRADI128:
19973 6495 : case IX86_BUILTIN_PSRADI128_MASK:
19974 6495 : case IX86_BUILTIN_PSRADI256:
19975 6495 : case IX86_BUILTIN_PSRADI256_MASK:
19976 6495 : case IX86_BUILTIN_PSRADI512:
19977 6495 : case IX86_BUILTIN_PSRAQ128_MASK:
19978 6495 : case IX86_BUILTIN_PSRAQ256_MASK:
19979 6495 : case IX86_BUILTIN_PSRAQ512:
19980 6495 : case IX86_BUILTIN_PSRAQI128_MASK:
19981 6495 : case IX86_BUILTIN_PSRAQI256_MASK:
19982 6495 : case IX86_BUILTIN_PSRAQI512:
19983 6495 : case IX86_BUILTIN_PSRAW:
19984 6495 : case IX86_BUILTIN_PSRAW128:
19985 6495 : case IX86_BUILTIN_PSRAW128_MASK:
19986 6495 : case IX86_BUILTIN_PSRAW256:
19987 6495 : case IX86_BUILTIN_PSRAW256_MASK:
19988 6495 : case IX86_BUILTIN_PSRAW512:
19989 6495 : case IX86_BUILTIN_PSRAWI:
19990 6495 : case IX86_BUILTIN_PSRAWI128:
19991 6495 : case IX86_BUILTIN_PSRAWI128_MASK:
19992 6495 : case IX86_BUILTIN_PSRAWI256:
19993 6495 : case IX86_BUILTIN_PSRAWI256_MASK:
19994 6495 : case IX86_BUILTIN_PSRAWI512:
19995 6495 : rcode = ASHIFTRT;
19996 6495 : is_vshift = false;
19997 6495 : goto do_shift;
19998 7960 : case IX86_BUILTIN_PSRLD:
19999 7960 : case IX86_BUILTIN_PSRLD128:
20000 7960 : case IX86_BUILTIN_PSRLD128_MASK:
20001 7960 : case IX86_BUILTIN_PSRLD256:
20002 7960 : case IX86_BUILTIN_PSRLD256_MASK:
20003 7960 : case IX86_BUILTIN_PSRLD512:
20004 7960 : case IX86_BUILTIN_PSRLDI:
20005 7960 : case IX86_BUILTIN_PSRLDI128:
20006 7960 : case IX86_BUILTIN_PSRLDI128_MASK:
20007 7960 : case IX86_BUILTIN_PSRLDI256:
20008 7960 : case IX86_BUILTIN_PSRLDI256_MASK:
20009 7960 : case IX86_BUILTIN_PSRLDI512:
20010 7960 : case IX86_BUILTIN_PSRLQ:
20011 7960 : case IX86_BUILTIN_PSRLQ128:
20012 7960 : case IX86_BUILTIN_PSRLQ128_MASK:
20013 7960 : case IX86_BUILTIN_PSRLQ256:
20014 7960 : case IX86_BUILTIN_PSRLQ256_MASK:
20015 7960 : case IX86_BUILTIN_PSRLQ512:
20016 7960 : case IX86_BUILTIN_PSRLQI:
20017 7960 : case IX86_BUILTIN_PSRLQI128:
20018 7960 : case IX86_BUILTIN_PSRLQI128_MASK:
20019 7960 : case IX86_BUILTIN_PSRLQI256:
20020 7960 : case IX86_BUILTIN_PSRLQI256_MASK:
20021 7960 : case IX86_BUILTIN_PSRLQI512:
20022 7960 : case IX86_BUILTIN_PSRLW:
20023 7960 : case IX86_BUILTIN_PSRLW128:
20024 7960 : case IX86_BUILTIN_PSRLW128_MASK:
20025 7960 : case IX86_BUILTIN_PSRLW256:
20026 7960 : case IX86_BUILTIN_PSRLW256_MASK:
20027 7960 : case IX86_BUILTIN_PSRLW512:
20028 7960 : case IX86_BUILTIN_PSRLWI:
20029 7960 : case IX86_BUILTIN_PSRLWI128:
20030 7960 : case IX86_BUILTIN_PSRLWI128_MASK:
20031 7960 : case IX86_BUILTIN_PSRLWI256:
20032 7960 : case IX86_BUILTIN_PSRLWI256_MASK:
20033 7960 : case IX86_BUILTIN_PSRLWI512:
20034 7960 : rcode = LSHIFTRT;
20035 7960 : is_vshift = false;
20036 7960 : goto do_shift;
20037 2384 : case IX86_BUILTIN_PSLLVV16HI:
20038 2384 : case IX86_BUILTIN_PSLLVV16SI:
20039 2384 : case IX86_BUILTIN_PSLLVV2DI:
20040 2384 : case IX86_BUILTIN_PSLLVV2DI_MASK:
20041 2384 : case IX86_BUILTIN_PSLLVV32HI:
20042 2384 : case IX86_BUILTIN_PSLLVV4DI:
20043 2384 : case IX86_BUILTIN_PSLLVV4DI_MASK:
20044 2384 : case IX86_BUILTIN_PSLLVV4SI:
20045 2384 : case IX86_BUILTIN_PSLLVV4SI_MASK:
20046 2384 : case IX86_BUILTIN_PSLLVV8DI:
20047 2384 : case IX86_BUILTIN_PSLLVV8HI:
20048 2384 : case IX86_BUILTIN_PSLLVV8SI:
20049 2384 : case IX86_BUILTIN_PSLLVV8SI_MASK:
20050 2384 : rcode = ASHIFT;
20051 2384 : is_vshift = true;
20052 2384 : goto do_shift;
20053 2341 : case IX86_BUILTIN_PSRAVQ128:
20054 2341 : case IX86_BUILTIN_PSRAVQ256:
20055 2341 : case IX86_BUILTIN_PSRAVV16HI:
20056 2341 : case IX86_BUILTIN_PSRAVV16SI:
20057 2341 : case IX86_BUILTIN_PSRAVV32HI:
20058 2341 : case IX86_BUILTIN_PSRAVV4SI:
20059 2341 : case IX86_BUILTIN_PSRAVV4SI_MASK:
20060 2341 : case IX86_BUILTIN_PSRAVV8DI:
20061 2341 : case IX86_BUILTIN_PSRAVV8HI:
20062 2341 : case IX86_BUILTIN_PSRAVV8SI:
20063 2341 : case IX86_BUILTIN_PSRAVV8SI_MASK:
20064 2341 : rcode = ASHIFTRT;
20065 2341 : is_vshift = true;
20066 2341 : goto do_shift;
20067 2380 : case IX86_BUILTIN_PSRLVV16HI:
20068 2380 : case IX86_BUILTIN_PSRLVV16SI:
20069 2380 : case IX86_BUILTIN_PSRLVV2DI:
20070 2380 : case IX86_BUILTIN_PSRLVV2DI_MASK:
20071 2380 : case IX86_BUILTIN_PSRLVV32HI:
20072 2380 : case IX86_BUILTIN_PSRLVV4DI:
20073 2380 : case IX86_BUILTIN_PSRLVV4DI_MASK:
20074 2380 : case IX86_BUILTIN_PSRLVV4SI:
20075 2380 : case IX86_BUILTIN_PSRLVV4SI_MASK:
20076 2380 : case IX86_BUILTIN_PSRLVV8DI:
20077 2380 : case IX86_BUILTIN_PSRLVV8HI:
20078 2380 : case IX86_BUILTIN_PSRLVV8SI:
20079 2380 : case IX86_BUILTIN_PSRLVV8SI_MASK:
20080 2380 : rcode = LSHIFTRT;
20081 2380 : is_vshift = true;
20082 2380 : goto do_shift;
20083 :
20084 30857 : do_shift:
20085 30857 : gcc_assert (n_args >= 2);
20086 30857 : if (!gimple_call_lhs (stmt))
20087 : {
20088 1 : gsi_replace (gsi, gimple_build_nop (), false);
20089 1 : return true;
20090 : }
20091 30856 : arg0 = gimple_call_arg (stmt, 0);
20092 30856 : arg1 = gimple_call_arg (stmt, 1);
20093 30856 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20094 : /* For masked shift, only optimize if the mask is all ones. */
20095 30856 : if (n_args > 2
20096 30856 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20097 : break;
20098 16081 : if (is_vshift)
20099 : {
20100 2640 : if (TREE_CODE (arg1) != VECTOR_CST)
20101 : break;
20102 69 : count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
20103 69 : if (integer_zerop (arg1))
20104 27 : count = 0;
20105 42 : else if (rcode == ASHIFTRT)
20106 : break;
20107 : else
20108 230 : for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
20109 : {
20110 212 : tree elt = VECTOR_CST_ELT (arg1, i);
20111 212 : if (!wi::neg_p (wi::to_wide (elt))
20112 375 : && wi::to_widest (elt) < count)
20113 16 : return false;
20114 : }
20115 : }
20116 : else
20117 : {
20118 13441 : arg1 = ix86_vector_shift_count (arg1);
20119 13441 : if (!arg1)
20120 : break;
20121 5608 : count = tree_to_uhwi (arg1);
20122 : }
20123 5653 : if (count == 0)
20124 : {
20125 : /* Just return the first argument for shift by 0. */
20126 93 : loc = gimple_location (stmt);
20127 93 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
20128 93 : gimple_set_location (g, loc);
20129 93 : gsi_replace (gsi, g, false);
20130 93 : return true;
20131 : }
20132 5560 : if (rcode != ASHIFTRT
20133 5560 : && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
20134 : {
20135 : /* For shift counts equal or greater than precision, except for
20136 : arithmetic right shift the result is zero. */
20137 78 : loc = gimple_location (stmt);
20138 78 : g = gimple_build_assign (gimple_call_lhs (stmt),
20139 78 : build_zero_cst (TREE_TYPE (arg0)));
20140 78 : gimple_set_location (g, loc);
20141 78 : gsi_replace (gsi, g, false);
20142 78 : return true;
20143 : }
20144 : break;
20145 :
20146 531 : case IX86_BUILTIN_SHUFPD512:
20147 531 : case IX86_BUILTIN_SHUFPS512:
20148 531 : case IX86_BUILTIN_SHUFPD:
20149 531 : case IX86_BUILTIN_SHUFPD256:
20150 531 : case IX86_BUILTIN_SHUFPS:
20151 531 : case IX86_BUILTIN_SHUFPS256:
20152 531 : arg0 = gimple_call_arg (stmt, 0);
20153 531 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20154 : /* This is masked shuffle. Only optimize if the mask is all ones. */
20155 531 : if (n_args > 3
20156 895 : && !ix86_masked_all_ones (elems,
20157 364 : gimple_call_arg (stmt, n_args - 1)))
20158 : break;
20159 203 : arg2 = gimple_call_arg (stmt, 2);
20160 203 : if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
20161 : {
20162 146 : unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
20163 : /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
20164 146 : if (shuffle_mask > 255)
20165 : return false;
20166 :
20167 144 : machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
20168 144 : loc = gimple_location (stmt);
20169 144 : tree itype = (imode == E_DFmode
20170 144 : ? long_long_integer_type_node : integer_type_node);
20171 144 : tree vtype = build_vector_type (itype, elems);
20172 144 : tree_vector_builder elts (vtype, elems, 1);
20173 :
20174 :
20175 : /* Transform integer shuffle_mask to vector perm_mask which
20176 : is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
20177 840 : for (unsigned i = 0; i != elems; i++)
20178 : {
20179 696 : unsigned sel_idx;
20180 : /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
20181 : provide 2 select constrols for each element of the
20182 : destination. */
20183 696 : if (imode == E_DFmode)
20184 240 : sel_idx = (i & 1) * elems + (i & ~1)
20185 240 : + ((shuffle_mask >> i) & 1);
20186 : else
20187 : {
20188 : /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
20189 : controls for each element of the destination. */
20190 456 : unsigned j = i % 4;
20191 456 : sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
20192 456 : + ((shuffle_mask >> 2 * j) & 3);
20193 : }
20194 696 : elts.quick_push (build_int_cst (itype, sel_idx));
20195 : }
20196 :
20197 144 : tree perm_mask = elts.build ();
20198 144 : arg1 = gimple_call_arg (stmt, 1);
20199 144 : g = gimple_build_assign (gimple_call_lhs (stmt),
20200 : VEC_PERM_EXPR,
20201 : arg0, arg1, perm_mask);
20202 144 : gimple_set_location (g, loc);
20203 144 : gsi_replace (gsi, g, false);
20204 144 : return true;
20205 144 : }
20206 : // Do not error yet, the constant could be propagated later?
20207 : break;
20208 :
20209 48 : case IX86_BUILTIN_PABSB:
20210 48 : case IX86_BUILTIN_PABSW:
20211 48 : case IX86_BUILTIN_PABSD:
20212 : /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
20213 48 : if (!TARGET_MMX_WITH_SSE)
20214 : break;
20215 : /* FALLTHRU. */
20216 2190 : case IX86_BUILTIN_PABSB128:
20217 2190 : case IX86_BUILTIN_PABSB256:
20218 2190 : case IX86_BUILTIN_PABSB512:
20219 2190 : case IX86_BUILTIN_PABSW128:
20220 2190 : case IX86_BUILTIN_PABSW256:
20221 2190 : case IX86_BUILTIN_PABSW512:
20222 2190 : case IX86_BUILTIN_PABSD128:
20223 2190 : case IX86_BUILTIN_PABSD256:
20224 2190 : case IX86_BUILTIN_PABSD512:
20225 2190 : case IX86_BUILTIN_PABSQ128:
20226 2190 : case IX86_BUILTIN_PABSQ256:
20227 2190 : case IX86_BUILTIN_PABSQ512:
20228 2190 : case IX86_BUILTIN_PABSB128_MASK:
20229 2190 : case IX86_BUILTIN_PABSB256_MASK:
20230 2190 : case IX86_BUILTIN_PABSW128_MASK:
20231 2190 : case IX86_BUILTIN_PABSW256_MASK:
20232 2190 : case IX86_BUILTIN_PABSD128_MASK:
20233 2190 : case IX86_BUILTIN_PABSD256_MASK:
20234 2190 : gcc_assert (n_args >= 1);
20235 2190 : if (!gimple_call_lhs (stmt))
20236 : {
20237 1 : gsi_replace (gsi, gimple_build_nop (), false);
20238 1 : return true;
20239 : }
20240 2189 : arg0 = gimple_call_arg (stmt, 0);
20241 2189 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20242 : /* For masked ABS, only optimize if the mask is all ones. */
20243 2189 : if (n_args > 1
20244 2189 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20245 : break;
20246 229 : {
20247 229 : tree utype, ures, vce;
20248 229 : utype = unsigned_type_for (TREE_TYPE (arg0));
20249 : /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
20250 : instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
20251 229 : ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
20252 229 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20253 229 : loc = gimple_location (stmt);
20254 229 : vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
20255 229 : g = gimple_build_assign (gimple_call_lhs (stmt),
20256 : VIEW_CONVERT_EXPR, vce);
20257 229 : gsi_replace (gsi, g, false);
20258 : }
20259 229 : return true;
20260 :
20261 2225 : case IX86_BUILTIN_MINPS:
20262 2225 : case IX86_BUILTIN_MINPD:
20263 2225 : case IX86_BUILTIN_MINPS256:
20264 2225 : case IX86_BUILTIN_MINPD256:
20265 2225 : case IX86_BUILTIN_MINPS512:
20266 2225 : case IX86_BUILTIN_MINPD512:
20267 2225 : case IX86_BUILTIN_MINPS128_MASK:
20268 2225 : case IX86_BUILTIN_MINPD128_MASK:
20269 2225 : case IX86_BUILTIN_MINPS256_MASK:
20270 2225 : case IX86_BUILTIN_MINPD256_MASK:
20271 2225 : case IX86_BUILTIN_MINPH128_MASK:
20272 2225 : case IX86_BUILTIN_MINPH256_MASK:
20273 2225 : case IX86_BUILTIN_MINPH512_MASK:
20274 2225 : tcode = LT_EXPR;
20275 2225 : goto do_minmax;
20276 :
20277 : case IX86_BUILTIN_MAXPS:
20278 : case IX86_BUILTIN_MAXPD:
20279 : case IX86_BUILTIN_MAXPS256:
20280 : case IX86_BUILTIN_MAXPD256:
20281 : case IX86_BUILTIN_MAXPS512:
20282 : case IX86_BUILTIN_MAXPD512:
20283 : case IX86_BUILTIN_MAXPS128_MASK:
20284 : case IX86_BUILTIN_MAXPD128_MASK:
20285 : case IX86_BUILTIN_MAXPS256_MASK:
20286 : case IX86_BUILTIN_MAXPD256_MASK:
20287 : case IX86_BUILTIN_MAXPH128_MASK:
20288 : case IX86_BUILTIN_MAXPH256_MASK:
20289 : case IX86_BUILTIN_MAXPH512_MASK:
20290 : tcode = GT_EXPR;
20291 4435 : do_minmax:
20292 4435 : gcc_assert (n_args >= 2);
20293 : /* Without SSE4.1 we often aren't able to pattern match it back to the
20294 : desired instruction. */
20295 4435 : if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
20296 : break;
20297 3865 : arg0 = gimple_call_arg (stmt, 0);
20298 3865 : arg1 = gimple_call_arg (stmt, 1);
20299 3865 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20300 : /* For masked minmax, only optimize if the mask is all ones. */
20301 3865 : if (n_args > 2
20302 3865 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
20303 : break;
20304 647 : if (n_args >= 5)
20305 : {
20306 436 : tree arg4 = gimple_call_arg (stmt, 4);
20307 436 : if (!tree_fits_uhwi_p (arg4))
20308 : break;
20309 424 : if (tree_to_uhwi (arg4) == 4)
20310 : /* Ok. */;
20311 416 : else if (tree_to_uhwi (arg4) != 8)
20312 : /* Invalid round argument. */
20313 : break;
20314 416 : else if (HONOR_NANS (arg0))
20315 : /* Lowering to comparison would raise exceptions which
20316 : shouldn't be raised. */
20317 : break;
20318 : }
20319 219 : {
20320 219 : tree type = truth_type_for (TREE_TYPE (arg0));
20321 219 : tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
20322 219 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20323 219 : g = gimple_build_assign (gimple_call_lhs (stmt),
20324 : VEC_COND_EXPR, cmpres, arg0, arg1);
20325 219 : gsi_replace (gsi, g, false);
20326 : }
20327 219 : return true;
20328 :
20329 : default:
20330 : break;
20331 : }
20332 :
20333 : return false;
20334 : }
20335 :
20336 : /* Handler for an SVML-style interface to
20337 : a library with vectorized intrinsics. */
20338 :
tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Input and output vectors must have the same element mode and the
     same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      /* Only the 2-element double and 4-element float variants are
	 supported here.  */
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Steal the operation name from the scalar builtin; bname + 10 below
     skips over its "__builtin_" prefix (10 characters).  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* SVML spells log as "Ln"; everything else is "vml[sd]<Func><width>".
     For the float variants the builtin name's trailing 'f' is
     overwritten by the vector width digit.  */
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase: clearing ASCII bit 0x20 upcases the first
     letter of the function name (name[4]).  */
  name[4] &= ~0x20;

  /* Count the scalar builtin's arguments to pick the vector
     signature (unary or binary).  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20428 :
20429 : /* Handler for an ACML-style interface to
20430 : a library with vectorized intrinsics. */
20431 :
tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  /* Name template: the ".." gets the precision letter and width digit
     patched in below; the operation name is appended after the '_'.  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Input and output vectors must have the same element mode and the
     same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      /* Only "__vrd2_*" and "__vrs4_*" entry points exist.  */
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Append the operation name taken from the scalar builtin;
     bname + 10 skips over its "__builtin_" prefix (10 characters).  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  /* Count the scalar builtin's arguments to pick the vector
     signature (unary or binary).  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20507 :
20508 : /* Handler for an AOCL-LibM-style interface to
20509 : a library with vectorized intrinsics. */
20510 :
tree
ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
{
  /* Names are built up as "amd_vr" + precision letter + vector width
     + "_" + operation, e.g. "amd_vrd2_sin".  */
  char name[20] = "amd_vr";
  int name_len = 6;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
     as it trades off some accuracy for increased performance.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Input and output vectors must have the same element mode and the
     same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  gcc_checking_assert (n > 0);

  /* Decide whether there exists a function for the combination of FN, the mode
     and the vector width.  Return early if it doesn't.  */

  if (el_mode != DFmode && el_mode != SFmode)
    return NULL_TREE;

  /* Supported vector widths for given FN and single/double precision.  Zeros
     are used to fill out unused positions in the arrays.  */
  static const int supported_n[][2][3] = {
    /* Single prec. , Double prec.  */
    { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW.  */
    { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF.  */
    { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN.  */
    { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10.  */
    { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10.  */
    { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P.  */
    { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN.  */
    { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS.  */
    { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH.  */
    { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1.  */
    { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH.  */
  };

  /* We cannot simply index the supported_n array with FN since multiple FNs
     may correspond to a single operation (see the definitions of these
     CASE_CFN_* macros).  */
  int i;
  switch (fn)
    {
    CASE_CFN_TAN : i = 0; break;
    CASE_CFN_EXP : i = 1; break;
    CASE_CFN_EXP2 : i = 2; break;
    CASE_CFN_LOG : i = 3; break;
    CASE_CFN_LOG2 : i = 4; break;
    CASE_CFN_COS : i = 5; break;
    CASE_CFN_SIN : i = 6; break;
    CASE_CFN_POW : i = 7; break;
    CASE_CFN_ERF : i = 8; break;
    CASE_CFN_ATAN : i = 9; break;
    CASE_CFN_LOG10 : i = 10; break;
    CASE_CFN_EXP10 : i = 11; break;
    CASE_CFN_LOG1P : i = 12; break;
    CASE_CFN_ASIN : i = 13; break;
    CASE_CFN_ACOS : i = 14; break;
    CASE_CFN_TANH : i = 15; break;
    CASE_CFN_EXPM1 : i = 16; break;
    CASE_CFN_COSH : i = 17; break;
    default: return NULL_TREE;
    }

  /* Column 0 of supported_n is single precision, column 1 double.  */
  int j = el_mode == DFmode;
  bool n_is_supported = false;
  for (unsigned k = 0; k < 3; k++)
    if (supported_n[i][j][k] == n)
      {
	n_is_supported = true;
	break;
      }
  if (!n_is_supported)
    return NULL_TREE;

  /* Append the precision and the vector width to the function name we are
     constructing.  */
  name[name_len++] = el_mode == DFmode ? 'd' : 's';
  switch (n)
    {
    case 2:
    case 4:
    case 8:
      name[name_len++] = '0' + n;
      break;
    case 16:
      name[name_len++] = '1';
      name[name_len++] = '6';
      break;
    default:
      gcc_unreachable ();
    }
  name[name_len++] = '_';

  /* Append the operation name (steal it from the name of a builtin).
     bname + 10 skips over the "__builtin_" prefix (10 characters).  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + name_len, "%s", bname + 10);

  /* Count the scalar builtin's arguments to pick the vector
     signature (unary or binary).  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20649 :
20650 : /* Returns a decl of a function that implements scatter store with
20651 : register type VECTYPE and index type INDEX_TYPE and SCALE.
20652 : Return NULL_TREE if it is not available. */
20653 :
static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* Scatter stores require at least AVX512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* Per-element-count tuning knobs can disable scatter generation.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
	 ? !TARGET_USE_SCATTER_4PARTS
	 : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  /* Only integer or pointer indices in SImode or DImode are usable.  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  /* An index wider than a pointer cannot be represented.  */
  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* Pick the builtin: SI-indexed vs DI-indexed variant for each data
     mode.  512-bit data modes need only AVX512F; the 256/128-bit ones
     additionally require AVX512VL.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
	return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
	return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
	return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
	return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
	return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
	return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
	return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}
20760 :
20761 : /* Return true if it is safe to use the rsqrt optabs to optimize
20762 : 1.0/sqrt. */
20763 :
20764 : static bool
20765 84 : use_rsqrt_p (machine_mode mode)
20766 : {
20767 84 : return ((mode == HFmode
20768 36 : || (TARGET_SSE && TARGET_SSE_MATH))
20769 84 : && flag_finite_math_only
20770 83 : && !flag_trapping_math
20771 149 : && flag_unsafe_math_optimizations);
20772 : }
20773 :
20774 : /* Helper for avx_vpermilps256_operand et al. This is also used by
20775 : the expansion functions to turn the parallel back into a mask.
20776 : The return value is 0 for no match and the imm8+1 for a match. */
20777 :
int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  /* The parallel must select exactly one source element per result
     element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (!IN_RANGE (ipar[i], 4, 5))
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (!IN_RANGE (ipar[i], 6, 7))
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
    case E_V4DImode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (!IN_RANGE (ipar[i], 0, 1))
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (!IN_RANGE (ipar[i], 2, 3))
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
    case E_V16SImode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low lane's selectors are encoded in the imm8.  */
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  Each element consumes
	 nelt/2 bits of the immediate: two selector bits per SF
	 element, one per DF element.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
20880 :
20881 : /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20882 : the expansion functions to turn the parallel back into a mask.
20883 : The return value is 0 for no match and the imm8+1 for a match. */
20884 :
20885 : int
20886 50646 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
20887 : {
20888 50646 : unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20889 50646 : unsigned mask = 0;
20890 50646 : unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20891 :
20892 50646 : if (XVECLEN (par, 0) != (int) nelt)
20893 : return 0;
20894 :
20895 : /* Validate that all of the elements are constants, and not totally
20896 : out of range. Copy the data into an integral array to make the
20897 : subsequent checks easier. */
20898 404750 : for (i = 0; i < nelt; ++i)
20899 : {
20900 354104 : rtx er = XVECEXP (par, 0, i);
20901 354104 : unsigned HOST_WIDE_INT ei;
20902 :
20903 354104 : if (!CONST_INT_P (er))
20904 : return 0;
20905 354104 : ei = INTVAL (er);
20906 354104 : if (ei >= 2 * nelt)
20907 : return 0;
20908 354104 : ipar[i] = ei;
20909 : }
20910 :
20911 : /* Validate that the halves of the permute are halves. */
20912 98909 : for (i = 0; i < nelt2 - 1; ++i)
20913 79326 : if (ipar[i] + 1 != ipar[i + 1])
20914 : return 0;
20915 57980 : for (i = nelt2; i < nelt - 1; ++i)
20916 39803 : if (ipar[i] + 1 != ipar[i + 1])
20917 : return 0;
20918 :
20919 : /* Reconstruct the mask. */
20920 54443 : for (i = 0; i < 2; ++i)
20921 : {
20922 36312 : unsigned e = ipar[i * nelt2];
20923 36312 : if (e % nelt2)
20924 : return 0;
20925 36266 : e /= nelt2;
20926 36266 : mask |= e << (i * 4);
20927 : }
20928 :
20929 : /* Make sure success has a non-zero value by adding one. */
20930 18131 : return mask + 1;
20931 : }
20932 :
20933 : /* Return a mask of VPTERNLOG operands that do not affect output. */
20934 :
20935 : int
20936 2441 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
20937 : {
20938 2441 : int mask = 0;
20939 2441 : int imm8 = INTVAL (pternlog_imm);
20940 :
20941 2441 : if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20942 6 : mask |= 1;
20943 2441 : if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20944 6 : mask |= 2;
20945 2441 : if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20946 163 : mask |= 4;
20947 :
20948 2441 : return mask;
20949 : }
20950 :
20951 : /* Eliminate false dependencies on operands that do not affect output
20952 : by substituting other operands of a VPTERNLOG. */
20953 :
20954 : void
20955 85 : substitute_vpternlog_operands (rtx *operands)
20956 : {
20957 85 : int mask = vpternlog_redundant_operand_mask (operands[4]);
20958 :
20959 85 : if (mask & 1) /* The first operand is redundant. */
20960 2 : operands[1] = operands[2];
20961 :
20962 85 : if (mask & 2) /* The second operand is redundant. */
20963 2 : operands[2] = operands[1];
20964 :
20965 85 : if (mask & 4) /* The third operand is redundant. */
20966 81 : operands[3] = operands[1];
20967 4 : else if (REG_P (operands[3]))
20968 : {
20969 0 : if (mask & 1)
20970 0 : operands[1] = operands[3];
20971 0 : if (mask & 2)
20972 0 : operands[2] = operands[3];
20973 : }
20974 85 : }
20975 :
20976 : /* Return a register priority for hard reg REGNO. */
20977 : static int
20978 58017026 : ix86_register_priority (int hard_regno)
20979 : {
20980 : /* ebp and r13 as the base always wants a displacement, r12 as the
20981 : base always wants an index. So discourage their usage in an
20982 : address. */
20983 58017026 : if (hard_regno == R12_REG || hard_regno == R13_REG)
20984 : return 0;
20985 53596806 : if (hard_regno == BP_REG)
20986 : return 1;
20987 : /* New x86-64 int registers result in bigger code size. Discourage them. */
20988 51622791 : if (REX_INT_REGNO_P (hard_regno))
20989 : return 2;
20990 35231263 : if (REX2_INT_REGNO_P (hard_regno))
20991 : return 2;
20992 : /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20993 35228823 : if (REX_SSE_REGNO_P (hard_regno))
20994 : return 2;
20995 29146013 : if (EXT_REX_SSE_REGNO_P (hard_regno))
20996 : return 1;
20997 : /* Usage of AX register results in smaller code. Prefer it. */
20998 28867813 : if (hard_regno == AX_REG)
20999 3776138 : return 4;
21000 : return 3;
21001 : }
21002 :
21003 : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
21004 :
21005 : Put float CONST_DOUBLE in the constant pool instead of fp regs.
21006 : QImode must go into class Q_REGS.
21007 : Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21008 : movdf to do mem-to-mem moves through integer regs. */
21009 :
21010 : static reg_class_t
21011 547161196 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
21012 : {
      : /* Returns a (possibly narrowed) subclass of REGCLASS suitable for
      : reloading value X, or NO_REGS to force X into memory.  */
21013 547161196 : machine_mode mode = GET_MODE (x);
21014 :
21015 : /* We're only allowed to return a subclass of CLASS. Many of the
21016 : following checks fail for NO_REGS, so eliminate that early. */
21017 547161196 : if (regclass == NO_REGS)
21018 : return NO_REGS;
21019 :
21020 : /* All classes can load zeros. */
21021 546305686 : if (x == CONST0_RTX (mode))
21022 : return regclass;
21023 :
21024 : /* Force constants into memory if we are loading a (nonzero) constant into
21025 : an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
21026 : instructions to load from a constant. */
21027 521419865 : if (CONSTANT_P (x)
21028 521419865 : && (MAYBE_MMX_CLASS_P (regclass)
21029 151790656 : || MAYBE_SSE_CLASS_P (regclass)
21030 121944000 : || MAYBE_MASK_CLASS_P (regclass)))
21031 29978025 : return NO_REGS;
21032 :
21033 : /* Floating-point constants need more complex checks. */
21034 491441840 : if (CONST_DOUBLE_P (x))
21035 : {
21036 : /* General regs can load everything. */
21037 302444 : if (INTEGER_CLASS_P (regclass))
21038 : return regclass;
21039 :
21040 : /* Floats can load 0 and 1 plus some others. Note that we eliminated
21041 : zero above. We only want to wind up preferring 80387 registers if
21042 : we plan on doing computation with them. */
21043 179609 : if (IS_STACK_MODE (mode)
21044 237835 : && standard_80387_constant_p (x) > 0)
21045 : {
21046 : /* Limit class to FP regs. */
21047 40456 : if (FLOAT_CLASS_P (regclass))
21048 : return FLOAT_REGS;
21049 : }
21050 :
      : /* Any other FP constant: load it from the constant pool instead.  */
21051 139153 : return NO_REGS;
21052 : }
21053 :
21054 : /* Prefer SSE if we can use them for math. Also allow integer regs
21055 : when moves between register units are cheap. */
21056 491139396 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21057 : {
21058 31056765 : if (TARGET_INTER_UNIT_MOVES_FROM_VEC
21059 31041852 : && TARGET_INTER_UNIT_MOVES_TO_VEC
21060 93130957 : && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
21061 30890491 : return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21062 : else
21063 166274 : return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21064 : }
21065 :
21066 : /* Generally when we see PLUS here, it's the function invariant
21067 : (plus soft-fp const_int). Which can only be computed into general
21068 : regs. */
21069 460082631 : if (GET_CODE (x) == PLUS)
21070 1885460 : return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
21071 :
21072 : /* QImode constants are easy to load, but non-constant QImode data
21073 : must go into Q_REGS or ALL_MASK_REGS. */
21074 458197171 : if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21075 : {
21076 24799588 : if (Q_CLASS_P (regclass))
21077 : return regclass;
21078 20044440 : else if (reg_class_subset_p (Q_REGS, regclass))
21079 : return Q_REGS;
21080 54851 : else if (MASK_CLASS_P (regclass))
21081 : return regclass;
21082 : else
21083 : return NO_REGS;
21084 : }
21085 :
21086 : return regclass;
21087 : }
21088 :
21089 : /* Discourage putting floating-point values in SSE registers unless
21090 : SSE math is being used, and likewise for the 387 registers. */
21091 : static reg_class_t
21092 74413340 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
21093 : {
21094 : /* Restrict the output reload class to the register bank that we are doing
21095 : math on. If we would like not to return a subset of CLASS, reject this
21096 : alternative: if reload cannot do this, it will still use its choice. */
21097 74413340 : machine_mode mode = GET_MODE (x);
      : /* SSE math enabled: keep FP values out of the x87 stack.  */
21098 74413340 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21099 7210118 : return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
21100 :
      : /* x87-only modes (XFmode etc.): restrict to the 387 stack regs.  */
21101 67203222 : if (IS_STACK_MODE (mode))
21102 209534 : return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21103 :
21104 : return regclass;
21105 : }
21106 :
21107 : static reg_class_t
21108 385974437 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
21109 : machine_mode mode, secondary_reload_info *sri)
21110 : {
      : /* Implement TARGET_SECONDARY_RELOAD.  Return the class of an
      : intermediate register needed to move X (of MODE) into (IN_P true)
      : or out of a register of RCLASS, or NO_REGS if none; alternatively
      : record a special reload icode and extra cost in SRI.  */
21111 : /* Double-word spills from general registers to non-offsettable memory
21112 : references (zero-extended addresses) require special handling. */
21113 385974437 : if (TARGET_64BIT
21114 332924926 : && MEM_P (x)
21115 181286047 : && GET_MODE_SIZE (mode) > UNITS_PER_WORD
21116 19175809 : && INTEGER_CLASS_P (rclass)
21117 388739780 : && !offsettable_memref_p (x))
21118 : {
21119 2567866 : sri->icode = (in_p
21120 1283933 : ? CODE_FOR_reload_noff_load
21121 : : CODE_FOR_reload_noff_store);
21122 : /* Add the cost of moving address to a temporary. */
21123 1283933 : sri->extra_cost = 1;
21124 :
21125 1283933 : return NO_REGS;
21126 : }
21127 :
21128 : /* QImode spills from non-QI registers require
21129 : intermediate register on 32bit targets. */
21130 384690504 : if (mode == QImode
21131 384690504 : && ((!TARGET_64BIT && !in_p
21132 589586 : && INTEGER_CLASS_P (rclass)
21133 589542 : && MAYBE_NON_Q_CLASS_P (rclass))
21134 22234024 : || (!TARGET_AVX512DQ
21135 22035069 : && MAYBE_MASK_CLASS_P (rclass))))
21136 : {
21137 6560 : int regno = true_regnum (x);
21138 :
21139 : /* Return Q_REGS if the operand is in memory. */
21140 6560 : if (regno == -1)
21141 : return Q_REGS;
21142 :
21143 : return NO_REGS;
21144 : }
21145 :
21146 : /* Require movement to gpr, and then store to memory. */
21147 384683944 : if ((mode == HFmode || mode == HImode || mode == V2QImode
21148 : || mode == BFmode)
21149 3877606 : && !TARGET_SSE4_1
21150 3275361 : && SSE_CLASS_P (rclass)
21151 224056 : && !in_p && MEM_P (x))
21152 : {
21153 114616 : sri->extra_cost = 1;
21154 114616 : return GENERAL_REGS;
21155 : }
21156 :
21157 : /* This condition handles corner case where an expression involving
21158 : pointers gets vectorized. We're trying to use the address of a
21159 : stack slot as a vector initializer.
21160 :
21161 : (set (reg:V2DI 74 [ vect_cst_.2 ])
21162 : (vec_duplicate:V2DI (reg/f:DI 20 frame)))
21163 :
21164 : Eventually frame gets turned into sp+offset like this:
21165 :
21166 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21167 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21168 : (const_int 392 [0x188]))))
21169 :
21170 : That later gets turned into:
21171 :
21172 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21173 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21174 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
21175 :
21176 : We'll have the following reload recorded:
21177 :
21178 : Reload 0: reload_in (DI) =
21179 : (plus:DI (reg/f:DI 7 sp)
21180 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
21181 : reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21182 : SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
21183 : reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
21184 : reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21185 : reload_reg_rtx: (reg:V2DI 22 xmm1)
21186 :
21187 : Which isn't going to work since SSE instructions can't handle scalar
21188 : additions. Returning GENERAL_REGS forces the addition into integer
21189 : register and reload can handle subsequent reloads without problems. */
21190 :
21191 221412140 : if (in_p && GET_CODE (x) == PLUS
21192 2 : && SSE_CLASS_P (rclass)
21193 384569328 : && SCALAR_INT_MODE_P (mode))
21194 : return GENERAL_REGS;
21195 :
21196 : return NO_REGS;
21197 : }
21198 :
21199 : /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
21200 :
21201 : static bool
21202 716056063 : ix86_class_likely_spilled_p (reg_class_t rclass)
21203 : {
      : /* True for classes consisting of a single register (or very few),
      : since a pseudo forced into such a class is likely to be spilled
      : when the class's register is otherwise occupied.  */
21204 705980310 : switch (rclass)
21205 : {
21206 : case AREG:
21207 : case DREG:
21208 : case CREG:
21209 : case BREG:
21210 : case AD_REGS:
21211 : case SIREG:
21212 : case DIREG:
21213 : case SSE_FIRST_REG:
21214 : case FP_TOP_REG:
21215 : case FP_SECOND_REG:
21216 : return true;
21217 :
21218 684500871 : default:
21219 684500871 : break;
21220 684500871 : }
21221 :
21222 684500871 : return false;
21223 : }
21224 :
21225 : /* Implement TARGET_CALLEE_SAVE_COST. */
21226 :
21227 : static int
21228 81614046 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
21229 : unsigned int, int mem_cost, const HARD_REG_SET &, bool)
21230 : {
      : /* Return the cost of saving/restoring HARD_REGNO in the prologue and
      : epilogue, relative to MEM_COST (the generic spill cost).  */
21231 : /* Account for the fact that push and pop are shorter and do their
21232 : own allocation and deallocation. */
21233 81614046 : if (GENERAL_REGNO_P (hard_regno))
21234 : {
21235 : /* push is 1 byte while typical spill is 4-5 bytes.
21236 : ??? We probably should adjust size costs accordingly.
21237 : Costs are relative to reg-reg move that has 2 bytes for 32bit
21238 : and 3 bytes otherwise. Be sure that no cost table sets cost
21239 : to 2, so we end up with 0. */
21240 81604828 : if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
21241 3602502 : return 1;
21242 78002326 : return mem_cost - 2;
21243 : }
      : /* Non-GPRs cannot use push/pop; charge the full spill cost.  */
21244 : return mem_cost;
21245 : }
21246 :
21247 : /* Return true if a set of DST by the expression SRC should be allowed.
21248 : This prevents complex sets of likely_spilled hard regs before split1. */
21249 :
21250 : bool
21251 629510621 : ix86_hardreg_mov_ok (rtx dst, rtx src)
21252 : {
21253 : /* Avoid complex sets of likely_spilled hard registers before reload. */
      : /* Reject only when DST is a likely-spilled hard reg and SRC is neither
      : a register, memory, nor an easily-materialized constant (a standard
      : SSE constant for vector modes, an x86-64 immediate otherwise), and
      : we are still before the pre-reload split pass.  */
21254 512760979 : if (REG_P (dst) && HARD_REGISTER_P (dst)
21255 303162292 : && !REG_P (src) && !MEM_P (src)
21256 95250163 : && !(VECTOR_MODE_P (GET_MODE (dst))
21257 95250163 : ? standard_sse_constant_p (src, GET_MODE (dst))
21258 47389563 : : x86_64_immediate_operand (src, GET_MODE (dst)))
21259 10075753 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
21260 638361985 : && ix86_pre_reload_split ())
21261 : return false;
21262 : return true;
21263 : }
21264 :
21265 : /* If we are copying between registers from different register sets
21266 : (e.g. FP and integer), we may need a memory location.
21267 :
21268 : The function can't work reliably when one of the CLASSES is a class
21269 : containing registers from multiple sets. We avoid this by never combining
21270 : different sets in a single alternative in the machine description.
21271 : Ensure that this constraint holds to avoid unexpected surprises.
21272 :
21273 : When STRICT is false, we are being called from REGISTER_MOVE_COST,
21274 : so do not enforce these sanity checks.
21275 :
21276 : To optimize register_move_cost performance, define inline variant. */
21277 :
21278 : static inline bool
21279 5673301605 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21280 : reg_class_t class2, int strict)
21281 : {
      : /* LRA may query mixed NO_REGS combinations; no memory needed then.  */
21282 5673301605 : if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
21283 : return false;
21284 :
      : /* MAYBE_*_CLASS_P != *_CLASS_P means the class mixes registers from
      : several register sets; treat such unions conservatively (only
      : tolerated when not STRICT or during LRA).  */
21285 5640930102 : if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21286 4807022566 : || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21287 4104915351 : || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21288 3916510533 : || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21289 3738211332 : || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21290 3738211332 : || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
21291 3738211332 : || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
21292 9209236946 : || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
21293 : {
21294 2234523297 : gcc_assert (!strict || lra_in_progress);
21295 : return true;
21296 : }
21297 :
21298 3406406805 : if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21299 : return true;
21300 :
21301 : /* ??? This is a lie. We do have moves between mmx/general, and for
21302 : mmx/sse2. But by saying we need secondary memory we discourage the
21303 : register allocator from using the mmx registers unless needed. */
21304 3257469294 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21305 : return true;
21306 :
21307 : /* Between mask and general, we have moves no larger than word size. */
21308 3161333296 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21309 : {
21310 2608754 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
21311 3410587 : || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21312 192332 : return true;
21313 : }
21314 :
21315 3161140964 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21316 : {
21317 : /* SSE1 doesn't have any direct moves from other classes. */
21318 686719224 : if (!TARGET_SSE2)
21319 : return true;
21320 :
21321 684105540 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
21322 : return true;
21323 :
21324 : /* If the target says that inter-unit moves are more expensive
21325 : than moving through memory, then don't generate them. */
21326 1025710655 : if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
21327 1025224875 : || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
21328 1320929 : return true;
21329 :
21330 : /* With SSE4.1, *mov{ti,di}_internal supports moves between
21331 : SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */
21332 682784611 : if (TARGET_SSE4_1
21333 36618779 : && (TARGET_64BIT ? mode == TImode : mode == DImode))
21334 : return false;
21335 :
21336 681197239 : int msize = GET_MODE_SIZE (mode);
21337 :
21338 : /* Between SSE and general, we have moves no larger than word size. */
21339 697582142 : if (msize > UNITS_PER_WORD)
21340 : return true;
21341 :
21342 : /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
21343 : Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
21344 589217864 : int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
21345 :
21346 589217864 : if (msize < minsize)
21347 : return true;
21348 : }
21349 :
21350 : return false;
21351 : }
21352 :
21353 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21354 :
21355 : static bool
21356 70966917 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21357 : reg_class_t class2)
21358 : {
      : /* Strict (target-hook) wrapper around the inline worker above.  */
21359 70966917 : return inline_secondary_memory_needed (mode, class1, class2, true);
21360 : }
21361 :
21362 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
21363 :
21364 : get_secondary_mem widens integral modes to BITS_PER_WORD.
21365 : There is no need to emit full 64 bit move on 64 bit targets
21366 : for integral modes that can be moved using 32 bit move. */
21367 :
21368 : static machine_mode
21369 13178 : ix86_secondary_memory_needed_mode (machine_mode mode)
21370 : {
      : /* Widen sub-32-bit integral modes to 32 bits for the stack slot.  */
21371 26356 : if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
21372 19 : return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
21373 : return mode;
21374 : }
21375 :
21376 : /* Implement the TARGET_CLASS_MAX_NREGS hook.
21377 :
21378 : On the 80386, this is the size of MODE in words,
21379 : except in the FP regs, where a single reg is always enough. */
21380 :
21381 : static unsigned char
21382 5942220194 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
21383 : {
      : /* GPRs: XFmode/XCmode need special counts (80-bit values padded to
      : 2/3 resp. 4/6 words); everything else is size in words.  */
21384 5942220194 : if (MAYBE_INTEGER_CLASS_P (rclass))
21385 : {
21386 3997427355 : if (mode == XFmode)
21387 145675691 : return (TARGET_64BIT ? 2 : 3);
21388 3851751664 : else if (mode == XCmode)
21389 145675322 : return (TARGET_64BIT ? 4 : 6);
21390 : else
21391 7518172516 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21392 : }
21393 : else
21394 : {
      : /* FP/SSE/MMX/mask: one register per value, two for complex.  */
21395 1944792839 : if (COMPLEX_MODE_P (mode))
21396 : return 2;
21397 : else
21398 1660986378 : return 1;
21399 : }
21400 : }
21401 :
21402 : /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21403 :
21404 : static bool
21405 39492967 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
21406 : reg_class_t regclass)
21407 : {
21408 39492967 : if (from == to)
21409 : return true;
21410 :
21411 : /* x87 registers can't do subreg at all, as all values are reformatted
21412 : to extended precision.
21413 :
21414 : ??? middle-end queries mode changes for ALL_REGS and this makes
21415 : vec_series_lowpart_p to always return false. We probably should
21416 : restrict this to modes supported by i387 and check if it is enabled. */
21417 38094437 : if (MAYBE_FLOAT_CLASS_P (regclass))
21418 : return false;
21419 :
21420 33425905 : if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21421 : {
21422 : /* Vector registers do not support QI or HImode loads. If we don't
21423 : disallow a change to these modes, reload will assume it's ok to
21424 : drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21425 : the vec_dupv4hi pattern.
21426 : NB: SSE2 can load 16bit data to sse register via pinsrw. */
21427 16252196 : int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
21428 16252196 : if (GET_MODE_SIZE (from) < mov_size
21429 32504112 : || GET_MODE_SIZE (to) < mov_size)
21430 : return false;
21431 : }
21432 :
21433 : return true;
21434 : }
21435 :
21436 : /* Return index of MODE in the sse load/store tables. */
21437 :
21438 : static inline int
21439 772271204 : sse_store_index (machine_mode mode)
21440 : {
21441 : /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21442 : costs to processor_costs, which requires changes to all entries in
21443 : processor cost table. */
21444 772271204 : if (mode == E_HFmode)
21445 136922112 : mode = E_SFmode;
21446 :
      : /* Index by mode size; -1 means MODE has no SSE cost-table entry.  */
21447 1544542408 : switch (GET_MODE_SIZE (mode))
21448 : {
21449 : case 4:
21450 : return 0;
21451 : case 8:
21452 : return 1;
21453 : case 16:
21454 : return 2;
21455 : case 32:
21456 : return 3;
21457 : case 64:
21458 : return 4;
21459 : default:
21460 : return -1;
21461 : }
21462 : }
21463 :
21464 : /* Return the cost of moving data of mode M between a
21465 : register and memory. A value of 2 is the default; this cost is
21466 : relative to those in `REGISTER_MOVE_COST'.
21467 :
21468 : This function is used extensively by register_move_cost that is used to
21469 : build tables at startup. Make it inline in this case.
21470 : When IN is 2, return maximum of in and out move cost.
21471 :
21472 : If moving between registers and memory is more expensive than
21473 : between two registers, you should define this macro to express the
21474 : relative cost.
21475 :
21476 : Model also increased moving costs of QImode registers in non
21477 : Q_REGS classes.
21478 : */
21479 : static inline int
21480 6904884914 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21481 : {
      : /* IN: 0 = store cost, 1 = load cost, 2 = max of load and store.  */
21482 6904884914 : int cost;
21483 :
21484 6904884914 : if (FLOAT_CLASS_P (regclass))
21485 : {
21486 352629704 : int index;
21487 352629704 : switch (mode)
21488 : {
21489 : case E_SFmode:
21490 : index = 0;
21491 : break;
21492 : case E_DFmode:
21493 : index = 1;
21494 : break;
21495 : case E_XFmode:
21496 : index = 2;
21497 : break;
21498 : default:
21499 : return 100;
21500 : }
21501 105406598 : if (in == 2)
21502 101473476 : return MAX (ix86_cost->hard_register.fp_load [index],
21503 : ix86_cost->hard_register.fp_store [index]);
21504 3933122 : return in ? ix86_cost->hard_register.fp_load [index]
21505 3933122 : : ix86_cost->hard_register.fp_store [index];
21506 : }
21507 6552255210 : if (SSE_CLASS_P (regclass))
21508 : {
21509 643880004 : int index = sse_store_index (mode);
21510 643880004 : if (index == -1)
21511 : return 100;
21512 559961169 : if (in == 2)
21513 396815180 : return MAX (ix86_cost->hard_register.sse_load [index],
21514 : ix86_cost->hard_register.sse_store [index]);
21515 163145989 : return in ? ix86_cost->hard_register.sse_load [index]
21516 163145989 : : ix86_cost->hard_register.sse_store [index];
21517 : }
21518 5908375206 : if (MASK_CLASS_P (regclass))
21519 : {
21520 108126550 : int index;
21521 216253100 : switch (GET_MODE_SIZE (mode))
21522 : {
21523 : case 1:
21524 : index = 0;
21525 : break;
21526 8921149 : case 2:
21527 8921149 : index = 1;
21528 8921149 : break;
21529 : /* DImode loads and stores assumed to cost the same as SImode. */
21530 40127153 : case 4:
21531 40127153 : case 8:
21532 40127153 : index = 2;
21533 40127153 : break;
21534 : default:
21535 : return 100;
21536 : }
21537 :
21538 52620886 : if (in == 2)
21539 584275 : return MAX (ix86_cost->hard_register.mask_load[index],
21540 : ix86_cost->hard_register.mask_store[index]);
      : /* NOTE(review): the in==2 path uses the computed INDEX, but the
      : load/store paths below hard-code [2] — looks inconsistent for
      : 1- and 2-byte masks; confirm intent against upstream history.  */
21541 52036611 : return in ? ix86_cost->hard_register.mask_load[2]
21542 52036611 : : ix86_cost->hard_register.mask_store[2];
21543 : }
21544 5800248656 : if (MMX_CLASS_P (regclass))
21545 : {
21546 172171813 : int index;
21547 344343626 : switch (GET_MODE_SIZE (mode))
21548 : {
21549 : case 4:
21550 : index = 0;
21551 : break;
21552 101046497 : case 8:
21553 101046497 : index = 1;
21554 101046497 : break;
21555 : default:
21556 : return 100;
21557 : }
21558 138348049 : if (in == 2)
21559 118400679 : return MAX (ix86_cost->hard_register.mmx_load [index],
21560 : ix86_cost->hard_register.mmx_store [index]);
21561 19947370 : return in ? ix86_cost->hard_register.mmx_load [index]
21562 19947370 : : ix86_cost->hard_register.mmx_store [index];
21563 : }
      : /* General-purpose registers: keyed by access size.  */
21564 11256153686 : switch (GET_MODE_SIZE (mode))
21565 : {
21566 124585275 : case 1:
21567 124585275 : if (Q_CLASS_P (regclass) || TARGET_64BIT)
21568 : {
21569 121954562 : if (!in)
21570 19563556 : return ix86_cost->hard_register.int_store[0];
21571 102391006 : if (TARGET_PARTIAL_REG_DEPENDENCY
21572 102391006 : && optimize_function_for_speed_p (cfun))
21573 95490616 : cost = ix86_cost->hard_register.movzbl_load;
21574 : else
21575 6900390 : cost = ix86_cost->hard_register.int_load[0];
21576 102391006 : if (in == 2)
21577 82799870 : return MAX (cost, ix86_cost->hard_register.int_store[0]);
21578 : return cost;
21579 : }
21580 : else
21581 : {
      : /* Non-Q GPR on 32-bit: byte store needs an extra move (+4).  */
21582 2630713 : if (in == 2)
21583 1863020 : return MAX (ix86_cost->hard_register.movzbl_load,
21584 : ix86_cost->hard_register.int_store[0] + 4);
21585 767693 : if (in)
21586 383901 : return ix86_cost->hard_register.movzbl_load;
21587 : else
21588 383792 : return ix86_cost->hard_register.int_store[0] + 4;
21589 : }
21590 643094145 : break;
21591 643094145 : case 2:
21592 643094145 : {
21593 643094145 : int cost;
21594 643094145 : if (in == 2)
21595 543336563 : cost = MAX (ix86_cost->hard_register.int_load[1],
21596 : ix86_cost->hard_register.int_store[1]);
21597 : else
21598 99757582 : cost = in ? ix86_cost->hard_register.int_load[1]
21599 : : ix86_cost->hard_register.int_store[1];
21600 :
21601 643094145 : if (mode == E_HFmode)
21602 : {
21603 : /* Prefer SSE over GPR for HFmode. */
21604 124616280 : int sse_cost;
21605 124616280 : int index = sse_store_index (mode);
21606 124616280 : if (in == 2)
21607 114642544 : sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21608 : ix86_cost->hard_register.sse_store[index]);
21609 : else
21610 19947472 : sse_cost = (in
21611 9973736 : ? ix86_cost->hard_register.sse_load [index]
21612 : : ix86_cost->hard_register.sse_store [index]);
21613 124616280 : if (sse_cost >= cost)
21614 124616280 : cost = sse_cost + 1;
21615 : }
21616 : return cost;
21617 : }
21618 4860397423 : default:
21619 4860397423 : if (in == 2)
21620 3762908338 : cost = MAX (ix86_cost->hard_register.int_load[2],
21621 : ix86_cost->hard_register.int_store[2]);
21622 1097489085 : else if (in)
21623 548937015 : cost = ix86_cost->hard_register.int_load[2];
21624 : else
21625 548552070 : cost = ix86_cost->hard_register.int_store[2];
21626 : /* Multiply with the number of GPR moves needed. */
21627 9839842331 : return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21628 : }
21629 : }
21630 :
21631 : static int
21632 1775099928 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21633 : {
      : /* Implement TARGET_MEMORY_MOVE_COST; maps the bool IN to the
      : worker's 0/1 encoding (never 2, the max-of-both form).  */
21634 2662323276 : return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
21635 : }
21636 :
21637 :
21638 : /* Return the cost of moving data from a register in class CLASS1 to
21639 : one in class CLASS2.
21640 :
21641 : It is not required that the cost always equal 2 when FROM is the same as TO;
21642 : on some machines it is expensive to move between registers if they are not
21643 : general registers. */
21644 :
21645 : static int
21646 5602334688 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
21647 : reg_class_t class2_i)
21648 : {
21649 5602334688 : enum reg_class class1 = (enum reg_class) class1_i;
21650 5602334688 : enum reg_class class2 = (enum reg_class) class2_i;
21651 :
21652 : /* In case we require secondary memory, compute cost of the store followed
21653 : by load. In order to avoid bad register allocation choices, we need
21654 : for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21655 :
21656 5602334688 : if (inline_secondary_memory_needed (mode, class1, class2, false))
21657 : {
21658 2564892493 : int cost = 1;
21659 :
      : /* 2 = max of load and store cost for each side of the bounce.  */
21660 2564892493 : cost += inline_memory_move_cost (mode, class1, 2);
21661 2564892493 : cost += inline_memory_move_cost (mode, class2, 2);
21662 :
21663 : /* In case of copying from general_purpose_register we may emit multiple
21664 : stores followed by single load causing memory size mismatch stall.
21665 : Count this as arbitrarily high cost of 20. */
21666 5129784986 : if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
21667 767892416 : && TARGET_MEMORY_MISMATCH_STALL
21668 4100677325 : && targetm.class_max_nregs (class1, mode)
21669 767892416 : > targetm.class_max_nregs (class2, mode))
21670 146115602 : cost += 20;
21671 :
21672 : /* In the case of FP/MMX moves, the registers actually overlap, and we
21673 : have to switch modes in order to treat them differently. */
21674 59200376 : if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21675 2614769652 : || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21676 18646434 : cost += 20;
21677 :
21678 2564892493 : return cost;
21679 : }
21680 :
21681 : /* Moves between MMX and non-MMX units require secondary memory. */
21682 3037442195 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21683 0 : gcc_unreachable ();
21684 :
21685 3037442195 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21686 581442737 : return (SSE_CLASS_P (class1)
21687 581442737 : ? ix86_cost->hard_register.sse_to_integer
21688 581442737 : : ix86_cost->hard_register.integer_to_sse);
21689 :
21690 : /* Moves between mask register and GPR. */
21691 2455999458 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21692 : {
21693 1055171 : return (MASK_CLASS_P (class1)
21694 1055171 : ? ix86_cost->hard_register.mask_to_integer
21695 1055171 : : ix86_cost->hard_register.integer_to_mask);
21696 : }
21697 : /* Moving between mask registers. */
21698 2454944287 : if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
21699 101344 : return ix86_cost->hard_register.mask_move;
21700 :
      : /* Same-bank moves: pick the per-bank reg-reg move cost.  */
21701 2454842943 : if (MAYBE_FLOAT_CLASS_P (class1))
21702 11783902 : return ix86_cost->hard_register.fp_move;
21703 2443059041 : if (MAYBE_SSE_CLASS_P (class1))
21704 : {
21705 229315214 : if (GET_MODE_BITSIZE (mode) <= 128)
21706 112184339 : return ix86_cost->hard_register.xmm_move;
21707 4946536 : if (GET_MODE_BITSIZE (mode) <= 256)
21708 1572175 : return ix86_cost->hard_register.ymm_move;
21709 901093 : return ix86_cost->hard_register.zmm_move;
21710 : }
21711 2328401434 : if (MAYBE_MMX_CLASS_P (class1))
21712 2168243 : return ix86_cost->hard_register.mmx_move;
21713 : return 2;
21714 : }
21715 :
21716 : /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
21717 : words of a value of mode MODE but can be less for certain modes in
21718 : special long registers.
21719 :
21720 : Actually there are no two word move instructions for consecutive
21721 : registers. And only registers 0-3 may have mov byte instructions
21722 : applied to them. */
21723 :
21724 : static unsigned int
21725 8884675888 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
21726 : {
21727 8884675888 : if (GENERAL_REGNO_P (regno))
21728 : {
      : /* 80-bit XFmode occupies 2 (64-bit) or 3 (32-bit) GPR words;
      : the complex XCmode is twice that.  */
21729 3090322048 : if (mode == XFmode)
21730 25400608 : return TARGET_64BIT ? 2 : 3;
21731 3065400096 : if (mode == XCmode)
21732 25400608 : return TARGET_64BIT ? 4 : 6;
21733 6139352320 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21734 : }
21735 5794353840 : if (COMPLEX_MODE_P (mode))
21736 : return 2;
21737 : /* Register pair for mask registers. */
21738 5046695280 : if (mode == P2QImode || mode == P2HImode)
21739 93457320 : return 2;
21740 :
21741 : return 1;
21742 : }
21743 :
21744 : /* Implement REGMODE_NATURAL_SIZE(MODE). */
21745 : unsigned int
21746 106615971 : ix86_regmode_natural_size (machine_mode mode)
21747 : {
      : /* Mask-register pair modes split naturally in half; everything
      : else uses the word size.  */
21748 106615971 : if (mode == P2HImode || mode == P2QImode)
21749 2480 : return GET_MODE_SIZE (mode) / 2;
21750 106614731 : return UNITS_PER_WORD;
21751 : }
21752 :
21753 : /* Implement TARGET_HARD_REGNO_MODE_OK. */
21754 :
21755 : static bool
21756 54098043673 : ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
21757 : {
21758 : /* Flags and only flags can only hold CCmode values. */
21759 54098043673 : if (CC_REGNO_P (regno))
21760 430750482 : return GET_MODE_CLASS (mode) == MODE_CC;
21761 53667293191 : if (GET_MODE_CLASS (mode) == MODE_CC
21762 : || GET_MODE_CLASS (mode) == MODE_RANDOM)
21763 : return false;
21764 48149166281 : if (STACK_REGNO_P (regno))
21765 4686072895 : return VALID_FP_MODE_P (mode);
21766 43463093386 : if (MASK_REGNO_P (regno))
21767 : {
21768 : /* Register pair only starts at even register number. */
21769 3661855084 : if ((mode == P2QImode || mode == P2HImode))
21770 51042110 : return MASK_PAIR_REGNO_P(regno);
21771 :
21772 1001431422 : return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
21773 4591880768 : || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
21774 : }
21775 :
21776 39801238302 : if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21777 : return false;
21778 :
21779 38825104011 : if (SSE_REGNO_P (regno))
21780 : {
21781 : /* We implement the move patterns for all vector modes into and
21782 : out of SSE registers, even when no operation instructions
21783 : are available. */
21784 :
21785 : /* For AVX-512 we allow, regardless of regno:
21786 : - XI mode
21787 : - any of 512-bit wide vector mode
21788 : - any scalar mode. */
21789 16794332125 : if (TARGET_AVX512F
21790 : && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
21791 : || VALID_AVX512F_SCALAR_MODE (mode)))
21792 : return true;
21793 :
21794 : /* TODO check for QI/HI scalars. */
21795 : /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
21796 16103517863 : if (TARGET_AVX512VL
21797 1751315882 : && (VALID_AVX256_REG_OR_OI_MODE (mode)
21798 1539034657 : || VALID_AVX512VL_128_REG_MODE (mode)))
21799 : return true;
21800 :
21801 : /* xmm16-xmm31 are only available for AVX-512. */
21802 15656955310 : if (EXT_REX_SSE_REGNO_P (regno))
21803 : return false;
21804 :
21805 : /* OImode and AVX modes are available only when AVX is enabled. */
21806 9054877239 : return ((TARGET_AVX
21807 1925399835 : && VALID_AVX256_REG_OR_OI_MODE (mode))
21808 : || VALID_SSE_REG_MODE (mode)
21809 : || VALID_SSE2_REG_MODE (mode)
21810 : || VALID_MMX_REG_MODE (mode)
21811 9054877239 : || VALID_MMX_REG_MODE_3DNOW (mode));
21812 : }
21813 22030771886 : if (MMX_REGNO_P (regno))
21814 : {
21815 : /* We implement the move patterns for 3DNOW modes even in MMX mode,
21816 : so if the register is available at all, then we can move data of
21817 : the given mode into or out of it. */
21818 3927156805 : return (VALID_MMX_REG_MODE (mode)
21819 : || VALID_MMX_REG_MODE_3DNOW (mode));
21820 : }
21821 :
21822 18103615081 : if (mode == QImode)
21823 : {
21824 : /* Take care for QImode values - they can be in non-QI regs,
21825 : but then they do cause partial register stalls. */
21826 205468178 : if (ANY_QI_REGNO_P (regno))
21827 : return true;
21828 14282128 : if (!TARGET_PARTIAL_REG_STALL)
21829 : return true;
21830 : /* LRA checks if the hard register is OK for the given mode.
21831 : QImode values can live in non-QI regs, so we allow all
21832 : registers here. */
21833 0 : if (lra_in_progress)
21834 : return true;
21835 0 : return !can_create_pseudo_p ();
21836 : }
21837 : /* We handle both integer and floats in the general purpose registers. */
21838 17898146903 : else if (VALID_INT_MODE_P (mode)
21839 13092050575 : || VALID_FP_MODE_P (mode))
21840 : return true;
21841 : /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21842 : on to use that value in smaller contexts, this can easily force a
21843 : pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21844 : supporting DImode, allow it. */
21845 12037341810 : else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21846 : return true;
21847 :
21848 : return false;
21849 : }
21850 :
21851 : /* Implement TARGET_INSN_CALLEE_ABI. */
21852 :
21853 : const predefined_function_abi &
21854 247865737 : ix86_insn_callee_abi (const rtx_insn *insn)
21855 : {
      : /* Default to ABI 0; vzeroupper call insns use the special
      : ABI_VZEROUPPER entry (clobbers only upper SSE halves).  */
21856 247865737 : unsigned int abi_id = 0;
21857 247865737 : rtx pat = PATTERN (insn);
21858 247865737 : if (vzeroupper_pattern (pat, VOIDmode))
21859 410334 : abi_id = ABI_VZEROUPPER;
21860 :
21861 247865737 : return function_abis[abi_id];
21862 : }
21863 :
21864 : /* Initialize function_abis with corresponding abi_id,
21865 : currently only handle vzeroupper. */
21866 : void
21867 22624 : ix86_initialize_callee_abi (unsigned int abi_id)
21868 : {
21869 22624 : gcc_assert (abi_id == ABI_VZEROUPPER);
21870 22624 : predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21871 22624 : if (!vzeroupper_abi.initialized_p ())
21872 : {
21873 : HARD_REG_SET full_reg_clobbers;
21874 4274 : CLEAR_HARD_REG_SET (full_reg_clobbers);
21875 4274 : vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21876 : }
21877 22624 : }
21878 :
21879 : void
21880 22624 : ix86_expand_avx_vzeroupper (void)
21881 : {
21882 : /* Initialize vzeroupper_abi here. */
21883 22624 : ix86_initialize_callee_abi (ABI_VZEROUPPER);
21884 22624 : rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21885 : /* Return false for non-local goto in can_nonlocal_goto. */
21886 22624 : make_reg_eh_region_note (insn, 0, INT_MIN);
21887 : /* Flag used for call_insn indicates it's a fake call. */
21888 22624 : RTX_FLAG (insn, used) = 1;
21889 22624 : }
21890 :
21891 :
21892 : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21893 : saves SSE registers across calls is Win64 (thus no need to check the
21894 : current ABI here), and with AVX enabled Win64 only guarantees that
21895 : the low 16 bytes are saved. */
21896 :
21897 : static bool
21898 2035087238 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21899 : machine_mode mode)
21900 : {
21901 : /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21902 2035087238 : if (abi_id == ABI_VZEROUPPER)
21903 30889952 : return (GET_MODE_SIZE (mode) > 16
21904 30889952 : && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21905 4723004 : || LEGACY_SSE_REGNO_P (regno)));
21906 :
21907 2637800286 : return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21908 : }
21909 :
21910 : /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21911 : tieable integer mode. */
21912 :
21913 : static bool
21914 52269727 : ix86_tieable_integer_mode_p (machine_mode mode)
21915 : {
21916 52269727 : switch (mode)
21917 : {
21918 : case E_HImode:
21919 : case E_SImode:
21920 : return true;
21921 :
21922 5260227 : case E_QImode:
21923 5260227 : return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21924 :
21925 10229056 : case E_DImode:
21926 10229056 : return TARGET_64BIT;
21927 :
21928 : default:
21929 : return false;
21930 : }
21931 : }
21932 :
21933 : /* Implement TARGET_MODES_TIEABLE_P.
21934 :
21935 : Return true if MODE1 is accessible in a register that can hold MODE2
21936 : without copying. That is, all register classes that can hold MODE2
21937 : can also hold MODE1. */
21938 :
21939 : static bool
21940 33856412 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21941 : {
21942 33856412 : if (mode1 == mode2)
21943 : return true;
21944 :
21945 33770079 : if (ix86_tieable_integer_mode_p (mode1)
21946 33770079 : && ix86_tieable_integer_mode_p (mode2))
21947 : return true;
21948 :
21949 : /* MODE2 being XFmode implies fp stack or general regs, which means we
21950 : can tie any smaller floating point modes to it. Note that we do not
21951 : tie this with TFmode. */
21952 24677965 : if (mode2 == XFmode)
21953 4313 : return mode1 == SFmode || mode1 == DFmode;
21954 :
21955 : /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21956 : that we can tie it with SFmode. */
21957 24673652 : if (mode2 == DFmode)
21958 249967 : return mode1 == SFmode;
21959 :
21960 : /* If MODE2 is only appropriate for an SSE register, then tie with
21961 : any vector modes or scalar floating point modes acceptable to SSE
21962 : registers, excluding scalar integer modes with SUBREG:
21963 : (subreg:QI (reg:TI 99) 0))
21964 : (subreg:HI (reg:TI 99) 0))
21965 : (subreg:SI (reg:TI 99) 0))
21966 : (subreg:DI (reg:TI 99) 0))
21967 : to avoid unnecessary move from SSE register to integer register.
21968 : */
21969 24423685 : if (GET_MODE_SIZE (mode2) >= 16
21970 38246266 : && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
21971 13443828 : || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
21972 483220 : && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
21973 30344011 : && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21974 5487303 : return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
21975 :
21976 : /* If MODE2 is appropriate for an MMX register, then tie
21977 : with any other mode acceptable to MMX registers. */
21978 18936382 : if (GET_MODE_SIZE (mode2) == 8
21979 18936382 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21980 3293870 : return (GET_MODE_SIZE (mode1) == 8
21981 3293870 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21982 :
21983 : /* SCmode and DImode can be tied. */
21984 15642512 : if ((mode1 == E_SCmode && mode2 == E_DImode)
21985 15642512 : || (mode1 == E_DImode && mode2 == E_SCmode))
21986 108 : return TARGET_64BIT;
21987 :
21988 : /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
21989 15642404 : if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
21990 15642404 : || (mode1 == E_V2SFmode && mode2 == E_SCmode)
21991 15642404 : || (mode1 == E_DCmode && mode2 == E_V2DFmode)
21992 15642404 : || (mode1 == E_V2DFmode && mode2 == E_DCmode))
21993 0 : return true;
21994 :
21995 : return false;
21996 : }
21997 :
21998 : /* Return the cost of moving between two registers of mode MODE. */
21999 :
22000 : static int
22001 29222860 : ix86_set_reg_reg_cost (machine_mode mode)
22002 : {
22003 29222860 : unsigned int units = UNITS_PER_WORD;
22004 :
22005 29222860 : switch (GET_MODE_CLASS (mode))
22006 : {
22007 : default:
22008 : break;
22009 :
22010 : case MODE_CC:
22011 29222860 : units = GET_MODE_SIZE (CCmode);
22012 : break;
22013 :
22014 1162795 : case MODE_FLOAT:
22015 1162795 : if ((TARGET_SSE && mode == TFmode)
22016 681542 : || (TARGET_80387 && mode == XFmode)
22017 209441 : || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
22018 141802 : || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
22019 2295732 : units = GET_MODE_SIZE (mode);
22020 : break;
22021 :
22022 1305680 : case MODE_COMPLEX_FLOAT:
22023 1305680 : if ((TARGET_SSE && mode == TCmode)
22024 875386 : || (TARGET_80387 && mode == XCmode)
22025 444972 : || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
22026 14530 : || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
22027 2604876 : units = GET_MODE_SIZE (mode);
22028 : break;
22029 :
22030 18683826 : case MODE_VECTOR_INT:
22031 18683826 : case MODE_VECTOR_FLOAT:
22032 18683826 : if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
22033 18587725 : || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
22034 18415523 : || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22035 15791197 : || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22036 14487537 : || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
22037 14442027 : && VALID_MMX_REG_MODE (mode)))
22038 8499570 : units = GET_MODE_SIZE (mode);
22039 : }
22040 :
22041 : /* Return the cost of moving between two registers of mode MODE,
22042 : assuming that the move will be in pieces of at most UNITS bytes. */
22043 29222860 : return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
22044 : }
22045 :
22046 : /* Return cost of vector operation in MODE given that scalar version has
22047 : COST. */
22048 :
22049 : static int
22050 2827787303 : ix86_vec_cost (machine_mode mode, int cost)
22051 : {
22052 2827787303 : if (!VECTOR_MODE_P (mode))
22053 : return cost;
22054 :
22055 2827563605 : if (GET_MODE_BITSIZE (mode) == 128
22056 2827563605 : && TARGET_SSE_SPLIT_REGS)
22057 2862046 : return cost * GET_MODE_BITSIZE (mode) / 64;
22058 2826132582 : else if (GET_MODE_BITSIZE (mode) > 128
22059 2826132582 : && TARGET_AVX256_SPLIT_REGS)
22060 1676180 : return cost * GET_MODE_BITSIZE (mode) / 128;
22061 2825294492 : else if (GET_MODE_BITSIZE (mode) > 256
22062 2825294492 : && TARGET_AVX512_SPLIT_REGS)
22063 194508 : return cost * GET_MODE_BITSIZE (mode) / 256;
22064 : return cost;
22065 : }
22066 :
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
   vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2.
   COST is the tuning cost table, MODE the narrow input vector mode and
   UNS_P whether the multiplication is unsigned.  */
static int
ix86_widen_mult_cost (const struct processor_costs *cost,
		      enum machine_mode mode, bool uns_p)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
  int extra_cost = 0;	/* Additional insns some variants require.  */
  int basic_cost = 0;	/* Cost of the base mul + shuffle sequence.  */
  switch (mode)
    {
    case V8HImode:
    case V16HImode:
      /* Signed widening (and any 256-bit widening) needs two extra
	 SSE ops on top of the base sequence.  */
      if (!uns_p || mode == V16HImode)
	extra_cost = cost->sse_op * 2;
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    case V4SImode:
    case V8SImode:
      /* pmulhw/pmullw can be used.  */
      basic_cost = cost->mulss * 2 + cost->sse_op * 2;
      break;
    case V2DImode:
      /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
	 require extra 4 mul, 4 add, 4 cmp and 2 shift.  */
      if (!TARGET_SSE4_1 && !uns_p)
	extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
		      + cost->sse_op * 2;
      /* Fallthru.  */
    case V4DImode:
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    default:
      /* Not implemented.  */
      return 100;
    }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
}
22105 :
/* Return cost of multiplication in MODE, using the tuning table COST.
   Scalar FP, vector FP, emulated integer-vector and plain scalar
   integer multiplies are all costed here.  */

static int
ix86_multiplication_cost (const struct processor_costs *cost,
			  enum machine_mode mode)
{
  /* For vector modes, cost tables are indexed by the element mode.  */
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  /* Scalar SSE math (incl. HF/BF) uses mulss/mulsd directly.  */
  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  /* x87 multiply.  */
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  /* Vector FP multiply, scaled by register-split factor.  */
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Integer vector multiplies are often emulated; NMULTS and NOPS
	 count the multiply and support insns of the emulation.  */
      int nmults, nops;
      /* Cost of reading the memory.  */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  /* Partial V*QImode is emulated with 4-6 insns.  */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX512BW && TARGET_AVX512VL)
	    ;
	  else if (TARGET_AVX2)
	    nops += 2;
	  else if (TARGET_XOP)
	    extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	  else
	    {
	      nops += 1;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V16QImode:
	  /* V*QImode is emulated with 4-11 insns.  */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
	    {
	      if (!(TARGET_AVX512BW && TARGET_AVX512VL))
		nops += 3;
	    }
	  else if (TARGET_XOP)
	    {
	      nmults += 1;
	      nops += 2;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  else
	    {
	      nmults += 1;
	      nops += 4;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V32QImode:
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
	    {
	      nmults += 1;
	      nops += 4;
	      /* 2 loads, so no division by 2.  */
	      extra += COSTS_N_INSNS (cost->sse_load[3]);
	    }
	  goto do_qimode;

	case V64QImode:
	  nmults = 2;
	  nops = 9;
	  /* 2 loads of each size, so no division by 2.  */
	  extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);

	do_qimode:
	  /* Shared tail for all the QImode vector cases above.  */
	  return ix86_vec_cost (mode, cost->mulss * nmults
				+ cost->sse_op * nops) + extra;

	case V4SImode:
	  /* pmulld is used in this case. No emulation is needed.  */
	  if (TARGET_SSE4_1)
	    goto do_native;
	  /* V4SImode is emulated with 7 insns.  */
	  else
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);

	case V2DImode:
	case V4DImode:
	  /* vpmullq is used in this case. No emulation is needed.  */
	  if (TARGET_AVX512DQ && TARGET_AVX512VL)
	    goto do_native;
	  /* V*DImode is emulated with 6-8 insns.  */
	  else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
	  /* FALLTHRU */
	case V8DImode:
	  /* vpmullq is used in this case. No emulation is needed.  */
	  if (TARGET_AVX512DQ && mode == V8DImode)
	    goto do_native;
	  else
	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);

	default:
	do_native:
	  /* Single native vector multiply insn.  */
	  return ix86_vec_cost (mode, cost->mulss);
	}
    }
  else
    /* Scalar integer multiply: setup cost plus a rough per-bit cost.  */
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
22232 :
22233 : /* Return cost of multiplication in MODE. */
22234 :
22235 : static int
22236 72506382 : ix86_division_cost (const struct processor_costs *cost,
22237 : enum machine_mode mode)
22238 : {
22239 72506382 : machine_mode inner_mode = mode;
22240 72506382 : if (VECTOR_MODE_P (mode))
22241 53517129 : inner_mode = GET_MODE_INNER (mode);
22242 :
22243 72506382 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22244 247929 : return inner_mode == DFmode ? cost->divsd : cost->divss;
22245 72258453 : else if (X87_FLOAT_MODE_P (mode))
22246 44810 : return cost->fdiv;
22247 72213643 : else if (FLOAT_MODE_P (mode))
22248 17202 : return ix86_vec_cost (mode,
22249 17202 : inner_mode == DFmode ? cost->divsd : cost->divss);
22250 : else
22251 80535944 : return cost->divide[MODE_INDEX (mode)];
22252 : }
22253 :
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
   if op1 is a result of subreg.

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum rtx_code code,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  /* Callers may pass null pointers when they don't care about
     skip flags; the two pointers are set (or not) together.  */
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;

  /* Vector integer shifts: mostly emulated sequences whose length
     depends on ISA level and whether the count is constant.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      int count;
      /* Cost of reading the memory.  */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  if (TARGET_AVX2)
	    /* Use vpbroadcast.  */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_SSE4_1)
	    count = 5;
	  else if (code == ASHIFTRT)
	    count = 6;
	  else
	    count = 5;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V16QImode:
	  if (TARGET_XOP)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb.  */
	      if (constant_op1)
		{
		  extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
		  return ix86_vec_cost (mode, cost->sse_op) + extra;
		}
	      else
		{
		  count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost->sse_op * count);
		}
	    }
	  /* FALLTHRU */
	case V32QImode:
	  if (TARGET_GFNI && constant_op1)
	    {
	      /* Use vgf2p8affine.  One extra load for the mask, but in a loop
		 with enough registers it will be moved out.  So for now don't
		 account the constant mask load.  This is not quite right
		 for non loop vectorization.  */
	      extra = 0;
	      return ix86_vec_cost (mode, cost->sse_op) + extra;
	    }
	  if (TARGET_AVX2)
	    /* Use vpbroadcast.  */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (mode == V16QImode
				   ? cost->sse_load[2]
				   : cost->sse_load[3]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW
		   && ((mode == V32QImode && !TARGET_PREFER_AVX256)
		       || (mode == V16QImode && TARGET_AVX512VL
			   && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_AVX2
		   && mode == V16QImode && !TARGET_PREFER_AVX128)
	    count = 6;
	  else if (TARGET_SSE4_1)
	    count = 9;
	  else if (code == ASHIFTRT)
	    count = 10;
	  else
	    count = 9;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V64QImode:
	  /* Ignore the mask load for GF2P8AFFINEQB.  */
	  extra = 0;
	  return ix86_vec_cost (mode, cost->sse_op) + extra;

	case V2DImode:
	case V4DImode:
	  /* V*DImode arithmetic right shift is emulated.  */
	  if (code == ASHIFTRT && !TARGET_AVX512VL)
	    {
	      if (constant_op1)
		{
		  /* Shift by 63 (sign spread) has a cheap special case.  */
		  if (op1_val == 63)
		    count = TARGET_SSE4_2 ? 1 : 2;
		  else if (TARGET_XOP)
		    count = 2;
		  else if (TARGET_SSE4_1)
		    count = 3;
		  else
		    count = 4;
		}
	      else if (TARGET_XOP)
		count = 3;
	      else if (TARGET_SSE4_2)
		count = 4;
	      else
		count = 5;

	      return ix86_vec_cost (mode, cost->sse_op * count);
	    }
	  /* FALLTHRU */
	default:
	  /* All remaining vector shifts are a single native insn.  */
	  return ix86_vec_cost (mode, cost->sse_op);
	}
    }

  /* Scalar double-word shifts (wider than one GPR).  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  /* Scalar single-word shifts.  */
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation.  */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
}
22440 :
22441 : static int
22442 149056544 : ix86_insn_cost (rtx_insn *insn, bool speed)
22443 : {
22444 149056544 : int insn_cost = 0;
22445 : /* Add extra cost to avoid post_reload late_combine revert
22446 : the optimization did in pass_rpad. */
22447 149056544 : if (reload_completed
22448 4659561 : && ix86_rpad_gate ()
22449 265597 : && recog_memoized (insn) >= 0
22450 149321887 : && get_attr_avx_partial_xmm_update (insn)
22451 : == AVX_PARTIAL_XMM_UPDATE_TRUE)
22452 : insn_cost += COSTS_N_INSNS (3);
22453 :
22454 149056544 : return insn_cost + pattern_cost (PATTERN (insn), speed);
22455 : }
22456 :
22457 : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22458 :
22459 : static int
22460 745758 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22461 : {
22462 745758 : if (size < 128)
22463 742268 : return cost->cvtss2sd;
22464 3490 : else if (size < 256)
22465 : {
22466 1420 : if (TARGET_SSE_SPLIT_REGS)
22467 0 : return cost->cvtss2sd * size / 64;
22468 1420 : return cost->cvtss2sd;
22469 : }
22470 2070 : if (size < 512)
22471 768 : return cost->vcvtps2pd256;
22472 : else
22473 1302 : return cost->vcvtps2pd512;
22474 : }
22475 :
22476 : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
22477 :
22478 : static bool
22479 272281 : unspec_pcmp_p (rtx x)
22480 : {
22481 272281 : return GET_CODE (x) == UNSPEC
22482 272281 : && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22483 : }
22484 :
22485 : /* Compute a (partial) cost for rtx X. Return true if the complete
22486 : cost has been computed, and false if subexpressions should be
22487 : scanned. In either case, *TOTAL contains the cost result. */
22488 :
22489 : static bool
22490 7700754681 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22491 : int *total, bool speed)
22492 : {
22493 7700754681 : rtx mask;
22494 7700754681 : enum rtx_code code = GET_CODE (x);
22495 7700754681 : enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22496 4122754523 : const struct processor_costs *cost
22497 7700754681 : = speed ? ix86_tune_cost : &ix86_size_cost;
22498 7700754681 : int src_cost;
22499 :
22500 : /* Handling different vternlog variants. */
22501 7700754681 : if ((GET_MODE_SIZE (mode) == 64
22502 7700754681 : ? TARGET_AVX512F
22503 6517342257 : : (TARGET_AVX512VL
22504 6455757773 : || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22505 178122495 : && GET_MODE_SIZE (mode) >= 16
22506 120959225 : && outer_code_i == SET
22507 7747495853 : && ternlog_operand (x, mode))
22508 : {
22509 33846 : rtx args[3];
22510 :
22511 33846 : args[0] = NULL_RTX;
22512 33846 : args[1] = NULL_RTX;
22513 33846 : args[2] = NULL_RTX;
22514 33846 : int idx = ix86_ternlog_idx (x, args);
22515 33846 : gcc_assert (idx >= 0);
22516 :
22517 33846 : *total = cost->sse_op;
22518 135384 : for (int i = 0; i != 3; i++)
22519 101538 : if (args[i])
22520 71400 : *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22521 33846 : return true;
22522 : }
22523 :
22524 :
22525 7700720835 : switch (code)
22526 : {
22527 47590053 : case SET:
22528 47590053 : if (register_operand (SET_DEST (x), VOIDmode)
22529 47590053 : && register_operand (SET_SRC (x), VOIDmode))
22530 : {
22531 29222860 : *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22532 29222860 : return true;
22533 : }
22534 :
22535 18367193 : if (register_operand (SET_SRC (x), VOIDmode))
22536 : /* Avoid potentially incorrect high cost from rtx_costs
22537 : for non-tieable SUBREGs. */
22538 : src_cost = 0;
22539 : else
22540 : {
22541 15543083 : src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22542 :
22543 15543083 : if (CONSTANT_P (SET_SRC (x)))
22544 : /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22545 : a small value, possibly zero for cheap constants. */
22546 6970554 : src_cost += COSTS_N_INSNS (1);
22547 : }
22548 :
22549 18367193 : *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22550 18367193 : return true;
22551 :
22552 2836509020 : case CONST_INT:
22553 2836509020 : case CONST:
22554 2836509020 : case LABEL_REF:
22555 2836509020 : case SYMBOL_REF:
22556 2836509020 : if (x86_64_immediate_operand (x, VOIDmode))
22557 2231290181 : *total = 0;
22558 605218839 : else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22559 : /* Consider the zext constants slightly more expensive, as they
22560 : can't appear in most instructions. */
22561 28006819 : *total = 1;
22562 : else
22563 : /* movabsq is slightly more expensive than a simple instruction. */
22564 577212020 : *total = COSTS_N_INSNS (1) + 1;
22565 : return true;
22566 :
22567 7486371 : case CONST_DOUBLE:
22568 7486371 : if (IS_STACK_MODE (mode))
22569 1298367 : switch (standard_80387_constant_p (x))
22570 : {
22571 : case -1:
22572 : case 0:
22573 : break;
22574 283374 : case 1: /* 0.0 */
22575 283374 : *total = 1;
22576 283374 : return true;
22577 484819 : default: /* Other constants */
22578 484819 : *total = 2;
22579 484819 : return true;
22580 : }
22581 : /* FALLTHRU */
22582 :
22583 14304460 : case CONST_VECTOR:
22584 14304460 : switch (standard_sse_constant_p (x, mode))
22585 : {
22586 : case 0:
22587 : break;
22588 4214332 : case 1: /* 0: xor eliminates false dependency */
22589 4214332 : *total = 0;
22590 4214332 : return true;
22591 171790 : default: /* -1: cmp contains false dependency */
22592 171790 : *total = 1;
22593 171790 : return true;
22594 : }
22595 : /* FALLTHRU */
22596 :
22597 10902626 : case CONST_WIDE_INT:
22598 : /* Fall back to (MEM (SYMBOL_REF)), since that's where
22599 : it'll probably end up. Add a penalty for size. */
22600 21805252 : *total = (COSTS_N_INSNS (1)
22601 21575706 : + (!TARGET_64BIT && flag_pic)
22602 21805252 : + (GET_MODE_SIZE (mode) <= 4
22603 19036187 : ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22604 10902626 : return true;
22605 :
22606 22550999 : case ZERO_EXTEND:
22607 : /* The zero extensions is often completely free on x86_64, so make
22608 : it as cheap as possible. */
22609 22550999 : if (TARGET_64BIT && mode == DImode
22610 4944739 : && GET_MODE (XEXP (x, 0)) == SImode)
22611 3062793 : *total = 1;
22612 19488206 : else if (TARGET_ZERO_EXTEND_WITH_AND)
22613 0 : *total = cost->add;
22614 : else
22615 19488206 : *total = cost->movzx;
22616 : return false;
22617 :
22618 2675645 : case SIGN_EXTEND:
22619 2675645 : *total = cost->movsx;
22620 2675645 : return false;
22621 :
22622 637419247 : case ASHIFT:
22623 637419247 : if (SCALAR_INT_MODE_P (mode)
22624 246334806 : && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22625 680501306 : && CONST_INT_P (XEXP (x, 1)))
22626 : {
22627 42903124 : HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22628 42903124 : if (value == 1)
22629 : {
22630 2477798 : *total = cost->add;
22631 2477798 : return false;
22632 : }
22633 40425326 : if ((value == 2 || value == 3)
22634 4542986 : && cost->lea <= cost->shift_const)
22635 : {
22636 2145991 : *total = cost->lea;
22637 2145991 : return false;
22638 : }
22639 : }
22640 : /* FALLTHRU */
22641 :
22642 774637694 : case ROTATE:
22643 774637694 : case ASHIFTRT:
22644 774637694 : case LSHIFTRT:
22645 774637694 : case ROTATERT:
22646 774637694 : bool skip_op0, skip_op1;
22647 774637694 : *total = ix86_shift_rotate_cost (cost, code, mode,
22648 774637694 : CONSTANT_P (XEXP (x, 1)),
22649 : CONST_INT_P (XEXP (x, 1))
22650 : ? INTVAL (XEXP (x, 1)) : -1,
22651 : GET_CODE (XEXP (x, 1)) == AND,
22652 774637694 : SUBREG_P (XEXP (x, 1))
22653 774637694 : && GET_CODE (XEXP (XEXP (x, 1),
22654 : 0)) == AND,
22655 : &skip_op0, &skip_op1);
22656 774637694 : if (skip_op0 || skip_op1)
22657 : {
22658 22869 : if (!skip_op0)
22659 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22660 22869 : if (!skip_op1)
22661 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22662 22869 : return true;
22663 : }
22664 : return false;
22665 :
22666 230516 : case FMA:
22667 230516 : {
22668 230516 : rtx sub;
22669 :
22670 230516 : gcc_assert (FLOAT_MODE_P (mode));
22671 230516 : gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22672 :
22673 461032 : *total = ix86_vec_cost (mode,
22674 230516 : GET_MODE_INNER (mode) == SFmode
22675 : ? cost->fmass : cost->fmasd);
22676 230516 : *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22677 :
22678 : /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22679 230516 : sub = XEXP (x, 0);
22680 230516 : if (GET_CODE (sub) == NEG)
22681 51068 : sub = XEXP (sub, 0);
22682 230516 : *total += rtx_cost (sub, mode, FMA, 0, speed);
22683 :
22684 230516 : sub = XEXP (x, 2);
22685 230516 : if (GET_CODE (sub) == NEG)
22686 40544 : sub = XEXP (sub, 0);
22687 230516 : *total += rtx_cost (sub, mode, FMA, 2, speed);
22688 230516 : return true;
22689 : }
22690 :
22691 1757982486 : case MULT:
22692 1757982486 : if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22693 : {
22694 551219679 : rtx op0 = XEXP (x, 0);
22695 551219679 : rtx op1 = XEXP (x, 1);
22696 551219679 : int nbits;
22697 551219679 : if (CONST_INT_P (XEXP (x, 1)))
22698 : {
22699 533102640 : unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22700 1081625588 : for (nbits = 0; value != 0; value &= value - 1)
22701 548522948 : nbits++;
22702 : }
22703 : else
22704 : /* This is arbitrary. */
22705 : nbits = 7;
22706 :
22707 : /* Compute costs correctly for widening multiplication. */
22708 551219679 : if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22709 556727295 : && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22710 5507616 : == GET_MODE_SIZE (mode))
22711 : {
22712 5503523 : int is_mulwiden = 0;
22713 5503523 : machine_mode inner_mode = GET_MODE (op0);
22714 :
22715 5503523 : if (GET_CODE (op0) == GET_CODE (op1))
22716 5419662 : is_mulwiden = 1, op1 = XEXP (op1, 0);
22717 83861 : else if (CONST_INT_P (op1))
22718 : {
22719 73929 : if (GET_CODE (op0) == SIGN_EXTEND)
22720 22066 : is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22721 22066 : == INTVAL (op1);
22722 : else
22723 51863 : is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22724 : }
22725 :
22726 5493591 : if (is_mulwiden)
22727 5493591 : op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22728 : }
22729 :
22730 551219679 : int mult_init;
22731 : // Double word multiplication requires 3 mults and 2 adds.
22732 1118114132 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22733 : {
22734 331841506 : mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22735 331841506 : + 2 * cost->add;
22736 331841506 : nbits *= 3;
22737 : }
22738 378351386 : else mult_init = cost->mult_init[MODE_INDEX (mode)];
22739 :
22740 1102439358 : *total = (mult_init
22741 551219679 : + nbits * cost->mult_bit
22742 551219679 : + rtx_cost (op0, mode, outer_code, opno, speed)
22743 551219679 : + rtx_cost (op1, mode, outer_code, opno, speed));
22744 :
22745 551219679 : return true;
22746 : }
22747 1206762807 : *total = ix86_multiplication_cost (cost, mode);
22748 1206762807 : return false;
22749 :
22750 72493989 : case DIV:
22751 72493989 : case UDIV:
22752 72493989 : case MOD:
22753 72493989 : case UMOD:
22754 72493989 : *total = ix86_division_cost (cost, mode);
22755 72493989 : return false;
22756 :
22757 689564503 : case PLUS:
22758 689564503 : if (GET_MODE_CLASS (mode) == MODE_INT
22759 946179397 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22760 : {
22761 143034063 : if (GET_CODE (XEXP (x, 0)) == PLUS
22762 3942838 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22763 839606 : && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22764 839581 : && CONSTANT_P (XEXP (x, 1)))
22765 : {
22766 839524 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22767 839524 : if (val == 2 || val == 4 || val == 8)
22768 : {
22769 839420 : *total = cost->lea;
22770 839420 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22771 : outer_code, opno, speed);
22772 839420 : *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22773 : outer_code, opno, speed);
22774 839420 : *total += rtx_cost (XEXP (x, 1), mode,
22775 : outer_code, opno, speed);
22776 839420 : return true;
22777 : }
22778 : }
22779 142194539 : else if (GET_CODE (XEXP (x, 0)) == MULT
22780 52359504 : && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22781 : {
22782 52300201 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22783 52300201 : if (val == 2 || val == 4 || val == 8)
22784 : {
22785 8007603 : *total = cost->lea;
22786 8007603 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22787 : outer_code, opno, speed);
22788 8007603 : *total += rtx_cost (XEXP (x, 1), mode,
22789 : outer_code, opno, speed);
22790 8007603 : return true;
22791 : }
22792 : }
22793 89894338 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
22794 : {
22795 3103314 : rtx op = XEXP (XEXP (x, 0), 0);
22796 :
22797 : /* Add with carry, ignore the cost of adding a carry flag. */
22798 3103314 : if (ix86_carry_flag_operator (op, mode)
22799 3103314 : || ix86_carry_flag_unset_operator (op, mode))
22800 69349 : *total = cost->add;
22801 : else
22802 : {
22803 3033965 : *total = cost->lea;
22804 3033965 : *total += rtx_cost (op, mode,
22805 : outer_code, opno, speed);
22806 : }
22807 :
22808 3103314 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22809 : outer_code, opno, speed);
22810 3103314 : *total += rtx_cost (XEXP (x, 1), mode,
22811 : outer_code, opno, speed);
22812 3103314 : return true;
22813 : }
22814 : }
22815 : /* FALLTHRU */
22816 :
22817 1835577872 : case MINUS:
22818 : /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22819 1835577872 : if (GET_MODE_CLASS (mode) == MODE_INT
22820 519400917 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22821 234873958 : && GET_CODE (XEXP (x, 0)) == MINUS
22822 1835616632 : && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22823 14630 : || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22824 : {
22825 24130 : *total = cost->add;
22826 24130 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22827 : outer_code, opno, speed);
22828 24130 : *total += rtx_cost (XEXP (x, 1), mode,
22829 : outer_code, opno, speed);
22830 24130 : return true;
22831 : }
22832 :
22833 1835553742 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22834 2385552 : *total = cost->addss;
22835 1833168190 : else if (X87_FLOAT_MODE_P (mode))
22836 217782 : *total = cost->fadd;
22837 1832950408 : else if (FLOAT_MODE_P (mode))
22838 441061 : *total = ix86_vec_cost (mode, cost->addss);
22839 1832509347 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22840 1206412961 : *total = ix86_vec_cost (mode, cost->sse_op);
22841 1291599275 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22842 330841598 : *total = cost->add * 2;
22843 : else
22844 295254788 : *total = cost->add;
22845 : return false;
22846 :
22847 3932216 : case IOR:
22848 3932216 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22849 3683562 : || SSE_FLOAT_MODE_P (mode))
22850 : {
22851 : /* (ior (not ...) ...) can be a single insn in AVX512. */
22852 482 : if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22853 258248 : && (GET_MODE_SIZE (mode) == 64
22854 0 : || (TARGET_AVX512VL
22855 0 : && (GET_MODE_SIZE (mode) == 32
22856 0 : || GET_MODE_SIZE (mode) == 16))))
22857 : {
22858 0 : rtx right = GET_CODE (XEXP (x, 1)) != NOT
22859 0 : ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22860 :
22861 0 : *total = ix86_vec_cost (mode, cost->sse_op)
22862 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22863 : outer_code, opno, speed)
22864 0 : + rtx_cost (right, mode, outer_code, opno, speed);
22865 0 : return true;
22866 : }
22867 258248 : *total = ix86_vec_cost (mode, cost->sse_op);
22868 258248 : }
22869 3673968 : else if (TARGET_64BIT
22870 3381591 : && mode == TImode
22871 1674681 : && GET_CODE (XEXP (x, 0)) == ASHIFT
22872 254017 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22873 252025 : && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22874 252025 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22875 252025 : && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22876 252025 : && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22877 226700 : && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22878 : {
22879 : /* *concatditi3 is cheap. */
22880 226700 : rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22881 226700 : rtx op1 = XEXP (XEXP (x, 1), 0);
22882 1431 : *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22883 226700 : ? COSTS_N_INSNS (1) /* movq. */
22884 225269 : : set_src_cost (op0, DImode, speed);
22885 2336 : *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22886 226700 : ? COSTS_N_INSNS (1) /* movq. */
22887 224377 : : set_src_cost (op1, DImode, speed);
22888 226700 : return true;
22889 : }
22890 3447268 : else if (TARGET_64BIT
22891 3154891 : && mode == TImode
22892 1447981 : && GET_CODE (XEXP (x, 0)) == AND
22893 1385206 : && REG_P (XEXP (XEXP (x, 0), 0))
22894 1379979 : && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22895 1377302 : && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22896 1377302 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22897 902854 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22898 902854 : && GET_CODE (XEXP (x, 1)) == ASHIFT
22899 900734 : && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22900 900734 : && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22901 900734 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22902 4348002 : && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22903 : {
22904 : /* *insvti_highpart is cheap. */
22905 900734 : rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22906 900734 : *total = COSTS_N_INSNS (1) + 1;
22907 1404 : *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22908 900734 : ? COSTS_N_INSNS (1) /* movq. */
22909 899818 : : set_src_cost (op, DImode, speed);
22910 900734 : return true;
22911 : }
22912 5385445 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22913 740963 : *total = cost->add * 2;
22914 : else
22915 1805571 : *total = cost->add;
22916 : return false;
22917 :
22918 560571 : case XOR:
22919 560571 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22920 427592 : || SSE_FLOAT_MODE_P (mode))
22921 132979 : *total = ix86_vec_cost (mode, cost->sse_op);
22922 913937 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22923 16310 : *total = cost->add * 2;
22924 : else
22925 411282 : *total = cost->add;
22926 : return false;
22927 :
22928 7074871 : case AND:
22929 7074871 : if (address_no_seg_operand (x, mode))
22930 : {
22931 15707 : *total = cost->lea;
22932 15707 : return true;
22933 : }
22934 7059164 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22935 6649431 : || SSE_FLOAT_MODE_P (mode))
22936 : {
22937 : /* pandn is a single instruction. */
22938 443280 : if (GET_CODE (XEXP (x, 0)) == NOT)
22939 : {
22940 57869 : rtx right = XEXP (x, 1);
22941 :
22942 : /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22943 418 : if (GET_CODE (right) == NOT && TARGET_AVX512F
22944 57869 : && (GET_MODE_SIZE (mode) == 64
22945 0 : || (TARGET_AVX512VL
22946 0 : && (GET_MODE_SIZE (mode) == 32
22947 0 : || GET_MODE_SIZE (mode) == 16))))
22948 0 : right = XEXP (right, 0);
22949 :
22950 57869 : *total = ix86_vec_cost (mode, cost->sse_op)
22951 57869 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22952 : outer_code, opno, speed)
22953 57869 : + rtx_cost (right, mode, outer_code, opno, speed);
22954 57869 : return true;
22955 : }
22956 385411 : else if (GET_CODE (XEXP (x, 1)) == NOT)
22957 : {
22958 852 : *total = ix86_vec_cost (mode, cost->sse_op)
22959 852 : + rtx_cost (XEXP (x, 0), mode,
22960 : outer_code, opno, speed)
22961 852 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22962 : outer_code, opno, speed);
22963 852 : return true;
22964 : }
22965 384559 : *total = ix86_vec_cost (mode, cost->sse_op);
22966 384559 : }
22967 13931296 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22968 : {
22969 1124489 : if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22970 : {
22971 1670 : *total = cost->add * 2
22972 835 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22973 : outer_code, opno, speed)
22974 835 : + rtx_cost (XEXP (x, 1), mode,
22975 : outer_code, opno, speed);
22976 835 : return true;
22977 : }
22978 1123654 : else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
22979 : {
22980 0 : *total = cost->add * 2
22981 0 : + rtx_cost (XEXP (x, 0), mode,
22982 : outer_code, opno, speed)
22983 0 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22984 : outer_code, opno, speed);
22985 0 : return true;
22986 : }
22987 1123654 : *total = cost->add * 2;
22988 : }
22989 5491395 : else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22990 : {
22991 7578 : *total = cost->add
22992 3789 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22993 : outer_code, opno, speed)
22994 3789 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22995 3789 : return true;
22996 : }
22997 5487606 : else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
22998 : {
22999 112 : *total = cost->add
23000 56 : + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23001 56 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
23002 : outer_code, opno, speed);
23003 56 : return true;
23004 : }
23005 : else
23006 5487550 : *total = cost->add;
23007 : return false;
23008 :
23009 507924 : case NOT:
23010 507924 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23011 : {
23012 : /* (not (xor ...)) can be a single insn in AVX512. */
23013 0 : if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
23014 10934 : && (GET_MODE_SIZE (mode) == 64
23015 0 : || (TARGET_AVX512VL
23016 0 : && (GET_MODE_SIZE (mode) == 32
23017 0 : || GET_MODE_SIZE (mode) == 16))))
23018 : {
23019 0 : *total = ix86_vec_cost (mode, cost->sse_op)
23020 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23021 : outer_code, opno, speed)
23022 0 : + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
23023 : outer_code, opno, speed);
23024 0 : return true;
23025 : }
23026 :
23027 : // vnot is pxor -1.
23028 10934 : *total = ix86_vec_cost (mode, cost->sse_op) + 1;
23029 : }
23030 1138955 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23031 45873 : *total = cost->add * 2;
23032 : else
23033 451117 : *total = cost->add;
23034 : return false;
23035 :
23036 18164110 : case NEG:
23037 18164110 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23038 51389 : *total = cost->sse_op;
23039 18112721 : else if (X87_FLOAT_MODE_P (mode))
23040 15127 : *total = cost->fchs;
23041 18097594 : else if (FLOAT_MODE_P (mode))
23042 27005 : *total = ix86_vec_cost (mode, cost->sse_op);
23043 18070589 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23044 13388430 : *total = ix86_vec_cost (mode, cost->sse_op);
23045 9513456 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23046 1760651 : *total = cost->add * 3;
23047 : else
23048 2921508 : *total = cost->add;
23049 : return false;
23050 :
23051 54410718 : case COMPARE:
23052 54410718 : rtx op0, op1;
23053 54410718 : op0 = XEXP (x, 0);
23054 54410718 : op1 = XEXP (x, 1);
23055 54410718 : if (GET_CODE (op0) == ZERO_EXTRACT
23056 173566 : && XEXP (op0, 1) == const1_rtx
23057 156150 : && CONST_INT_P (XEXP (op0, 2))
23058 156114 : && op1 == const0_rtx)
23059 : {
23060 : /* This kind of construct is implemented using test[bwl].
23061 : Treat it as if we had an AND. */
23062 156114 : mode = GET_MODE (XEXP (op0, 0));
23063 312228 : *total = (cost->add
23064 156114 : + rtx_cost (XEXP (op0, 0), mode, outer_code,
23065 : opno, speed)
23066 156114 : + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
23067 156114 : return true;
23068 : }
23069 :
23070 54254604 : if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
23071 : {
23072 : /* This is an overflow detection, count it as a normal compare. */
23073 139165 : *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
23074 139165 : return true;
23075 : }
23076 :
23077 54115439 : rtx geu;
23078 : /* Match x
23079 : (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
23080 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
23081 54115439 : if (mode == CCCmode
23082 291507 : && GET_CODE (op0) == NEG
23083 8048 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
23084 8045 : && REG_P (XEXP (geu, 0))
23085 8045 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
23086 759 : || GET_MODE (XEXP (geu, 0)) == CCmode)
23087 8045 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
23088 8045 : && XEXP (geu, 1) == const0_rtx
23089 8045 : && GET_CODE (op1) == LTU
23090 8045 : && REG_P (XEXP (op1, 0))
23091 8045 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
23092 8045 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23093 54123484 : && XEXP (op1, 1) == const0_rtx)
23094 : {
23095 : /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
23096 8045 : *total = 0;
23097 8045 : return true;
23098 : }
23099 : /* Match x
23100 : (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
23101 : (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
23102 54107394 : if (mode == CCCmode
23103 283462 : && GET_CODE (op0) == NEG
23104 3 : && GET_CODE (XEXP (op0, 0)) == LTU
23105 3 : && REG_P (XEXP (XEXP (op0, 0), 0))
23106 3 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
23107 3 : && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
23108 3 : && XEXP (XEXP (op0, 0), 1) == const0_rtx
23109 3 : && GET_CODE (op1) == GEU
23110 3 : && REG_P (XEXP (op1, 0))
23111 3 : && GET_MODE (XEXP (op1, 0)) == CCCmode
23112 3 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23113 54107397 : && XEXP (op1, 1) == const0_rtx)
23114 : {
23115 : /* This is *x86_cmc. */
23116 3 : if (!speed)
23117 0 : *total = COSTS_N_BYTES (1);
23118 3 : else if (TARGET_SLOW_STC)
23119 0 : *total = COSTS_N_INSNS (2);
23120 : else
23121 3 : *total = COSTS_N_INSNS (1);
23122 3 : return true;
23123 : }
23124 :
23125 54107391 : if (SCALAR_INT_MODE_P (GET_MODE (op0))
23126 112677719 : && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
23127 : {
23128 750817 : if (op1 == const0_rtx)
23129 217322 : *total = cost->add
23130 108661 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
23131 : else
23132 1284312 : *total = 3*cost->add
23133 642156 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
23134 642156 : + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
23135 750817 : return true;
23136 : }
23137 :
23138 : /* The embedded comparison operand is completely free. */
23139 53356574 : if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
23140 379661 : *total = 0;
23141 :
23142 : return false;
23143 :
23144 1370880 : case FLOAT_EXTEND:
23145 : /* x87 represents all values extended to 80bit. */
23146 1370880 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23147 668736 : *total = 0;
23148 : else
23149 1404288 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23150 : return false;
23151 :
23152 83563 : case FLOAT_TRUNCATE:
23153 83563 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23154 58201 : *total = cost->fadd;
23155 : else
23156 50724 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23157 : return false;
23158 682875 : case FLOAT:
23159 682875 : case UNSIGNED_FLOAT:
23160 682875 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23161 : /* TODO: We do not have cost tables for x87. */
23162 93122 : *total = cost->fadd;
23163 589753 : else if (VECTOR_MODE_P (mode))
23164 0 : *total = ix86_vec_cost (mode, cost->cvtpi2ps);
23165 : else
23166 589753 : *total = cost->cvtsi2ss;
23167 : return false;
23168 :
23169 283553 : case FIX:
23170 283553 : case UNSIGNED_FIX:
23171 283553 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23172 : /* TODO: We do not have cost tables for x87. */
23173 283553 : *total = cost->fadd;
23174 0 : else if (VECTOR_MODE_P (mode))
23175 0 : *total = ix86_vec_cost (mode, cost->cvtps2pi);
23176 : else
23177 0 : *total = cost->cvtss2si;
23178 : return false;
23179 :
23180 338752 : case ABS:
23181 : /* SSE requires memory load for the constant operand. It may make
23182 : sense to account for this. Of course the constant operand may or
23183 : may not be reused. */
23184 338752 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23185 241596 : *total = cost->sse_op;
23186 97156 : else if (X87_FLOAT_MODE_P (mode))
23187 31403 : *total = cost->fabs;
23188 65753 : else if (FLOAT_MODE_P (mode))
23189 25620 : *total = ix86_vec_cost (mode, cost->sse_op);
23190 40133 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23191 6610 : *total = cost->sse_op;
23192 : return false;
23193 :
23194 28633 : case SQRT:
23195 28633 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23196 18295 : *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
23197 10338 : else if (X87_FLOAT_MODE_P (mode))
23198 4315 : *total = cost->fsqrt;
23199 6023 : else if (FLOAT_MODE_P (mode))
23200 6023 : *total = ix86_vec_cost (mode,
23201 : mode == SFmode ? cost->sqrtss : cost->sqrtsd);
23202 : return false;
23203 :
23204 3956463 : case UNSPEC:
23205 3956463 : switch (XINT (x, 1))
23206 : {
23207 125918 : case UNSPEC_TP:
23208 125918 : *total = 0;
23209 125918 : break;
23210 :
23211 5210 : case UNSPEC_VTERNLOG:
23212 5210 : *total = cost->sse_op;
23213 5210 : if (!REG_P (XVECEXP (x, 0, 0)))
23214 720 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23215 5210 : if (!REG_P (XVECEXP (x, 0, 1)))
23216 694 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23217 5210 : if (!REG_P (XVECEXP (x, 0, 2)))
23218 733 : *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
23219 : return true;
23220 :
23221 98550 : case UNSPEC_PTEST:
23222 98550 : {
23223 98550 : *total = cost->sse_op;
23224 98550 : rtx test_op0 = XVECEXP (x, 0, 0);
23225 98550 : if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
23226 : return false;
23227 97873 : if (GET_CODE (test_op0) == AND)
23228 : {
23229 23 : rtx and_op0 = XEXP (test_op0, 0);
23230 23 : if (GET_CODE (and_op0) == NOT)
23231 0 : and_op0 = XEXP (and_op0, 0);
23232 23 : *total += rtx_cost (and_op0, GET_MODE (and_op0),
23233 : AND, 0, speed)
23234 23 : + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
23235 : AND, 1, speed);
23236 : }
23237 : else
23238 97850 : *total = rtx_cost (test_op0, GET_MODE (test_op0),
23239 : UNSPEC, 0, speed);
23240 : }
23241 : return true;
23242 :
23243 20414 : case UNSPEC_BLENDV:
23244 20414 : *total = cost->sse_op;
23245 20414 : if (!REG_P (XVECEXP (x, 0, 0)))
23246 8283 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23247 20414 : if (!REG_P (XVECEXP (x, 0, 1)))
23248 9779 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23249 20414 : if (!REG_P (XVECEXP (x, 0, 2)))
23250 : {
23251 12568 : rtx cond = XVECEXP (x, 0, 2);
23252 12568 : if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
23253 773 : && CONST_VECTOR_P (XEXP (cond, 1)))
23254 : {
23255 : /* avx2_blendvpd256_gt and friends. */
23256 153 : if (!REG_P (XEXP (cond, 0)))
23257 70 : *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
23258 : }
23259 : else
23260 12415 : *total += rtx_cost (cond, mode, code, 2, speed);
23261 : }
23262 : return true;
23263 :
23264 27708 : case UNSPEC_MOVMSK:
23265 27708 : *total = cost->sse_op;
23266 27708 : return true;
23267 :
23268 : default:
23269 : break;
23270 : }
23271 : return false;
23272 :
23273 2016101 : case VEC_CONCAT:
23274 : /* ??? Assume all of these vector manipulation patterns are
23275 : recognizable. In which case they all pretty much have the
23276 : same cost.
23277 : ??? We should still recurse when computing cost. */
23278 2016101 : *total = cost->sse_op;
23279 2016101 : return true;
23280 :
23281 2463874 : case VEC_SELECT:
23282 : /* Special case extracting lower part from the vector.
23283 : This by itself needs no code and most of SSE/AVX instructions have
23284 : packed and single forms where the single form may be represented
23285 : by such VEC_SELECT.
23286 :
23287 : Use cost 1 (despite the fact that functionally equivalent SUBREG has
23288 : cost 0). Making VEC_SELECT completely free, for example instructs CSE
23289 : to forward propagate VEC_SELECT into
23290 :
23291 : (set (reg eax) (reg src))
23292 :
23293 : which then prevents fwprop and combining. See i.e.
23294 : gcc.target/i386/pr91103-1.c.
23295 :
23296 : ??? rtvec_series_p test should be, for valid patterns, equivalent to
23297 : vec_series_lowpart_p but is not, since the latter calls
23298 : can_change_mode_class on ALL_REGS and this returns false since x87 does
23299 : not support subregs at all. */
23300 2463874 : if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
23301 775660 : *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
23302 775660 : outer_code, opno, speed) + 1;
23303 : else
23304 : /* ??? We should still recurse when computing cost. */
23305 1688214 : *total = cost->sse_op;
23306 : return true;
23307 :
23308 1224597 : case VEC_DUPLICATE:
23309 2449194 : *total = rtx_cost (XEXP (x, 0),
23310 1224597 : GET_MODE (XEXP (x, 0)),
23311 : VEC_DUPLICATE, 0, speed);
23312 : /* It's broadcast instruction, not embedded broadcasting. */
23313 1224597 : if (outer_code == SET)
23314 1176401 : *total += cost->sse_op;
23315 :
23316 : return true;
23317 :
23318 723701 : case VEC_MERGE:
23319 723701 : mask = XEXP (x, 2);
23320 : /* Scalar versions of SSE instructions may be represented as:
23321 :
23322 : (vec_merge (vec_duplicate (operation ....))
23323 : (register or memory)
23324 : (const_int 1))
23325 :
23326 : In this case vec_merge and vec_duplicate is for free.
23327 : Just recurse into operation and second operand. */
23328 723701 : if (mask == const1_rtx
23329 211024 : && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
23330 : {
23331 74976 : *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23332 : outer_code, opno, speed)
23333 74976 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23334 74976 : return true;
23335 : }
23336 : /* This is masked instruction, assume the same cost,
23337 : as nonmasked variant. */
23338 648725 : else if (TARGET_AVX512F
23339 648725 : && (register_operand (mask, GET_MODE (mask))
23340 : /* Redundant cleanup of high bits for kmask with VL=2/4,
23341 : i.e. (vec_merge op0, op1, (and op3 15)). */
23342 120908 : || (GET_CODE (mask) == AND
23343 369 : && register_operand (XEXP (mask, 0), GET_MODE (mask))
23344 369 : && CONST_INT_P (XEXP (mask, 1))
23345 369 : && ((INTVAL (XEXP (mask, 1)) == 3
23346 131 : && GET_MODE_NUNITS (mode) == 2)
23347 238 : || (INTVAL (XEXP (mask, 1)) == 15
23348 238 : && GET_MODE_NUNITS (mode) == 4)))))
23349 : {
23350 374194 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23351 374194 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23352 374194 : return true;
23353 : }
23354 : /* Combination of the two above:
23355 :
23356 : (vec_merge (vec_merge (vec_duplicate (operation ...))
23357 : (register or memory)
23358 : (reg:QI mask))
23359 : (register or memory)
23360 : (const_int 1))
23361 :
23362 : i.e. avx512fp16_vcvtss2sh_mask. */
23363 274531 : else if (TARGET_AVX512F
23364 120539 : && mask == const1_rtx
23365 46497 : && GET_CODE (XEXP (x, 0)) == VEC_MERGE
23366 27158 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
23367 276793 : && register_operand (XEXP (XEXP (x, 0), 2),
23368 2262 : GET_MODE (XEXP (XEXP (x, 0), 2))))
23369 : {
23370 2250 : *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23371 : mode, outer_code, opno, speed)
23372 2250 : + rtx_cost (XEXP (XEXP (x, 0), 1),
23373 : mode, outer_code, opno, speed)
23374 2250 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23375 2250 : return true;
23376 : }
23377 : /* vcmp. */
23378 272281 : else if (unspec_pcmp_p (mask)
23379 272281 : || (GET_CODE (mask) == NOT
23380 0 : && unspec_pcmp_p (XEXP (mask, 0))))
23381 : {
23382 1950 : rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
23383 1950 : rtx unsop0 = XVECEXP (uns, 0, 0);
23384 : /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
23385 : cost the same as register.
23386 : This is used by avx_cmp<mode>3_ltint_not. */
23387 1950 : if (SUBREG_P (unsop0))
23388 417 : unsop0 = XEXP (unsop0, 0);
23389 1950 : if (GET_CODE (unsop0) == NOT)
23390 18 : unsop0 = XEXP (unsop0, 0);
23391 1950 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23392 1950 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
23393 1950 : + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
23394 1950 : + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
23395 1950 : + cost->sse_op;
23396 1950 : return true;
23397 : }
23398 : else
23399 270331 : *total = cost->sse_op;
23400 270331 : return false;
23401 :
23402 107093139 : case MEM:
23403 : /* CONST_VECTOR_DUPLICATE_P in constant_pool is just a broadcast,
23404 : or variants in ix86_vector_duplicate_simode_const. */
23405 :
23406 107093139 : if (GET_MODE_SIZE (mode) >= 16
23407 18292188 : && VECTOR_MODE_P (mode)
23408 12260505 : && SYMBOL_REF_P (XEXP (x, 0))
23409 2215690 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23410 109080385 : && ix86_broadcast_from_constant (mode, x))
23411 : {
23412 494949 : *total = COSTS_N_INSNS (2) + speed;
23413 494949 : return true;
23414 : }
23415 :
23416 : /* An insn that accesses memory is slightly more expensive
23417 : than one that does not. */
23418 106598190 : if (speed)
23419 : {
23420 95146968 : *total += 1;
23421 95146968 : rtx addr = XEXP (x, 0);
23422 : /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
23423 : so for MEM (reg) and MEM (reg + 4), the former costs 5,
23424 : the latter costs 9, it is not accurate for x86. Ideally
23425 : address_cost should be used, but it reduces cost too much.
23426 : So the current solution is to make constant disp as cheap as possible. */
23427 95146968 : if (GET_CODE (addr) == PLUS
23428 77640348 : && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23429 : /* Only handle (reg + disp) since other forms of addr are mostly LEA,
23430 : there's no additional cost for the plus of disp. */
23431 167167265 : && register_operand (XEXP (addr, 0), Pmode))
23432 : {
23433 55906970 : *total += 1;
23434 68759013 : *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23435 55906970 : return true;
23436 : }
23437 : }
23438 :
23439 : return false;
23440 :
23441 53024 : case ZERO_EXTRACT:
23442 53024 : if (XEXP (x, 1) == const1_rtx
23443 11254 : && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23444 0 : && GET_MODE (XEXP (x, 2)) == SImode
23445 0 : && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23446 : {
23447 : /* Ignore cost of zero extension and masking of last argument. */
23448 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23449 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23450 0 : *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23451 0 : return true;
23452 : }
23453 : return false;
23454 :
23455 29334302 : case IF_THEN_ELSE:
23456 29334302 : if (TARGET_XOP
23457 25487 : && VECTOR_MODE_P (mode)
23458 29339917 : && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23459 : {
23460 : /* vpcmov. */
23461 5047 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23462 5047 : if (!REG_P (XEXP (x, 0)))
23463 4887 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23464 5047 : if (!REG_P (XEXP (x, 1)))
23465 4854 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23466 5047 : if (!REG_P (XEXP (x, 2)))
23467 4856 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23468 5047 : return true;
23469 : }
23470 0 : else if (TARGET_CMOVE
23471 29329255 : && SCALAR_INT_MODE_P (mode)
23472 31702577 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23473 : {
23474 : /* cmov. */
23475 2174355 : *total = COSTS_N_INSNS (1);
23476 2174355 : if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23477 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23478 2174355 : if (!REG_P (XEXP (x, 1)))
23479 115950 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23480 2174355 : if (!REG_P (XEXP (x, 2)))
23481 708213 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23482 2174355 : return true;
23483 : }
23484 : return false;
23485 :
23486 18395137 : case EQ:
23487 18395137 : case GT:
23488 18395137 : case GTU:
23489 18395137 : case LT:
23490 18395137 : case LTU:
23491 18395137 : if (TARGET_SSE2
23492 18391933 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23493 18738296 : && GET_MODE_SIZE (mode) >= 8)
23494 : {
23495 : /* vpcmpeq */
23496 338954 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
23497 338954 : if (!REG_P (XEXP (x, 0)))
23498 64007 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23499 338954 : if (!REG_P (XEXP (x, 1)))
23500 130221 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23501 338954 : return true;
23502 : }
23503 18056183 : if (TARGET_XOP
23504 12298 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23505 18056291 : && GET_MODE_SIZE (mode) <= 16)
23506 : {
23507 : /* vpcomeq */
23508 108 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23509 108 : if (!REG_P (XEXP (x, 0)))
23510 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23511 108 : if (!REG_P (XEXP (x, 1)))
23512 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23513 108 : return true;
23514 : }
23515 : return false;
23516 :
23517 16062114 : case NE:
23518 16062114 : case GE:
23519 16062114 : case GEU:
23520 16062114 : if (TARGET_XOP
23521 22467 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23522 16069268 : && GET_MODE_SIZE (mode) <= 16)
23523 : {
23524 : /* vpcomneq */
23525 7154 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23526 7154 : if (!REG_P (XEXP (x, 0)))
23527 1449 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23528 7154 : if (!REG_P (XEXP (x, 1)))
23529 6134 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23530 7154 : return true;
23531 : }
23532 16054960 : if (TARGET_SSE2
23533 16052853 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23534 16056260 : && GET_MODE_SIZE (mode) >= 8)
23535 : {
23536 1326 : if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
23537 : /* vpcmpeq + vpternlog */
23538 28 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
23539 : else
23540 : /* vpcmpeq + pxor + vpcmpeq */
23541 1272 : *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
23542 1284 : if (!REG_P (XEXP (x, 0)))
23543 28 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23544 1284 : if (!REG_P (XEXP (x, 1)))
23545 28 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23546 1284 : return true;
23547 : }
23548 : return false;
23549 :
23550 : default:
23551 : return false;
23552 : }
23553 : }
23554 :
23555 : #if TARGET_MACHO
23556 :
23557 : static int current_machopic_label_num;
23558 :
23559 : /* Given a symbol name and its associated stub, write out the
23560 : definition of the stub. */
23561 :
23562 : void
23563 : machopic_output_stub (FILE *file, const char *symb, const char *stub)
23564 : {
23565 : unsigned int length;
23566 : char *binder_name, *symbol_name, lazy_ptr_name[32];
23567 : int label = ++current_machopic_label_num;
23568 :
23569 : /* For 64-bit we shouldn't get here. */
23570 : gcc_assert (!TARGET_64BIT);
23571 :
23572 : /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23573 : symb = targetm.strip_name_encoding (symb);
23574 :
      : /* Derive the binder and symbol names from the (stripped) input names;
      : the "+ 32" slack covers the fixed prefixes/suffixes the GEN_* macros
      : append. */
23575 : length = strlen (stub);
23576 : binder_name = XALLOCAVEC (char, length + 32);
23577 : GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23578 :
23579 : length = strlen (symb);
23580 : symbol_name = XALLOCAVEC (char, length + 32);
23581 : GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23582 :
      : /* Each stub gets a unique lazy-pointer label "L<n>$lz". */
23583 : sprintf (lazy_ptr_name, "L%d$lz", label);
23584 :
      : /* Pick the stub section matching the stub flavor (AT&T-style,
      : pure-PIC, or plain). */
23585 : if (MACHOPIC_ATT_STUB)
23586 : switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
23587 : else if (MACHOPIC_PURE)
23588 : switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
23589 : else
23590 : switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23591 :
23592 : fprintf (file, "%s:\n", stub);
23593 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23594 :
23595 : if (MACHOPIC_ATT_STUB)
23596 : {
      : /* The AT&T stub body is filled in by the linker; emit hlt
      : padding of the required size. */
23597 : fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
23598 : }
23599 : else if (MACHOPIC_PURE)
23600 : {
23601 : /* PIC stub. */
23602 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23603 : rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
23604 : output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
23605 : fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
23606 : label, lazy_ptr_name, label);
23607 : fprintf (file, "\tjmp\t*%%ecx\n");
23608 : }
23609 : else
23610 : fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23611 :
23612 : /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
23613 : it needs no stub-binding-helper. */
23614 : if (MACHOPIC_ATT_STUB)
23615 : return;
23616 :
23617 : fprintf (file, "%s:\n", binder_name);
23618 :
23619 : if (MACHOPIC_PURE)
23620 : {
23621 : fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
23622 : fprintf (file, "\tpushl\t%%ecx\n");
23623 : }
23624 : else
23625 : fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23626 :
23627 : fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
23628 :
23629 : /* N.B. Keep the correspondence of these
23630 : 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
23631 : old-pic/new-pic/non-pic stubs; altering this will break
23632 : compatibility with existing dylibs. */
23633 : if (MACHOPIC_PURE)
23634 : {
23635 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23636 : switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
23637 : }
23638 : else
23639 : /* 16-byte -mdynamic-no-pic stub. */
23640 : switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
23641 :
      : /* Finally emit the lazy pointer itself, initially pointing at the
      : binder so the first call resolves the symbol. */
23642 : fprintf (file, "%s:\n", lazy_ptr_name);
23643 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23644 : fprintf (file, ASM_LONG "%s\n", binder_name);
23645 : }
23646 : #endif /* TARGET_MACHO */
23647 :
23648 : /* Order the registers for register allocator. */
23649 :
23650 : void
23651 216819 : x86_order_regs_for_local_alloc (void)
23652 : {
23653 216819 : int pos = 0;
23654 216819 : int i;
23655 :
      : /* Fill reg_alloc_order by decreasing desirability.  Call-clobbered
      : GPRs come first (cheapest to use), then call-saved GPRs, then the
      : FP/vector register files in an order that depends on whether x87
      : or SSE is used for FP math. */
23656 : /* First allocate the local general purpose registers. */
23657 20164167 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23658 26885556 : if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
23659 5646757 : reg_alloc_order [pos++] = i;
23660 :
23661 : /* Global general purpose registers. */
23662 20164167 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23663 23155757 : if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
23664 1291451 : reg_alloc_order [pos++] = i;
23665 :
23666 : /* x87 registers come first in case we are doing FP math
23667 : using them. */
23668 216819 : if (!TARGET_SSE_MATH)
23669 57582 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23670 51184 : reg_alloc_order [pos++] = i;
23671 :
23672 : /* SSE registers. */
23673 1951371 : for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23674 1734552 : reg_alloc_order [pos++] = i;
23675 1951371 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23676 1734552 : reg_alloc_order [pos++] = i;
23677 :
23678 : /* Extended REX SSE registers. */
23679 3685923 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23680 3469104 : reg_alloc_order [pos++] = i;
23681 :
23682 : /* Mask register. */
23683 1951371 : for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23684 1734552 : reg_alloc_order [pos++] = i;
23685 :
      : /* When SSE does the FP math, x87 registers are nearly useless and
      : therefore go last among the real registers. */
23686 : /* x87 registers. */
23687 216819 : if (TARGET_SSE_MATH)
23688 1893789 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23689 1683368 : reg_alloc_order [pos++] = i;
23690 :
23691 1951371 : for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23692 1734552 : reg_alloc_order [pos++] = i;
23693 :
23694 : /* Initialize the rest of array as we do not allocate some registers
23695 : at all. */
23696 1084095 : while (pos < FIRST_PSEUDO_REGISTER)
23697 867276 : reg_alloc_order [pos++] = 0;
23698 216819 : }
23699 :
      : /* Return true if RECORD_TYPE should be laid out with Microsoft
      : bitfield layout rules: either the target defaults to MS layout
      : (and "gcc_struct" does not override it), or the type carries the
      : "ms_struct" attribute explicitly. */
23700 : static bool
23701 246671595 : ix86_ms_bitfield_layout_p (const_tree record_type)
23702 : {
23703 246671595 : return ((TARGET_MS_BITFIELD_LAYOUT
23704 215 : && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23705 246671595 : || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
23706 : }
23707 :
23708 : /* Returns an expression indicating where the this parameter is
23709 : located on entry to the FUNCTION. */
23710 :
23711 : static rtx
23712 1761 : x86_this_parameter (tree function)
23713 : {
23714 1761 : tree type = TREE_TYPE (function);
      : /* If the return value is passed via a hidden aggregate pointer,
      : "this" is shifted to the next parameter slot/register. */
23715 1761 : bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23716 1761 : int nregs;
23717 :
23718 1761 : if (TARGET_64BIT)
23719 : {
23720 1759 : const int *parm_regs;
23721 :
      : /* 64-bit: pick the integer-parameter register table matching the
      : function's calling convention, then index by AGGR (0 or 1). */
23722 1759 : if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
23723 : parm_regs = x86_64_preserve_none_int_parameter_registers;
23724 1759 : else if (ix86_function_type_abi (type) == MS_ABI)
23725 : parm_regs = x86_64_ms_abi_int_parameter_registers;
23726 : else
23727 1759 : parm_regs = x86_64_int_parameter_registers;
23728 1759 : return gen_rtx_REG (Pmode, parm_regs[aggr]);
23729 : }
23730 :
23731 2 : nregs = ix86_function_regparm (type, function);
23732 :
      : /* 32-bit with register parameters (but not stdarg): "this" lives in
      : a register whose choice depends on the calling convention. */
23733 2 : if (nregs > 0 && !stdarg_p (type))
23734 : {
23735 0 : int regno;
23736 0 : unsigned int ccvt = ix86_get_callcvt (type);
23737 :
23738 0 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23739 0 : regno = aggr ? DX_REG : CX_REG;
23740 0 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23741 : {
      : /* thiscall: "this" is in %ecx unless the hidden return
      : pointer displaces it onto the stack. */
23742 0 : regno = CX_REG;
23743 0 : if (aggr)
23744 0 : return gen_rtx_MEM (SImode,
23745 0 : plus_constant (Pmode, stack_pointer_rtx, 4));
23746 : }
23747 : else
23748 : {
23749 0 : regno = AX_REG;
23750 0 : if (aggr)
23751 : {
23752 0 : regno = DX_REG;
      : /* With only one register parameter and a hidden return
      : pointer, "this" spills to the stack. */
23753 0 : if (nregs == 1)
23754 0 : return gen_rtx_MEM (SImode,
23755 0 : plus_constant (Pmode,
23756 : stack_pointer_rtx, 4));
23757 : }
23758 : }
23759 0 : return gen_rtx_REG (SImode, regno);
23760 : }
23761 :
      : /* 32-bit stack-passed case: "this" is at sp+4, or sp+8 when the
      : hidden return pointer occupies the first slot. */
23762 4 : return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
23763 4 : aggr ? 8 : 4));
23764 : }
23765 :
23766 : /* Determine whether x86_output_mi_thunk can succeed. */
23767 :
23768 : static bool
23769 4908 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23770 : const_tree function)
23771 : {
23772 : /* 64-bit can handle anything. */
23773 4908 : if (TARGET_64BIT)
23774 : return true;
23775 :
23776 : /* For 32-bit, everything's fine if we have one free register. */
23777 76 : if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23778 : return true;
23779 :
      : /* From here on all three 32-bit parameter registers are taken;
      : a thunk needing a scratch register cannot be emitted inline. */
23780 : /* Need a free register for vcall_offset. */
23781 0 : if (vcall_offset)
23782 : return false;
23783 :
23784 : /* Need a free register for GOT references. */
23785 0 : if (flag_pic && !targetm.binds_local_p (function))
23786 : return false;
23787 :
23788 : /* Otherwise ok. */
23789 : return true;
23790 : }
23791 :
23792 : /* Output the assembler code for a thunk function. THUNK_DECL is the
23793 : declaration for the thunk function itself, FUNCTION is the decl for
23794 : the target function. DELTA is an immediate constant offset to be
23795 : added to THIS. If VCALL_OFFSET is nonzero, the word at
23796 : *(*this + vcall_offset) should be added to THIS. */
23797 :
23798 : static void
23799 1761 : x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
23800 : HOST_WIDE_INT vcall_offset, tree function)
23801 : {
23802 1761 : const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23803 1761 : rtx this_param = x86_this_parameter (function);
23804 1761 : rtx this_reg, tmp, fnaddr;
23805 1761 : unsigned int tmp_regno;
23806 1761 : rtx_insn *insn;
      : /* flag_force_indirect_call may be cleared below for 32-bit PIC;
      : remember it so it can be restored on exit. */
23807 1761 : int saved_flag_force_indirect_call = flag_force_indirect_call;
23808 :
      : /* Choose a scratch register that is free at function entry for the
      : chosen calling convention. */
23809 1761 : if (TARGET_64BIT)
23810 : tmp_regno = R10_REG;
23811 : else
23812 : {
23813 2 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
23814 2 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23815 : tmp_regno = AX_REG;
23816 2 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23817 : tmp_regno = DX_REG;
23818 : else
23819 2 : tmp_regno = CX_REG;
23820 :
23821 2 : if (flag_pic)
23822 2 : flag_force_indirect_call = 0;
23823 : }
23824 :
23825 1761 : emit_note (NOTE_INSN_PROLOGUE_END);
23826 :
23827 : /* CET is enabled, insert EB instruction. */
23828 1761 : if ((flag_cf_protection & CF_BRANCH))
23829 20 : emit_insn (gen_nop_endbr ());
23830 :
23831 : /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23832 : pull it in now and let DELTA benefit. */
23833 1761 : if (REG_P (this_param))
23834 : this_reg = this_param;
23835 2 : else if (vcall_offset)
23836 : {
23837 : /* Put the this parameter into %eax. */
23838 2 : this_reg = gen_rtx_REG (Pmode, AX_REG);
23839 1 : emit_move_insn (this_reg, this_param);
23840 : }
23841 : else
23842 : this_reg = NULL_RTX;
23843 :
23844 : /* Adjust the this parameter by a fixed constant. */
23845 1761 : if (delta)
23846 : {
23847 826 : rtx delta_rtx = GEN_INT (delta);
23848 826 : rtx delta_dst = this_reg ? this_reg : this_param;
23849 :
23850 826 : if (TARGET_64BIT)
23851 : {
      : /* DELTA may not fit in a 32-bit immediate; materialize it in
      : the scratch register first. */
23852 825 : if (!x86_64_general_operand (delta_rtx, Pmode))
23853 : {
23854 0 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23855 0 : emit_move_insn (tmp, delta_rtx);
23856 0 : delta_rtx = tmp;
23857 : }
23858 : }
23859 :
23860 827 : ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
23861 : }
23862 :
23863 : /* Adjust the this parameter by a value stored in the vtable. */
23864 1761 : if (vcall_offset)
23865 : {
23866 986 : rtx vcall_addr, vcall_mem, this_mem;
23867 :
23868 987 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23869 :
      : /* Load the vtable pointer (*this) into the scratch register,
      : zero-extending when pointers are narrower than Pmode (x32). */
23870 986 : this_mem = gen_rtx_MEM (ptr_mode, this_reg);
23871 987 : if (Pmode != ptr_mode)
23872 0 : this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
23873 986 : emit_move_insn (tmp, this_mem);
23874 :
23875 : /* Adjust the this parameter. */
23876 987 : vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
23877 986 : if (TARGET_64BIT
23878 986 : && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
23879 : {
      : /* Offset too large for an addressing mode: build the address
      : with %r11 as a second scratch. */
23880 0 : rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
23881 0 : emit_move_insn (tmp2, GEN_INT (vcall_offset));
23882 0 : vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
23883 : }
23884 :
23885 986 : vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
23886 987 : if (Pmode != ptr_mode)
23887 0 : emit_insn (gen_addsi_1_zext (this_reg,
23888 : gen_rtx_REG (ptr_mode,
23889 : REGNO (this_reg)),
23890 : vcall_mem));
23891 : else
23892 986 : ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
23893 : }
23894 :
23895 : /* If necessary, drop THIS back to its stack slot. */
23896 1761 : if (this_reg && this_reg != this_param)
23897 1 : emit_move_insn (this_param, this_reg);
23898 :
      : /* Compute the address to tail-call; PIC targets that do not bind
      : locally go through the GOT. */
23899 1761 : fnaddr = XEXP (DECL_RTL (function), 0);
23900 1761 : if (TARGET_64BIT)
23901 : {
23902 25 : if (!flag_pic || targetm.binds_local_p (function)
23903 1784 : || TARGET_PECOFF)
23904 : ;
23905 : else
23906 : {
23907 0 : tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
23908 0 : tmp = gen_rtx_CONST (Pmode, tmp);
23909 0 : fnaddr = gen_const_mem (Pmode, tmp);
23910 : }
23911 : }
23912 : else
23913 : {
23914 2 : if (!flag_pic || targetm.binds_local_p (function))
23915 : ;
23916 : #if TARGET_MACHO
23917 : else if (TARGET_MACHO)
23918 : {
23919 : fnaddr = machopic_indirect_call_target (DECL_RTL (function));
23920 : fnaddr = XEXP (fnaddr, 0);
23921 : }
23922 : #endif /* TARGET_MACHO */
23923 : else
23924 : {
23925 0 : tmp = gen_rtx_REG (Pmode, CX_REG);
23926 0 : output_set_got (tmp, NULL_RTX);
23927 :
23928 0 : fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
23929 0 : fnaddr = gen_rtx_CONST (Pmode, fnaddr);
23930 0 : fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
23931 0 : fnaddr = gen_const_mem (Pmode, fnaddr);
23932 : }
23933 : }
23934 :
23935 : /* Our sibling call patterns do not allow memories, because we have no
23936 : predicate that can distinguish between frame and non-frame memory.
23937 : For our purposes here, we can get away with (ab)using a jump pattern,
23938 : because we're going to do no optimization. */
23939 1761 : if (MEM_P (fnaddr))
23940 : {
23941 0 : if (sibcall_insn_operand (fnaddr, word_mode))
23942 : {
23943 0 : fnaddr = XEXP (DECL_RTL (function), 0);
23944 0 : tmp = gen_rtx_MEM (QImode, fnaddr);
23945 0 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23946 0 : tmp = emit_call_insn (tmp);
23947 0 : SIBLING_CALL_P (tmp) = 1;
23948 : }
23949 : else
23950 0 : emit_jump_insn (gen_indirect_jump (fnaddr));
23951 : }
23952 : else
23953 : {
23954 1761 : if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
23955 : {
23956 : // CM_LARGE_PIC always uses pseudo PIC register which is
23957 : // uninitialized. Since FUNCTION is local and calling it
23958 : // doesn't go through PLT, we use scratch register %r11 as
23959 : // PIC register and initialize it here.
23960 3 : pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
23961 3 : ix86_init_large_pic_reg (tmp_regno);
23962 3 : fnaddr = legitimize_pic_address (fnaddr,
23963 3 : gen_rtx_REG (Pmode, tmp_regno));
23964 : }
23965 :
23966 1761 : if (!sibcall_insn_operand (fnaddr, word_mode))
23967 : {
23968 9 : tmp = gen_rtx_REG (word_mode, tmp_regno);
23969 9 : if (GET_MODE (fnaddr) != word_mode)
23970 0 : fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
23971 9 : emit_move_insn (tmp, fnaddr);
23972 9 : fnaddr = tmp;
23973 : }
23974 :
23975 1761 : tmp = gen_rtx_MEM (QImode, fnaddr);
23976 1761 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23977 1761 : tmp = emit_call_insn (tmp);
23978 1761 : SIBLING_CALL_P (tmp) = 1;
23979 : }
23980 1761 : emit_barrier ();
23981 :
23982 : /* Emit just enough of rest_of_compilation to get the insns emitted. */
23983 1761 : insn = get_insns ();
23984 1761 : shorten_branches (insn);
23985 1761 : assemble_start_function (thunk_fndecl, fnname);
23986 1761 : final_start_function (insn, file, 1);
23987 1761 : final (insn, file, 1);
23988 1761 : final_end_function ();
23989 1761 : assemble_end_function (thunk_fndecl, fnname);
23990 :
23991 1761 : flag_force_indirect_call = saved_flag_force_indirect_call;
23992 1761 : }
23993 :
      : /* Implement TARGET_ASM_FILE_START: emit the per-file assembler
      : preamble (code16 marker, Darwin setup, legacy directives, and the
      : Intel-syntax switch when requested). */
23994 : static void
23995 273426 : x86_file_start (void)
23996 : {
23997 273426 : default_file_start ();
23998 273426 : if (TARGET_16BIT)
23999 6 : fputs ("\t.code16gcc\n", asm_out_file);
24000 : #if TARGET_MACHO
24001 : darwin_file_start ();
24002 : #endif
24003 273426 : if (X86_FILE_START_VERSION_DIRECTIVE)
24004 : fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24005 273426 : if (X86_FILE_START_FLTUSED)
24006 : fputs ("\t.global\t__fltused\n", asm_out_file);
24007 273426 : if (ix86_asm_dialect == ASM_INTEL)
24008 68 : fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24009 273426 : }
24010 :
      : /* Return the alignment to use for a field of TYPE whose natural
      : alignment is COMPUTED bits.  On 32-bit x86 (without
      : -malign-double) double/integer-class fields are capped at 32 bits,
      : except for over-aligned _Atomic fields, where only a -Wpsabi note
      : about the GCC 11.1 ABI change is emitted. */
24011 : int
24012 102261464 : x86_field_alignment (tree type, int computed)
24013 : {
24014 102261464 : machine_mode mode;
24015 :
24016 102261464 : if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24017 : return computed;
24018 9102704 : if (TARGET_IAMCU)
24019 0 : return iamcu_alignment (type, computed);
24020 9102704 : type = strip_array_types (type);
24021 9102704 : mode = TYPE_MODE (type);
24022 9102704 : if (mode == DFmode || mode == DCmode
24023 8997618 : || GET_MODE_CLASS (mode) == MODE_INT
24024 3014821 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24025 : {
24026 6087883 : if (TYPE_ATOMIC (type) && computed > 32)
24027 : {
      : /* Warn once per compilation; the alignment itself is kept. */
24028 0 : static bool warned;
24029 :
24030 0 : if (!warned && warn_psabi)
24031 : {
24032 0 : const char *url
24033 : = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
24034 :
24035 0 : warned = true;
24036 0 : inform (input_location, "the alignment of %<_Atomic %T%> "
24037 : "fields changed in %{GCC 11.1%}",
24038 0 : TYPE_MAIN_VARIANT (type), url);
24039 : }
24040 : }
24041 : else
24042 6087883 : return MIN (32, computed);
24043 : }
24044 : return computed;
24045 : }
24046 :
24047 : /* Print call to TARGET to FILE. */
24048 :
24049 : static void
24050 308 : x86_print_call_or_nop (FILE *file, const char *target,
24051 : const char *label)
24052 : {
      : /* LABEL is either "" or "1:"; it prefixes the emitted insn so that
      : __mcount_loc sections can reference the call site. */
24053 308 : if (flag_nop_mcount || !strcmp (target, "nop"))
24054 : {
24055 9 : if (TARGET_16BIT)
24056 : /* 3 byte no-op: lea 0(%si), %si */
24057 1 : fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
24058 : else
24059 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
24060 8 : fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
24061 : label);
24062 : }
24063 299 : else if (!TARGET_PECOFF && flag_pic)
24064 : {
      : /* Callers only reach here with -fplt; the no-PLT PIC forms are
      : handled before calling this helper. */
24065 8 : gcc_assert (flag_plt);
24066 :
24067 8 : fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
24068 : }
24069 : else
24070 291 : fprintf (file, "%s\tcall\t%s\n", label, target);
24071 308 : }
24072 :
      : /* If the current function carries the "fentry_name" attribute,
      : store its string argument in *NAME and return true; otherwise
      : leave *NAME untouched and return false. */
24073 : static bool
24074 328 : current_fentry_name (const char **name)
24075 : {
24076 328 : tree attr = lookup_attribute ("fentry_name",
24077 328 : DECL_ATTRIBUTES (current_function_decl));
24078 328 : if (!attr)
24079 : return false;
24080 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24081 2 : return true;
24082 : }
24083 :
      : /* If the current function carries the "fentry_section" attribute,
      : store its string argument in *NAME and return true; otherwise
      : leave *NAME untouched and return false. */
24084 : static bool
24085 16 : current_fentry_section (const char **name)
24086 : {
24087 16 : tree attr = lookup_attribute ("fentry_section",
24088 16 : DECL_ATTRIBUTES (current_function_decl));
24089 16 : if (!attr)
24090 : return false;
24091 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24092 2 : return true;
24093 : }
24094 :
24095 : /* Return a caller-saved register which isn't live or a callee-saved
24096 : register which has been saved on stack in the prologue at entry for
24097 : profile. */
24098 :
24099 : static int
24100 17 : x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
24101 : {
24102 : /* Use %r10 if the profiler is emitted before the prologue or it isn't
24103 : used by DRAP. */
24104 17 : if (ix86_profile_before_prologue ()
24105 4 : || !crtl->drap_reg
24106 17 : if REGNO (crtl->drap_reg) != R10_REG)
24107 : return R10_REG;
24108 :
24109 : /* The profiler is emitted after the prologue. If there is a
24110 : caller-saved register which isn't live or a callee-saved
24111 : register saved on stack in the prologue, use it. */
24112 :
24113 0 : bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
24114 :
      : /* Scan GPRs, skipping %r10 (DRAP) and, unless R11_OK, %r11 (used
      : as the PLTOFF scratch by the large-PIC profiling sequence). */
24115 0 : int i;
24116 0 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24117 0 : if (GENERAL_REGNO_P (i)
24118 0 : && i != R10_REG
24119 : #ifdef NO_PROFILE_COUNTERS
24120 0 : && (r11_ok || i != R11_REG)
24121 : #else
24122 : && i != R11_REG
24123 : #endif
24124 0 : && TEST_HARD_REG_BIT (accessible_reg_set, i)
24125 0 : && (ix86_save_reg (i, true, true)
24126 0 : || (call_used_regs[i]
24127 0 : && !fixed_regs[i]
24128 0 : && !REGNO_REG_SET_P (reg_live, i))))
24129 0 : return i;
24130 :
      : /* No usable register: report, then fall back to %r10 so callers
      : still receive a valid regnum. */
24131 0 : sorry ("no register available for profiling %<-mcmodel=large%s%>",
24132 0 : ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
24133 :
24134 0 : return R10_REG;
24135 : }
24136 :
24137 : /* Output assembler code to FILE to increment profiler label # LABELNO
24138 : for profiling a function entry. */
24139 : void
24140 328 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24141 : {
      : /* Insns queued to appear at the function entrance (ENDBR, patchable
      : area) must be emitted before the profiler call. */
24142 328 : if (cfun->machine->insn_queued_at_entrance)
24143 : {
24144 7 : if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
24145 6 : fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
24146 7 : unsigned int patch_area_size
24147 7 : = crtl->patch_area_size - crtl->patch_area_entry;
24148 7 : if (patch_area_size)
24149 2 : ix86_output_patchable_area (patch_area_size,
24150 : crtl->patch_area_entry == 0);
24151 : }
24152 :
24153 328 : const char *mcount_name = MCOUNT_NAME;
24154 :
      : /* When call sites are recorded, prefix the call with a "1:" label
      : referenced by the __mcount_loc-style section emitted at the end. */
24155 328 : bool fentry_section_p
24156 328 : = (flag_record_mcount
24157 641 : || lookup_attribute ("fentry_section",
24158 313 : DECL_ATTRIBUTES (current_function_decl)));
24159 :
24160 : const char *label = fentry_section_p ? "1:" : "";
24161 :
      : /* Resolve the profiler symbol: per-function attribute beats
      : -mfentry-name= beats -mfentry's default. */
24162 328 : if (current_fentry_name (&mcount_name))
24163 : ;
24164 326 : else if (fentry_name)
24165 1 : mcount_name = fentry_name;
24166 325 : else if (flag_fentry)
24167 313 : mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
24168 :
24169 328 : if (TARGET_64BIT)
24170 : {
24171 : #ifndef NO_PROFILE_COUNTERS
24172 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24173 : fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
24174 : else
24175 : fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
24176 : #endif
24177 :
24178 327 : int scratch;
24179 327 : const char *reg;
24180 327 : char legacy_reg[4] = { 0 };
24181 :
24182 327 : if (!TARGET_PECOFF)
24183 : {
      : /* The call sequence depends on the code model: large models
      : must materialize the target address in a scratch register. */
24184 327 : switch (ix86_cmodel)
24185 : {
24186 7 : case CM_LARGE:
24187 7 : scratch = x86_64_select_profile_regnum (true);
24188 7 : reg = hi_reg_name[scratch];
      : /* hi_reg_name spells legacy registers as e.g. "ax"; prepend
      : 'r' to form the 64-bit name. */
24189 7 : if (LEGACY_INT_REGNO_P (scratch))
24190 : {
24191 0 : legacy_reg[0] = 'r';
24192 0 : legacy_reg[1] = reg[0];
24193 0 : legacy_reg[2] = reg[1];
24194 0 : reg = legacy_reg;
24195 : }
24196 7 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24197 1 : fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
24198 : "\tcall\t%s\n", label, reg, mcount_name,
24199 : reg);
24200 : else
24201 6 : fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
24202 : label, mcount_name, reg, reg);
24203 : break;
24204 10 : case CM_LARGE_PIC:
24205 : #ifdef NO_PROFILE_COUNTERS
      : /* Compute the GOT base at "1:" in %r11, add the PLTOFF of
      : the profiler symbol, and call through the scratch REG. */
24206 10 : scratch = x86_64_select_profile_regnum (false);
24207 10 : reg = hi_reg_name[scratch];
24208 10 : if (LEGACY_INT_REGNO_P (scratch))
24209 : {
24210 0 : legacy_reg[0] = 'r';
24211 0 : legacy_reg[1] = reg[0];
24212 0 : legacy_reg[2] = reg[1];
24213 0 : reg = legacy_reg;
24214 : }
24215 10 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24216 : {
24217 1 : fprintf (file, "1:movabs\tr11, "
24218 : "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
24219 1 : fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
24220 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24221 1 : fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
24222 : mcount_name);
24223 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24224 1 : fprintf (file, "\tcall\t%s\n", reg);
24225 1 : break;
24226 : }
24227 9 : fprintf (file,
24228 : "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
24229 9 : fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
24230 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24231 9 : fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
24232 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24233 9 : fprintf (file, "\tcall\t*%%%s\n", reg);
24234 : #else
24235 : sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
24236 : #endif
24237 9 : break;
24238 12 : case CM_SMALL_PIC:
24239 12 : case CM_MEDIUM_PIC:
24240 12 : if (!flag_plt)
24241 : {
24242 3 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24243 0 : fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
24244 : label, mcount_name);
24245 : else
24246 3 : fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
24247 : label, mcount_name);
24248 : break;
24249 : }
24250 : /* fall through */
24251 307 : default:
24252 307 : x86_print_call_or_nop (file, mcount_name, label);
24253 307 : break;
24254 : }
24255 : }
24256 : else
24257 : x86_print_call_or_nop (file, mcount_name, label);
24258 : }
24259 1 : else if (flag_pic)
24260 : {
24261 : #ifndef NO_PROFILE_COUNTERS
24262 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24263 : fprintf (file,
24264 : "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
24265 : LPREFIX, labelno);
24266 : else
24267 : fprintf (file,
24268 : "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
24269 : LPREFIX, labelno);
24270 : #endif
24271 0 : if (flag_plt)
24272 0 : x86_print_call_or_nop (file, mcount_name, label);
24273 0 : else if (ASSEMBLER_DIALECT == ASM_INTEL)
24274 0 : fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
24275 : label, mcount_name);
24276 : else
24277 0 : fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
24278 : label, mcount_name);
24279 : }
24280 : else
24281 : {
24282 : #ifndef NO_PROFILE_COUNTERS
24283 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24284 : fprintf (file,
24285 : "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
24286 : LPREFIX, labelno);
24287 : else
24288 : fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
24289 : LPREFIX, labelno);
24290 : #endif
24291 1 : x86_print_call_or_nop (file, mcount_name, label);
24292 : }
24293 :
      : /* Record the call site address in the requested section (default
      : __mcount_loc), referencing the "1:" label emitted above. */
24294 328 : if (fentry_section_p)
24295 : {
24296 16 : const char *sname = "__mcount_loc";
24297 :
24298 16 : if (current_fentry_section (&sname))
24299 : ;
24300 14 : else if (fentry_section)
24301 1 : sname = fentry_section;
24302 :
24303 16 : fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
24304 16 : fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
24305 16 : fprintf (file, "\t.previous\n");
24306 : }
24307 328 : }
24308 :
24309 : /* We don't have exact information about the insn sizes, but we may assume
24310 : quite safely that we are informed about all 1 byte insns and memory
24311 : address sizes. This is enough to eliminate unnecessary padding in
24312 : 99% of cases. */
24313 :
24314 : int
24315 384174065 : ix86_min_insn_size (rtx_insn *insn)
24316 : {
24317 384174065 : int l = 0, len;
24318 :
      : /* Notes, labels and deleted insns occupy no bytes. */
24319 384174065 : if (!INSN_P (insn) || !active_insn_p (insn))
24320 500333 : return 0;
24321 :
24322 : /* Discard alignments we've emit and jump instructions. */
24323 383673732 : if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24324 383673732 : && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24325 : return 0;
24326 :
24327 : /* Important case - calls are always 5 bytes.
24328 : It is common to have many calls in the row. */
24329 383673727 : if (CALL_P (insn)
24330 9147319 : && symbolic_reference_mentioned_p (PATTERN (insn))
24331 392489459 : && !SIBLING_CALL_P (insn))
24332 : return 5;
24333 375097676 : len = get_attr_length (insn);
24334 375097676 : if (len <= 1)
24335 : return 1;
24336 :
24337 : /* For normal instructions we rely on get_attr_length being exact,
24338 : with a few exceptions. */
24339 366494642 : if (!JUMP_P (insn))
24340 : {
24341 361131576 : enum attr_type type = get_attr_type (insn);
24342 :
24343 361131576 : switch (type)
24344 : {
24345 95304 : case TYPE_MULTI:
      : /* Inline asm is estimated as zero bytes (see the callers'
      : comments about asm statements). */
24346 95304 : if (GET_CODE (PATTERN (insn)) == ASM_INPUT
24347 95304 : || asm_noperands (PATTERN (insn)) >= 0)
24348 527 : return 0;
24349 : break;
24350 : case TYPE_OTHER:
24351 : case TYPE_FCMP:
24352 : break;
24353 : default:
24354 : /* Otherwise trust get_attr_length. */
24355 : return len;
24356 : }
24357 :
      : /* For the imprecise types, fall back to the address length plus
      : one opcode byte; symbolic operands need a full 4-byte field. */
24358 474282 : l = get_attr_length_address (insn);
24359 474282 : if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24360 : l = 4;
24361 : }
24362 383824 : if (l)
24363 90458 : return 1+l;
24364 : else
24365 5746890 : return 2;
24366 : }
24367 :
24368 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24369 :
24370 : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24371 : window. */
24372 :
24373 : static void
24374 45424 : ix86_avoid_jump_mispredicts (void)
24375 : {
24376 45424 : rtx_insn *insn, *start = get_insns ();
24377 45424 : int nbytes = 0, njumps = 0;
24378 45424 : bool isjump = false;
24379 :
24380 : /* Look for all minimal intervals of instructions containing 4 jumps.
24381 : The intervals are bounded by START and INSN. NBYTES is the total
24382 : size of instructions in the interval including INSN and not including
24383 : START. When the NBYTES is smaller than 16 bytes, it is possible
24384 : that the end of START and INSN ends up in the same 16byte page.
24385 :
24386 : The smallest offset in the page INSN can start is the case where START
24387 : ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24388 : We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
24389 :
24390 : Don't consider asm goto as jump, while it can contain a jump, it doesn't
24391 : have to, control transfer to label(s) can be performed through other
24392 : means, and also we estimate minimum length of all asm stmts as 0. */
24393 700744 : for (insn = start; insn; insn = NEXT_INSN (insn))
24394 : {
24395 655320 : int min_size;
24396 :
24397 655320 : if (LABEL_P (insn))
24398 : {
24399 956 : align_flags alignment = label_to_alignment (insn);
24400 956 : int align = alignment.levels[0].log;
24401 956 : int max_skip = alignment.levels[0].maxskip;
24402 :
24403 956 : if (max_skip > 15)
24404 : max_skip = 15;
24405 : /* If align > 3, only up to 16 - max_skip - 1 bytes can be
24406 : already in the current 16 byte page, because otherwise
24407 : ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
24408 : bytes to reach 16 byte boundary. */
24409 956 : if (align <= 0
24410 328 : || (align <= 3 && max_skip != (1 << align) - 1))
24411 956 : max_skip = 0;
24412 956 : if (dump_file)
24413 0 : fprintf (dump_file, "Label %i with max_skip %i\n",
24414 0 : INSN_UID (insn), max_skip);
24415 956 : if (max_skip)
24416 : {
      : /* Shrink the window from the front until it is small
      : enough that the alignment cannot place START and INSN
      : in the same 16-byte page. */
24417 6278 : while (nbytes + max_skip >= 16)
24418 : {
24419 5950 : start = NEXT_INSN (start);
24420 310 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24421 5967 : || CALL_P (start))
24422 350 : njumps--, isjump = true;
24423 : else
24424 : isjump = false;
24425 5950 : nbytes -= ix86_min_insn_size (start);
24426 : }
24427 : }
24428 956 : continue;
24429 956 : }
24430 :
24431 654364 : min_size = ix86_min_insn_size (insn);
24432 654364 : nbytes += min_size;
24433 654364 : if (dump_file)
24434 0 : fprintf (dump_file, "Insn %i estimated to %i bytes\n",
24435 0 : INSN_UID (insn), min_size);
24436 46582 : if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
24437 654384 : || CALL_P (insn))
24438 47597 : njumps++;
24439 : else
24440 606767 : continue;
24441 :
      : /* Maintain the invariant that the [START, INSN] window contains
      : at most 3 jumps/calls by advancing START. */
24442 55983 : while (njumps > 3)
24443 : {
24444 8386 : start = NEXT_INSN (start);
24445 545 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24446 8386 : || CALL_P (start))
24447 1247 : njumps--, isjump = true;
24448 : else
24449 : isjump = false;
24450 8386 : nbytes -= ix86_min_insn_size (start);
24451 : }
24452 47597 : gcc_assert (njumps >= 0);
24453 47597 : if (dump_file)
24454 0 : fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24455 0 : INSN_UID (start), INSN_UID (insn), nbytes);
24456 :
      : /* Four jumps could share one 16-byte page: pad INSN so the window
      : spans a page boundary. */
24457 47597 : if (njumps == 3 && isjump && nbytes < 16)
24458 : {
24459 40 : int padsize = 15 - nbytes + ix86_min_insn_size (insn);
24460 :
24461 40 : if (dump_file)
24462 0 : fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24463 0 : INSN_UID (insn), padsize);
24464 40 : emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
24465 : }
24466 : }
24467 45424 : }
24468 : #endif
24469 :
24470 : /* AMD Athlon works faster
24471 : when RET is not destination of conditional jump or directly preceded
24472 : by other jump instruction. We avoid the penalty by inserting NOP just
24473 : before the RET instructions in such cases. */
24474 : static void
24475 45144 : ix86_pad_returns (void)
24476 : {
24477 45144 : edge e;
24478 45144 : edge_iterator ei;
24479 :
      : /* Examine every return block (predecessors of the exit block). */
24480 90312 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24481 : {
24482 45168 : basic_block bb = e->src;
24483 45168 : rtx_insn *ret = BB_END (bb);
24484 45168 : rtx_insn *prev;
24485 45168 : bool replace = false;
24486 :
24487 45158 : if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
24488 90326 : || optimize_bb_for_size_p (bb))
24489 23 : continue;
      : /* Find the nearest active insn or label preceding the return. */
24490 179724 : for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24491 134161 : if (active_insn_p (prev) || LABEL_P (prev))
24492 : break;
24493 45145 : if (prev && LABEL_P (prev))
24494 : {
      : /* The return is a jump target: pad if any non-fallthru edge
      : with nonzero frequency branches directly to it. */
24495 43 : edge e;
24496 43 : edge_iterator ei;
24497 :
24498 56 : FOR_EACH_EDGE (e, ei, bb->preds)
24499 146 : if (EDGE_FREQUENCY (e) && e->src->index >= 0
24500 97 : && !(e->flags & EDGE_FALLTHRU))
24501 : {
24502 : replace = true;
24503 : break;
24504 : }
24505 : }
24506 43 : if (!replace)
24507 : {
      : /* Also pad when the return directly follows a conditional
      : jump or a call. */
24508 45109 : prev = prev_active_insn (ret);
24509 45109 : if (prev
24510 45109 : && ((JUMP_P (prev) && any_condjump_p (prev))
24511 44673 : || CALL_P (prev)))
24512 : replace = true;
24513 : /* Empty functions get branch mispredict even when
24514 : the jump destination is not visible to us. */
24515 45109 : if (!prev && !optimize_function_for_size_p (cfun))
24516 : replace = true;
24517 : }
24518 44691 : if (replace)
24519 : {
      : /* Replace "ret" with the long form ("rep; ret") to avoid the
      : Athlon/K8 branch-misprediction penalty. */
24520 489 : emit_jump_insn_before (gen_simple_return_internal_long (), ret);
24521 489 : delete_insn (ret);
24522 : }
24523 : }
24524 45144 : }
24525 :
24526 : /* Count the minimum number of instructions in BB. Return 4 if the
24527 : number of instructions >= 4. */
24528 :
24529 : static int
24530 42 : ix86_count_insn_bb (basic_block bb)
24531 : {
24532 42 : rtx_insn *insn;
24533 42 : int insn_count = 0;
24534 :
24535 : /* Count number of instructions in this block. Return 4 if the number
24536 : of instructions >= 4. */
24537 297 : FOR_BB_INSNS (bb, insn)
24538 : {
24539 : /* Only happen in exit blocks. */
24540 291 : if (JUMP_P (insn)
24541 291 : && ANY_RETURN_P (PATTERN (insn)))
24542 : break;
24543 :
24544 267 : if (NONDEBUG_INSN_P (insn)
24545 102 : && GET_CODE (PATTERN (insn)) != USE
24546 351 : && GET_CODE (PATTERN (insn)) != CLOBBER)
24547 : {
24548 84 : insn_count++;
24549 84 : if (insn_count >= 4)
24550 : return insn_count;
24551 : }
24552 : }
24553 :
24554 : return insn_count;
24555 : }
24556 :
24557 :
24558 : /* Count the minimum number of instructions in code path in BB.
24559 : Return 4 if the number of instructions >= 4. */
24560 :
24561 : static int
24562 62 : ix86_count_insn (basic_block bb)
24563 : {
24564 62 : edge e;
24565 62 : edge_iterator ei;
24566 62 : int min_prev_count;
24567 :
24568 : /* Only bother counting instructions along paths with no
24569 : more than 2 basic blocks between entry and exit. Given
24570 : that BB has an edge to exit, determine if a predecessor
24571 : of BB has an edge from entry. If so, compute the number
24572 : of instructions in the predecessor block. If there
24573 : happen to be multiple such blocks, compute the minimum. */
24574 62 : min_prev_count = 4;
24575 145 : FOR_EACH_EDGE (e, ei, bb->preds)
24576 : {
24577 109 : edge prev_e;
24578 109 : edge_iterator prev_ei;
24579 :
24580 109 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24581 : {
24582 26 : min_prev_count = 0;
24583 26 : break;
24584 : }
24585 182 : FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
24586 : {
24587 109 : if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24588 : {
24589 10 : int count = ix86_count_insn_bb (e->src);
24590 10 : if (count < min_prev_count)
24591 83 : min_prev_count = count;
24592 : break;
24593 : }
24594 : }
24595 : }
24596 :
24597 62 : if (min_prev_count < 4)
24598 32 : min_prev_count += ix86_count_insn_bb (bb);
24599 :
24600 62 : return min_prev_count;
24601 : }
24602 :
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  /* Inspect each block that can reach the exit.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found: fall back to padding right
		 before the return itself.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
24639 :
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  /* Examine every block that flows into the exit block.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* Insert the separating nop after the throwing insn (and any
	 var-location notes attached to it).  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Control flow leaving the straight-line prologue window ends the
	 scan — the rough window estimate is only valid on the layout
	 fallthrough path.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Split the 16-byte load into two 8-byte halves: emit the
	 low-half load before INSN, then rewrite INSN itself into the
	 high-half load so its notes and location are preserved.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Due to potential STLF stall, split instruction:\n",
		 dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs ("To:\n", dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Force re-recognition of the rewritten insn and verify that the
	 new pattern matches.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
24736 :
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* SEH unwinder fixup runs even at -O0 when exceptions are present.  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  /* The remaining transformations only pay off when optimizing the
     function for speed.  */
  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      /* Short-function padding and return padding are mutually
	 exclusive tunings.  */
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
24763 :
24764 : /* Return nonzero when QImode register that must be represented via REX prefix
24765 : is used. */
24766 : bool
24767 9086973 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24768 : {
24769 9086973 : int i;
24770 9086973 : extract_insn_cached (insn);
24771 34425531 : for (i = 0; i < recog_data.n_operands; i++)
24772 4691861 : if (GENERAL_REG_P (recog_data.operand[i])
24773 22555565 : && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24774 : return true;
24775 : return false;
24776 : }
24777 :
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  /* INSN may be a full insn or a bare rtx expression; scan the pattern
     in the former case, the expression itself otherwise.  */
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
    {
      const_rtx x = *iter;
      /* Extended integer regs (r8-r15), extended SSE regs, and APX
	 extended GPRs all require a REX-family prefix.  */
      if (REG_P (x)
	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
	      || REX2_INT_REGNO_P (REGNO (x))))
	return true;
    }
  return false;
}
24794 :
/* Return true when INSN mentions register that must be encoded using REX2
   prefix.  */
bool
x86_extended_rex2reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  /* As in x86_extended_reg_mentioned_p, accept either a full insn or a
     bare rtx expression.  */
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
	return true;
    }
  return false;
}
24809 :
24810 : /* Return true when rtx operands mentions register that must be encoded using
24811 : evex prefix. */
24812 : bool
24813 10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
24814 : {
24815 10 : int i;
24816 28 : for (i = 0; i < nops; i++)
24817 22 : if (EXT_REX_SSE_REG_P (operands[i])
24818 40 : || x86_extended_rex2reg_mentioned_p (operands[i]))
24819 4 : return true;
24820 : return false;
24821 : }
24822 :
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      /* Reason about the constant as a 32-bit immediate below.  */
      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  /* Negating the most negative value of the mode would overflow.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
24868 :
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the input is non-negative when viewed as signed, a plain signed
     conversion is correct; branch otherwise.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* High bit set: compute (in >> 1) | (in & 1), which halves the value
     while keeping the low bit so rounding stays correct, convert that,
     and double the result.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
24910 :
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do no allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
	return N_("invalid conversion from type %<__bf16%> "
		  "without option %<-msse2%>");
      if (from_mode == HFmode)
	return N_("invalid conversion from type %<_Float16%> "
		  "without option %<-msse2%>");
      if (to_mode == BFmode)
	return N_("invalid conversion to type %<__bf16%> "
		  "without option %<-msse2%>");
      if (to_mode == HFmode)
	return N_("invalid conversion to type %<_Float16%> "
		  "without option %<-msse2%>");
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
	 a bitcast.  */
      if ((TYPE_MODE (fromtype) == BFmode
	   && TYPE_MODE (totype) == HImode)
	  || (TYPE_MODE (totype) == BFmode
	      && TYPE_MODE (fromtype) == HImode))
	warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
		 "to real %<__bf16%> since GCC 13.1, be careful of "
		 "implicit conversion between %<__bf16%> and %<short%>; "
		 "an explicit bitcast may be needed here");
    }

  /* Conversion allowed.  */
  return NULL;
}
24959 :
24960 : /* Return the diagnostic message string if the unary operation OP is
24961 : not permitted on TYPE, NULL otherwise. */
24962 :
24963 : static const char *
24964 91040445 : ix86_invalid_unary_op (int op, const_tree type)
24965 : {
24966 91040445 : machine_mode mmode = element_mode (type);
24967 : /* Reject all single-operand operations on BFmode/HFmode except for &
24968 : when TARGET_SSE2 is not available. */
24969 91040445 : if (!TARGET_SSE2 && op != ADDR_EXPR)
24970 : {
24971 111098 : if (mmode == BFmode)
24972 : return N_("operation not permitted on type %<__bf16%> "
24973 : "without option %<-msse2%>");
24974 111098 : if (mmode == HFmode)
24975 0 : return N_("operation not permitted on type %<_Float16%> "
24976 : "without option %<-msse2%>");
24977 : }
24978 :
24979 : /* Operation allowed. */
24980 : return NULL;
24981 : }
24982 :
24983 : /* Return the diagnostic message string if the binary operation OP is
24984 : not permitted on TYPE1 and TYPE2, NULL otherwise. */
24985 :
24986 : static const char *
24987 160400145 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
24988 : const_tree type2)
24989 : {
24990 160400145 : machine_mode type1_mode = element_mode (type1);
24991 160400145 : machine_mode type2_mode = element_mode (type2);
24992 : /* Reject all 2-operand operations on BFmode or HFmode
24993 : when TARGET_SSE2 is not available. */
24994 160400145 : if (!TARGET_SSE2)
24995 : {
24996 1008823 : if (type1_mode == BFmode || type2_mode == BFmode)
24997 : return N_("operation not permitted on type %<__bf16%> "
24998 : "without option %<-msse2%>");
24999 :
25000 1008823 : if (type1_mode == HFmode || type2_mode == HFmode)
25001 0 : return N_("operation not permitted on type %<_Float16%> "
25002 : "without option %<-msse2%>");
25003 : }
25004 :
25005 : /* Operation allowed. */
25006 : return NULL;
25007 : }
25008 :
25009 :
25010 : /* Target hook for scalar_mode_supported_p. */
25011 : static bool
25012 4521866 : ix86_scalar_mode_supported_p (scalar_mode mode)
25013 : {
25014 4521866 : if (DECIMAL_FLOAT_MODE_P (mode))
25015 630517 : return default_decimal_float_supported_p ();
25016 3891349 : else if (mode == TFmode)
25017 : return true;
25018 3568988 : else if (mode == HFmode || mode == BFmode)
25019 : return true;
25020 : else
25021 2926249 : return default_scalar_mode_supported_p (mode);
25022 : }
25023 :
25024 : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
25025 : if MODE is HFmode, and punt to the generic implementation otherwise. */
25026 :
25027 : static bool
25028 2213833 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
25029 : {
25030 : /* NB: Always return TRUE for HFmode so that the _Float16 type will
25031 : be defined by the C front-end for AVX512FP16 intrinsics. We will
25032 : issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
25033 : enabled. */
25034 1892939 : return ((mode == HFmode || mode == BFmode)
25035 3785878 : ? true
25036 1572045 : : default_libgcc_floating_mode_supported_p (mode));
25037 : }
25038 :
25039 : /* Implements target hook vector_mode_supported_p. */
25040 : static bool
25041 1318079707 : ix86_vector_mode_supported_p (machine_mode mode)
25042 : {
25043 : /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
25044 : either. */
25045 1454115481 : if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
25046 : return false;
25047 1318079257 : if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
25048 : return true;
25049 1109366475 : if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
25050 : return true;
25051 494890408 : if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
25052 : return true;
25053 355515471 : if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
25054 : return true;
25055 221300836 : if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
25056 221244149 : && VALID_MMX_REG_MODE (mode))
25057 : return true;
25058 31902055 : if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
25059 31266068 : && VALID_MMX_REG_MODE_3DNOW (mode))
25060 : return true;
25061 22345104 : if (mode == V2QImode)
25062 24814 : return true;
25063 : return false;
25064 : }
25065 :
25066 : /* Target hook for c_mode_for_suffix. */
25067 : static machine_mode
25068 196611 : ix86_c_mode_for_suffix (char suffix)
25069 : {
25070 196611 : if (suffix == 'q')
25071 : return TFmode;
25072 37 : if (suffix == 'w')
25073 : return XFmode;
25074 :
25075 0 : return VOIDmode;
25076 : }
25077 :
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
 */

static void map_egpr_constraints (vec<const char *> &constraints)
{
  /* Rewrite every constraint string in place.  */
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints mention no GPRs; leave untouched.  */
      if (startswith (cur, "=@cc"))
	continue;

      int len = strlen (cur);
      auto_vec<char> buf;

      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no j-prefixed equivalent; expand to "jrjmi".  */
	      buf.safe_push ('j');
	      buf.safe_push ('r');
	      buf.safe_push ('j');
	      buf.safe_push ('m');
	      buf.safe_push ('i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* Simple one-letter constraints get a 'j' prefix.  */
	      buf.safe_push ('j');
	      buf.safe_push (cur[j]);
	      break;
	    case 'B':
	      if (cur[j + 1] == 'm')
		{
		  /* "Bm" maps to "ja".  */
		  buf.safe_push ('j');
		  buf.safe_push ('a');
		  j++;
		}
	      else
		{
		  /* Other two-letter 'B' constraints pass through.  */
		  buf.safe_push (cur[j]);
		  buf.safe_push (cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-letter constraint families: copy both characters.  */
	      buf.safe_push (cur[j]);
	      buf.safe_push (cur[j + 1]);
	      j++;
	      break;
	    case '{':
	      /* Copy an explicit register name "{...}" verbatim.  */
	      do
		{
		  buf.safe_push (cur[j]);
		} while (cur[j++] != '}');
	      break;
	    default:
	      buf.safe_push (cur[j]);
	      break;
	    }
	}
      buf.safe_push ('\0');
      /* xstrdup: the vec keeps pointers, not the local buffer.  */
      constraints[i] = xstrdup (buf.address ());
    }
}
25170 :
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		    vec<machine_mode> & /*input_modes*/,
		    vec<const char *> &constraints, vec<rtx> &/*uses*/,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
		    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* Under APX without gpr32 inline-asm support, restrict GPR
     constraints to the lower 16 registers.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only "=@cc<cond>" flag outputs are handled here.  */
      if (!startswith (con, "=@cc"))
	continue;
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      /* Leading 'n' negates the condition.  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      /* Map the condition suffix to a flags-register mode and
	 comparison code; code stays UNKNOWN for unrecognized
	 suffixes.  */
      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      /* Build (code (reg:mode FLAGS_REG) 0) as a QImode value.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output");
	  continue;
	}

      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  /* Materialize the flag in QImode, then zero-extend into the
	     user's wider destination.  */
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
25319 :
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Symbols placed in the large data section need far addressing.  */
  if (ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
25330 :
25331 : /* Worker function for REVERSE_CONDITION. */
25332 :
25333 : enum rtx_code
25334 31754999 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
25335 : {
25336 31754999 : return (mode == CCFPmode
25337 31754999 : ? reverse_condition_maybe_unordered (code)
25338 27412115 : : reverse_condition (code));
25339 : }
25340 :
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* If the source register dies here, use a popping store.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      /* Moving into the stack top is a load; otherwise a plain store.  */
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if HAVE_SOLARIS_AS
  /* Prefer native COMDAT group support when the assembler has it.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  /* Anything else falls back to the generic ELF handling.  */
  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
25416 :
25417 : /* Return the mangling of TYPE if it is an extended fundamental type. */
25418 :
25419 : static const char *
25420 1044701921 : ix86_mangle_type (const_tree type)
25421 : {
25422 1044701921 : type = TYPE_MAIN_VARIANT (type);
25423 :
25424 1044701921 : if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25425 : && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25426 : return NULL;
25427 :
25428 564811083 : if (type == float128_type_node || type == float64x_type_node)
25429 : return NULL;
25430 :
25431 564127518 : switch (TYPE_MODE (type))
25432 : {
25433 : case E_BFmode:
25434 : return "DF16b";
25435 295241 : case E_HFmode:
25436 : /* _Float16 is "DF16_".
25437 : Align with clang's decision in https://reviews.llvm.org/D33719. */
25438 295241 : return "DF16_";
25439 637962 : case E_TFmode:
25440 : /* __float128 is "g". */
25441 637962 : return "g";
25442 7848200 : case E_XFmode:
25443 : /* "long double" or __float80 is "e". */
25444 7848200 : return "e";
25445 : default:
25446 : return NULL;
25447 : }
25448 : }
25449 :
/* Create C++ tinfo symbols for only conditionally available fundamental
   types.  */

static void
ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
{
  extern tree ix86_float16_type_node;
  extern tree ix86_bf16_type_node;

  /* Without SSE2 the front end has not registered _Float16/__bf16, so
     temporarily install the target's nodes, emit their tinfos, then
     clear them again.  */
  if (!TARGET_SSE2)
    {
      if (!float16_type_node)
	float16_type_node = ix86_float16_type_node;
      if (!bfloat16_type_node)
	bfloat16_type_node = ix86_bf16_type_node;
      callback (float16_type_node);
      callback (bfloat16_type_node);
      float16_type_node = NULL_TREE;
      bfloat16_type_node = NULL_TREE;
    }
}
25471 :
/* Cached VAR_DECL for the user-specified stack guard symbol
   (-mstack-protector-guard-symbol=); built once and reused.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Implement TARGET_STACK_PROTECT_GUARD.  Return a tree expression that
   reads the stack canary, either from a TLS slot (segment-relative
   address-space access) or via the default global guard variable.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      /* Build an unsigned pointer-sized type qualified with the guard
	 segment's address space (%fs or %gs).  */
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* The user named an explicit guard symbol; create (once) an
	     external, volatile VAR_DECL for it.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* No explicit symbol: dereference the fixed guard offset in the
	     segment address space, i.e. *(type *)offset.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
25527 :
25528 : static bool
25529 795 : ix86_stack_protect_runtime_enabled_p (void)
25530 : {
25531 : /* Naked functions should not enable stack protector. */
25532 795 : return !ix86_function_naked (current_function_decl);
25533 : }
25534 :
25535 : /* For 32-bit code we can save PIC register setup by using
25536 : __stack_chk_fail_local hidden function instead of calling
25537 : __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25538 : register, so it is better to call __stack_chk_fail directly. */
25539 :
25540 : static tree ATTRIBUTE_UNUSED
25541 286 : ix86_stack_protect_fail (void)
25542 : {
25543 286 : return TARGET_64BIT
25544 286 : ? default_external_stack_protect_fail ()
25545 1 : : default_hidden_stack_protect_fail ();
25546 : }
25547 :
25548 : /* Select a format to encode pointers in exception handling data. CODE
25549 : is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25550 : true if the symbol may be affected by dynamic relocations.
25551 :
25552 : ??? All x86 object file formats are capable of representing this.
25553 : After all, the relocation needed is the same as for the call insn.
25554 : Whether or not a particular assembler allows us to enter such, I
25555 : guess we'll have to see. */
25556 :
25557 : int
25558 787892 : asm_preferred_eh_data_format (int code, int global)
25559 : {
25560 : /* PE-COFF is effectively always -fPIC because of the .reloc section. */
25561 787892 : if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
25562 : {
25563 39070 : int type = DW_EH_PE_sdata8;
25564 39070 : if (ptr_mode == SImode
25565 25098 : || ix86_cmodel == CM_SMALL_PIC
25566 39156 : || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25567 : type = DW_EH_PE_sdata4;
25568 54682 : return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25569 : }
25570 :
25571 748822 : if (ix86_cmodel == CM_SMALL
25572 18678 : || (ix86_cmodel == CM_MEDIUM && code))
25573 730157 : return DW_EH_PE_udata4;
25574 :
25575 : return DW_EH_PE_absptr;
25576 : }
25577 :
/* Worker for ix86_builtin_vectorization_cost and the fallback calls
   from ix86_vector_costs::add_stmt_cost.  Map the generic cost kind
   TYPE_OF_COST for operations in MODE onto the active ix86_cost table,
   normalized to the COSTS_N_INSNS scale.  */
static int
ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
			  machine_mode mode)
{
  bool fp = FLOAT_MODE_P (mode);
  int index;
  switch (type_of_cost)
    {
      case scalar_stmt:
        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
	/* load/store costs are relative to register move which is 2. Recompute
 	   it to COSTS_N_INSNS so everything have same base.  */
        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			      : ix86_cost->int_load [2]) / 2;

      case scalar_store:
        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			      : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
        return ix86_vec_cost (mode,
			      fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
      case unaligned_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      /* Gather/scatter: a fixed setup cost plus a per-element cost scaled
	 by the number of vector lanes.  */
      case vector_gather_load:
        return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->gather_static
				  + ix86_cost->gather_per_elt
				    * GET_MODE_NUNITS (mode)) / 2);

      case vector_scatter_store:
        return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->scatter_static
				  + ix86_cost->scatter_per_elt
				    * GET_MODE_NUNITS (mode)) / 2);

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_promote_demote:
	if (fp)
	  return vec_fp_conversion_cost (ix86_tune_cost, mode);
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
	{
	  int n = GET_MODE_NUNITS (mode);
	  /* N - 1 element inserts into an SSE vector, the possible
	     GPR -> XMM move is accounted for in add_stmt_cost.  */
	  if (GET_MODE_BITSIZE (mode) <= 128)
	    return (n - 1) * ix86_cost->sse_op;
	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
	  else if (GET_MODE_BITSIZE (mode) == 256)
	    return ((n - 2) * ix86_cost->sse_op
		    + ix86_vec_cost (mode, ix86_cost->sse_op));
	  /* One vinserti64x4 and two vinserti128 for combining SSE
	     and AVX256 vectors to AVX512.  */
	  else if (GET_MODE_BITSIZE (mode) == 512)
	    {
	      machine_mode half_mode
		= mode_for_vector (GET_MODE_INNER (mode),
				   GET_MODE_NUNITS (mode) / 2).require ();
	      return ((n - 4) * ix86_cost->sse_op
		      + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
		      + ix86_vec_cost (mode, ix86_cost->sse_op));
	    }
	  gcc_unreachable ();
	}

      default:
        gcc_unreachable ();
    }
}
25696 :
25697 : /* Implement targetm.vectorize.builtin_vectorization_cost. */
25698 : static int
25699 9295332 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25700 : tree vectype, int)
25701 : {
25702 9295332 : machine_mode mode = TImode;
25703 9295332 : if (vectype != NULL)
25704 7671208 : mode = TYPE_MODE (vectype);
25705 9295332 : return ix86_default_vector_cost (type_of_cost, mode);
25706 : }
25707 :
25708 :
25709 : /* This function returns the calling abi specific va_list type node.
25710 : It returns the FNDECL specific va_list type. */
25711 :
25712 : static tree
25713 47586 : ix86_fn_abi_va_list (tree fndecl)
25714 : {
25715 47586 : if (!TARGET_64BIT)
25716 726 : return va_list_type_node;
25717 46860 : gcc_assert (fndecl != NULL_TREE);
25718 :
25719 46860 : if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
25720 12868 : return ms_va_list_type_node;
25721 : else
25722 33992 : return sysv_va_list_type_node;
25723 : }
25724 :
25725 : /* Returns the canonical va_list type specified by TYPE. If there
25726 : is no valid TYPE provided, it return NULL_TREE. */
25727 :
25728 : static tree
25729 246492 : ix86_canonical_va_list_type (tree type)
25730 : {
25731 246492 : if (TARGET_64BIT)
25732 : {
25733 245990 : if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
25734 5944 : return ms_va_list_type_node;
25735 :
25736 240046 : if ((TREE_CODE (type) == ARRAY_TYPE
25737 49923 : && integer_zerop (array_type_nelts_minus_one (type)))
25738 240046 : || POINTER_TYPE_P (type))
25739 : {
25740 188201 : tree elem_type = TREE_TYPE (type);
25741 188201 : if (TREE_CODE (elem_type) == RECORD_TYPE
25742 339588 : && lookup_attribute ("sysv_abi va_list",
25743 151387 : TYPE_ATTRIBUTES (elem_type)))
25744 151387 : return sysv_va_list_type_node;
25745 : }
25746 :
25747 88659 : return NULL_TREE;
25748 : }
25749 :
25750 502 : return std_canonical_va_list_type (type);
25751 : }
25752 :
25753 : /* Iterate through the target-specific builtin types for va_list.
25754 : IDX denotes the iterator, *PTREE is set to the result type of
25755 : the va_list builtin, and *PNAME to its internal type.
25756 : Returns zero if there is no element for this index, otherwise
25757 : IDX should be increased upon the next call.
25758 : Note, do not iterate a base builtin's name like __builtin_va_list.
25759 : Used from c_common_nodes_and_builtins. */
25760 :
25761 : static int
25762 618685 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25763 : {
25764 618685 : if (TARGET_64BIT)
25765 : {
25766 613305 : switch (idx)
25767 : {
25768 : default:
25769 : break;
25770 :
25771 204435 : case 0:
25772 204435 : *ptree = ms_va_list_type_node;
25773 204435 : *pname = "__builtin_ms_va_list";
25774 204435 : return 1;
25775 :
25776 204435 : case 1:
25777 204435 : *ptree = sysv_va_list_type_node;
25778 204435 : *pname = "__builtin_sysv_va_list";
25779 204435 : return 1;
25780 : }
25781 : }
25782 :
25783 : return 0;
25784 : }
25785 :
/* Register the x86 implementations of the scheduler target hooks
   (dispatch windows, reassociation width, insn reordering, priority
   adjustment and dependency evaluation).  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
25799 :
25800 :
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      /* A width of 1 means no reassociation parallelism; stop early.  */
      if (width == 1)
	return 1;

      /* Znver1-4 Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
	   || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	return 1;
      /* Znver5 can do 2 integer multiplications per cycle with latency
	 of 3.  (Width 6 presumably = 2 pipes x latency 3; confirm against
	 the Zen 5 optimization guide.)  */
      if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	width = 6;

      /* Account for targets that splits wide vectors into multiple parts.
	 Each split part consumes an execution slot, so divide the
	 available width accordingly (rounding up).  */
      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
	div = GET_MODE_BITSIZE (mode) / 256;
      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}
25854 :
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.

   Implement targetm.vectorize.preferred_simd_mode: return the widest
   enabled (and not explicitly deprioritized via prefer-128/256 tuning)
   vector mode for scalar MODE, or word_mode when MODE cannot be
   vectorized with the available ISA.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_HFmode:
      /* _Float16 vectors require AVX512FP16; without it fall back to
	 word_mode (no vectorization).  */
      if (TARGET_AVX512FP16)
	{
	  if (TARGET_AVX512VL)
	    {
	      if (TARGET_PREFER_AVX128)
		return V8HFmode;
	      else if (TARGET_PREFER_AVX256)
		return V16HFmode;
	    }
	  return V32HFmode;
	}
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16BFmode;
      else
	return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
25941 :
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.

   Implement targetm.vectorize.autovectorize_vector_modes.  The push
   order below encodes preference: earlier entries are tried first.
   When ALL is set, additionally queue the non-preferred widths (with
   the 512-bit / wider variant last).  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      /* 512-bit preferred: widest first.  */
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      /* 256-bit preferred, but with ALL still consider 512-bit last.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      /* 128-bit preferred, but with ALL still consider 256-bit last.  */
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* Narrow (64-bit and 32-bit) vectors are always the last resort.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
}
25982 :
/* Implementation of targetm.vectorize.get_mask_mode.  Return the mode
   used for a vector mask covering DATA_MODE: a scalar integer mode
   (AVX512 k-register mask, one bit per lane) when the ISA supports it,
   otherwise a vector of integer elements the same width as the data
   elements.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
	 to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
	  && GET_MODE_INNER (data_mode) == E_HFmode)
      || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    {
      /* 1- and 2-byte elements need AVX512BW for masked byte/word ops;
	 4- and 8-byte elements are covered by base AVX512F/VL.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (nunits).require ();
    }

  /* Vector mask case: one integer element per data lane.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
26014 :
26015 :
26016 :
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* The "0 &&" keeps the GPR-to-SSE spill path compiled but deliberately
     dead; see the PRs cited above before re-enabling it.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
26035 :
26036 : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
26037 : but returns a lower bound. */
26038 :
26039 : static unsigned int
26040 1867170 : ix86_max_noce_ifcvt_seq_cost (edge e)
26041 : {
26042 1867170 : bool predictable_p = predictable_edge_p (e);
26043 1867170 : if (predictable_p)
26044 : {
26045 145354 : if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
26046 8 : return param_max_rtl_if_conversion_predictable_cost;
26047 : }
26048 : else
26049 : {
26050 1721816 : if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
26051 73 : return param_max_rtl_if_conversion_unpredictable_cost;
26052 : }
26053 :
26054 : /* For modern machines with deeper pipeline, the penalty for branch
26055 : misprediction could be higher than before to reset the pipeline
26056 : slots. Add parameter br_mispredict_scale as a factor to describe
26057 : the impact of reseting the pipeline. */
26058 :
26059 1867089 : return BRANCH_COST (true, predictable_p)
26060 1867089 : * ix86_tune_cost->br_mispredict_scale;
26061 : }
26062 :
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  /* Only SETs whose source is an IF_THEN_ELSE can be cmovs.  */
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  /* Both arms must be register or memory operands for the insn
	     to actually be a cmov/fcmov.  */
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }

  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
     for movdfcc/movsfcc, and could possibly fail cost comparison.
     Increase branch cost will hurt performance for other modes, so
     specially add some preference for floating point ifcvt.  */
  if (!TARGET_SSE4_1 && if_info->x
      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
      && if_info->speed_p)
    {
      unsigned cost = seq_cost (seq, true);

      if (cost <= if_info->original_cost)
	return true;

      /* Allow a 2-insn slack over the generic threshold for FP ifcvt.  */
      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    }

  return default_noce_conversion_profitable_p (seq, if_info);
}
26115 :
/* x86-specific vector costs.  Accumulates per-statement costs plus
   auxiliary counters used by finish_cost to adjust the final numbers.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Record the cost of COUNT copies of a statement of kind KIND; see
     vector_costs::add_stmt_cost for the hook contract.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  /* Finalize the accumulated costs (compared against SCALAR_COSTS).  */
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
26145 :
26146 1968094 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
26147 : : vector_costs (vinfo, costing_for_scalar),
26148 1968094 : m_num_gpr_needed (),
26149 1968094 : m_num_sse_needed (),
26150 1968094 : m_num_avx256_vec_perm (),
26151 1968094 : m_num_reduc (),
26152 1968094 : m_prefer_unroll (true)
26153 1968094 : {}
26154 :
26155 : /* Implement targetm.vectorize.create_costs. */
26156 :
26157 : static vector_costs *
26158 1968094 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
26159 : {
26160 1968094 : return new ix86_vector_costs (vinfo, costing_for_scalar);
26161 : }
26162 :
26163 : unsigned
26164 6670767 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
26165 : stmt_vec_info stmt_info, slp_tree node,
26166 : tree vectype, int,
26167 : vect_cost_model_location where)
26168 : {
26169 6670767 : unsigned retval = 0;
26170 6670767 : bool scalar_p
26171 : = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
26172 6670767 : int stmt_cost = - 1;
26173 :
26174 6670767 : bool fp = false;
26175 6670767 : machine_mode mode = scalar_p ? SImode : TImode;
26176 :
26177 6670767 : if (vectype != NULL)
26178 : {
26179 2977211 : fp = FLOAT_TYPE_P (vectype);
26180 2977211 : mode = TYPE_MODE (vectype);
26181 2977211 : if (scalar_p)
26182 238042 : mode = TYPE_MODE (TREE_TYPE (vectype));
26183 : }
26184 : /* When we are costing a scalar stmt use the scalar stmt to get at the
26185 : type of the operation. */
26186 3693556 : else if (scalar_p && stmt_info)
26187 3633093 : if (tree lhs = gimple_get_lhs (stmt_info->stmt))
26188 : {
26189 3461676 : fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
26190 3461676 : mode = TYPE_MODE (TREE_TYPE (lhs));
26191 : }
26192 :
26193 6670767 : if ((kind == vector_stmt || kind == scalar_stmt)
26194 1612474 : && stmt_info
26195 8277275 : && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
26196 : {
26197 1248633 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26198 : /*machine_mode inner_mode = mode;
26199 : if (VECTOR_MODE_P (mode))
26200 : inner_mode = GET_MODE_INNER (mode);*/
26201 :
26202 1248633 : switch (subcode)
26203 : {
26204 500004 : case PLUS_EXPR:
26205 500004 : case POINTER_PLUS_EXPR:
26206 500004 : case MINUS_EXPR:
26207 500004 : if (kind == scalar_stmt)
26208 : {
26209 324427 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26210 69143 : stmt_cost = ix86_cost->addss;
26211 255284 : else if (X87_FLOAT_MODE_P (mode))
26212 128 : stmt_cost = ix86_cost->fadd;
26213 : else
26214 255156 : stmt_cost = ix86_cost->add;
26215 : }
26216 : else
26217 175577 : stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
26218 : : ix86_cost->sse_op);
26219 : break;
26220 :
26221 178764 : case MULT_EXPR:
26222 : /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
26223 : take it as MULT_EXPR. */
26224 178764 : case MULT_HIGHPART_EXPR:
26225 178764 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26226 178764 : break;
26227 : /* There's no direct instruction for WIDEN_MULT_EXPR,
26228 : take emulation into account. */
26229 1018 : case WIDEN_MULT_EXPR:
26230 2036 : stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
26231 1018 : TYPE_UNSIGNED (vectype));
26232 1018 : break;
26233 :
26234 6214 : case NEGATE_EXPR:
26235 6214 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26236 1700 : stmt_cost = ix86_cost->sse_op;
26237 4514 : else if (X87_FLOAT_MODE_P (mode))
26238 0 : stmt_cost = ix86_cost->fchs;
26239 4514 : else if (VECTOR_MODE_P (mode))
26240 1888 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26241 : else
26242 2626 : stmt_cost = ix86_cost->add;
26243 : break;
26244 12393 : case TRUNC_DIV_EXPR:
26245 12393 : case CEIL_DIV_EXPR:
26246 12393 : case FLOOR_DIV_EXPR:
26247 12393 : case ROUND_DIV_EXPR:
26248 12393 : case TRUNC_MOD_EXPR:
26249 12393 : case CEIL_MOD_EXPR:
26250 12393 : case FLOOR_MOD_EXPR:
26251 12393 : case RDIV_EXPR:
26252 12393 : case ROUND_MOD_EXPR:
26253 12393 : case EXACT_DIV_EXPR:
26254 12393 : stmt_cost = ix86_division_cost (ix86_cost, mode);
26255 12393 : break;
26256 :
26257 55888 : case RSHIFT_EXPR:
26258 55888 : case LSHIFT_EXPR:
26259 55888 : case LROTATE_EXPR:
26260 55888 : case RROTATE_EXPR:
26261 55888 : {
26262 55888 : tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
26263 55888 : tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
26264 55888 : stmt_cost = ix86_shift_rotate_cost
26265 55888 : (ix86_cost,
26266 : (subcode == RSHIFT_EXPR
26267 32521 : && !TYPE_UNSIGNED (TREE_TYPE (op1)))
26268 : ? ASHIFTRT : LSHIFTRT, mode,
26269 55888 : TREE_CODE (op2) == INTEGER_CST,
26270 55888 : cst_and_fits_in_hwi (op2)
26271 33261 : ? int_cst_value (op2) : -1,
26272 : false, false, NULL, NULL);
26273 : }
26274 55888 : break;
26275 83149 : case NOP_EXPR:
26276 : /* Only sign-conversions are free. */
26277 83149 : if (tree_nop_conversion_p
26278 83149 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
26279 83149 : TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
26280 : stmt_cost = 0;
26281 83149 : else if (fp)
26282 6894 : stmt_cost = vec_fp_conversion_cost
26283 6894 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26284 : break;
26285 :
26286 13442 : case FLOAT_EXPR:
26287 13442 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26288 10344 : stmt_cost = ix86_cost->cvtsi2ss;
26289 3098 : else if (X87_FLOAT_MODE_P (mode))
26290 : /* TODO: We do not have cost tables for x87. */
26291 50 : stmt_cost = ix86_cost->fadd;
26292 : else
26293 3048 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26294 : break;
26295 :
26296 1706 : case FIX_TRUNC_EXPR:
26297 1706 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26298 0 : stmt_cost = ix86_cost->cvtss2si;
26299 1706 : else if (X87_FLOAT_MODE_P (mode))
26300 : /* TODO: We do not have cost tables for x87. */
26301 0 : stmt_cost = ix86_cost->fadd;
26302 : else
26303 1706 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26304 : break;
26305 :
26306 38539 : case COND_EXPR:
26307 38539 : {
26308 : /* SSE2 conditinal move sequence is:
26309 : pcmpgtd %xmm5, %xmm0 (accounted separately)
26310 : pand %xmm0, %xmm2
26311 : pandn %xmm1, %xmm0
26312 : por %xmm2, %xmm0
26313 : while SSE4 uses cmp + blend
26314 : and AVX512 masked moves.
26315 :
26316 : The condition is accounted separately since we usually have
26317 : p = a < b
26318 : c = p ? x : y
26319 : and we will account first statement as setcc. Exception is when
26320 : p is loaded from memory as bool and then we will not acocunt
26321 : the compare, but there is no way to check for this. */
26322 :
26323 38539 : int ninsns = TARGET_SSE4_1 ? 1 : 3;
26324 :
26325 : /* If one of parameters is 0 or -1 the sequence will be simplified:
26326 : (if_true & mask) | (if_false & ~mask) -> if_true & mask */
26327 19927 : if (ninsns > 1
26328 19927 : && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26329 19601 : || zerop (gimple_assign_rhs3 (stmt_info->stmt))
26330 11531 : || integer_minus_onep
26331 11531 : (gimple_assign_rhs2 (stmt_info->stmt))
26332 11105 : || integer_minus_onep
26333 11105 : (gimple_assign_rhs3 (stmt_info->stmt))))
26334 : ninsns = 1;
26335 :
26336 38539 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26337 2776 : stmt_cost = ninsns * ix86_cost->sse_op;
26338 35763 : else if (X87_FLOAT_MODE_P (mode))
26339 : /* x87 requires conditional branch. We don't have cost for
26340 : that. */
26341 : ;
26342 35754 : else if (VECTOR_MODE_P (mode))
26343 14729 : stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
26344 : else
26345 : /* compare (accounted separately) + cmov. */
26346 21025 : stmt_cost = ix86_cost->add;
26347 : }
26348 : break;
26349 :
26350 22127 : case MIN_EXPR:
26351 22127 : case MAX_EXPR:
26352 22127 : if (fp)
26353 : {
26354 1008 : if (X87_FLOAT_MODE_P (mode)
26355 384 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26356 : /* x87 requires conditional branch. We don't have cost for
26357 : that. */
26358 : ;
26359 : else
26360 : /* minss */
26361 1008 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26362 : }
26363 : else
26364 : {
26365 21119 : if (VECTOR_MODE_P (mode))
26366 : {
26367 4069 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26368 : /* vpmin was introduced in SSE3.
26369 : SSE2 needs pcmpgtd + pand + pandn + pxor.
26370 : If one of parameters is 0 or -1 the sequence is simplified
26371 : to pcmpgtd + pand. */
26372 4069 : if (!TARGET_SSSE3)
26373 : {
26374 3100 : if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26375 4434 : || integer_minus_onep
26376 1334 : (gimple_assign_rhs2 (stmt_info->stmt)))
26377 1766 : stmt_cost *= 2;
26378 : else
26379 1334 : stmt_cost *= 4;
26380 : }
26381 : }
26382 : else
26383 : /* cmp + cmov. */
26384 17050 : stmt_cost = ix86_cost->add * 2;
26385 : }
26386 : break;
26387 :
26388 904 : case ABS_EXPR:
26389 904 : case ABSU_EXPR:
26390 904 : if (fp)
26391 : {
26392 338 : if (X87_FLOAT_MODE_P (mode)
26393 126 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26394 : /* fabs. */
26395 0 : stmt_cost = ix86_cost->fabs;
26396 : else
26397 : /* andss of sign bit. */
26398 338 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26399 : }
26400 : else
26401 : {
26402 566 : if (VECTOR_MODE_P (mode))
26403 : {
26404 99 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26405 : /* vabs was introduced in SSE3.
26406 : SSE3 uses psrat + pxor + psub. */
26407 99 : if (!TARGET_SSSE3)
26408 75 : stmt_cost *= 3;
26409 : }
26410 : else
26411 : /* neg + cmov. */
26412 467 : stmt_cost = ix86_cost->add * 2;
26413 : }
26414 : break;
26415 :
26416 106851 : case BIT_IOR_EXPR:
26417 106851 : case BIT_XOR_EXPR:
26418 106851 : case BIT_AND_EXPR:
26419 106851 : case BIT_NOT_EXPR:
26420 106851 : gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
26421 : && !X87_FLOAT_MODE_P (mode));
26422 106851 : if (VECTOR_MODE_P (mode))
26423 35234 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26424 : else
26425 71617 : stmt_cost = ix86_cost->add;
26426 : break;
26427 :
26428 227634 : default:
26429 227634 : if (truth_value_p (subcode))
26430 : {
26431 73403 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26432 : /* CMPccS? instructions are cheap, so use sse_op. While they
26433 : produce a mask which may need to be turned to 0/1 by and,
26434 : expect that this will be optimized away in a common case. */
26435 0 : stmt_cost = ix86_cost->sse_op;
26436 73403 : else if (X87_FLOAT_MODE_P (mode))
26437 : /* fcmp + setcc. */
26438 0 : stmt_cost = ix86_cost->fadd + ix86_cost->add;
26439 73403 : else if (VECTOR_MODE_P (mode))
26440 14743 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26441 : else
26442 : /* setcc. */
26443 58660 : stmt_cost = ix86_cost->add;
26444 : break;
26445 : }
26446 : break;
26447 : }
26448 : }
26449 :
26450 : /* Record number of load/store/gather/scatter in vectorized body. */
26451 6670767 : if (where == vect_body && !m_costing_for_scalar)
26452 : {
26453 1698989 : int scale = 1;
26454 1698989 : if (vectype
26455 3389394 : && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
26456 59575 : && TARGET_AVX512_SPLIT_REGS)
26457 3380686 : || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26458 92429 : && TARGET_AVX256_SPLIT_REGS)))
26459 : scale = 2;
26460 :
26461 1698989 : switch (kind)
26462 : {
26463 : /* Emulated gather/scatter or any scalarization. */
26464 109037 : case scalar_load:
26465 109037 : case scalar_stmt:
26466 109037 : case scalar_store:
26467 109037 : case vector_gather_load:
26468 109037 : case vector_scatter_store:
26469 109037 : m_prefer_unroll = false;
26470 109037 : break;
26471 :
26472 473332 : case vector_stmt:
26473 473332 : case vec_to_scalar:
26474 : /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
26475 : unroll in the vectorizer will enable partial sum. */
26476 473332 : if (stmt_info
26477 473310 : && vect_is_reduction (stmt_info)
26478 520833 : && stmt_info->stmt)
26479 : {
26480 : /* Handle __builtin_fma. */
26481 47501 : if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
26482 : {
26483 6 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26484 6 : break;
26485 : }
26486 :
26487 47495 : if (!is_gimple_assign (stmt_info->stmt))
26488 : break;
26489 :
26490 45117 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26491 45117 : machine_mode inner_mode = GET_MODE_INNER (mode);
26492 45117 : tree rhs1, rhs2;
26493 45117 : bool native_vnni_p = true;
26494 45117 : gimple* def;
26495 45117 : machine_mode mode_rhs;
26496 45117 : switch (subcode)
26497 : {
26498 35235 : case PLUS_EXPR:
26499 35235 : case MINUS_EXPR:
26500 35235 : if (!fp || !flag_associative_math
26501 15940 : || flag_fp_contract_mode != FP_CONTRACT_FAST)
26502 : break;
26503 :
26504 : /* FMA condition for different modes. */
26505 15940 : if (((inner_mode == DFmode || inner_mode == SFmode)
26506 15928 : && !TARGET_FMA && !TARGET_AVX512VL)
26507 5776 : || (inner_mode == HFmode && !TARGET_AVX512FP16)
26508 5776 : || (inner_mode == BFmode && !TARGET_AVX10_2))
26509 : break;
26510 :
26511 : /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
26512 : to FMA/FNMA after vectorization. */
26513 5776 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26514 5776 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26515 5776 : if (subcode == PLUS_EXPR
26516 4538 : && TREE_CODE (rhs1) == SSA_NAME
26517 4538 : && (def = SSA_NAME_DEF_STMT (rhs1), true)
26518 4538 : && is_gimple_assign (def)
26519 8106 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26520 1402 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26521 4374 : else if (TREE_CODE (rhs2) == SSA_NAME
26522 4374 : && (def = SSA_NAME_DEF_STMT (rhs2), true)
26523 4374 : && is_gimple_assign (def)
26524 8716 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26525 4338 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26526 : break;
26527 :
26528 : /* Vectorizer lane_reducing_op_p supports DOT_PROX_EXPR,
26529 : WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
26530 : SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */
26531 374 : case DOT_PROD_EXPR:
26532 374 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26533 374 : mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
26534 374 : if (mode_rhs == QImode)
26535 : {
26536 211 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26537 211 : signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
26538 211 : signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
26539 :
26540 : /* vpdpbusd. */
26541 211 : if (signop1_p != signop2_p)
26542 53 : native_vnni_p
26543 53 : = (GET_MODE_SIZE (mode) == 64
26544 53 : ? TARGET_AVX512VNNI
26545 10 : : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
26546 53 : || TARGET_AVXVNNI));
26547 : else
26548 : /* vpdpbssd. */
26549 158 : native_vnni_p
26550 174 : = (GET_MODE_SIZE (mode) == 64
26551 158 : ? TARGET_AVX10_2
26552 142 : : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
26553 : }
26554 374 : m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
26555 :
26556 : /* Dislike to do unroll and partial sum for
26557 : emulated DOT_PROD_EXPR. */
26558 374 : if (!native_vnni_p)
26559 128 : m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
26560 : break;
26561 :
26562 80 : case SAD_EXPR:
26563 80 : m_num_reduc[X86_REDUC_SAD] += count * scale;
26564 80 : break;
26565 :
26566 : default:
26567 : break;
26568 : }
26569 : }
26570 :
26571 : default:
26572 : break;
26573 : }
26574 : }
26575 :
26576 :
26577 6670767 : combined_fn cfn;
26578 6670767 : if ((kind == vector_stmt || kind == scalar_stmt)
26579 1612474 : && stmt_info
26580 1606508 : && stmt_info->stmt
26581 8277275 : && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
26582 17518 : switch (cfn)
26583 : {
26584 63 : case CFN_FMA:
26585 63 : stmt_cost = ix86_vec_cost (mode,
26586 63 : mode == SFmode ? ix86_cost->fmass
26587 : : ix86_cost->fmasd);
26588 63 : break;
26589 24 : case CFN_MULH:
26590 24 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26591 24 : break;
26592 : default:
26593 : break;
26594 : }
26595 :
26596 6670767 : if (kind == vec_promote_demote)
26597 : {
26598 44963 : int outer_size
26599 : = tree_to_uhwi
26600 44963 : (TYPE_SIZE
26601 44963 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
26602 44963 : int inner_size
26603 : = tree_to_uhwi
26604 44963 : (TYPE_SIZE
26605 44963 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
26606 44963 : bool inner_fp = FLOAT_TYPE_P
26607 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
26608 :
26609 3831 : if (fp && inner_fp)
26610 3431 : stmt_cost = vec_fp_conversion_cost
26611 3431 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26612 41532 : else if (fp && !inner_fp)
26613 4106 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26614 37426 : else if (!fp && inner_fp)
26615 400 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26616 : else
26617 37026 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26618 : /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
26619 : greater than inner size we will end up doing two conversions and
26620 : packing them. We always pack pairs; if the size difference is greater
26621 : it is split into multiple demote operations. */
26622 44963 : if (inner_size > outer_size)
26623 17126 : stmt_cost = stmt_cost * 2
26624 17126 : + ix86_vec_cost (mode, ix86_cost->sse_op);
26625 : }
26626 :
26627 : /* If we do elementwise loads into a vector then we are bound by
26628 : latency and execution resources for the many scalar loads
26629 : (AGU and load ports). Try to account for this by scaling the
26630 : construction cost by the number of elements involved. */
26631 6670767 : if ((kind == vec_construct || kind == vec_to_scalar)
26632 6670767 : && ((node
26633 426191 : && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
26634 437746 : || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
26635 36097 : && SLP_TREE_LANES (node) == 1))
26636 39082 : && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
26637 : (SLP_TREE_REPRESENTATIVE (node))))
26638 : != INTEGER_CST))
26639 69306 : || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
26640 : {
26641 30728 : stmt_cost = ix86_default_vector_cost (kind, mode);
26642 30728 : stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
26643 : }
26644 6640039 : else if ((kind == vec_construct || kind == scalar_to_vec)
26645 443948 : && node
26646 413134 : && SLP_TREE_DEF_TYPE (node) == vect_external_def)
26647 : {
26648 303571 : stmt_cost = ix86_default_vector_cost (kind, mode);
26649 303571 : unsigned i;
26650 303571 : tree op;
26651 1298849 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26652 691707 : if (TREE_CODE (op) == SSA_NAME)
26653 470257 : TREE_VISITED (op) = 0;
26654 995278 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26655 : {
26656 691707 : if (TREE_CODE (op) != SSA_NAME
26657 470257 : || TREE_VISITED (op))
26658 254971 : continue;
26659 436736 : TREE_VISITED (op) = 1;
26660 436736 : gimple *def = SSA_NAME_DEF_STMT (op);
26661 436736 : tree tem;
26662 : /* Look through a conversion. */
26663 436736 : if (is_gimple_assign (def)
26664 247814 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
26665 27368 : && ((tem = gimple_assign_rhs1 (def)), true)
26666 464104 : && TREE_CODE (tem) == SSA_NAME)
26667 27157 : def = SSA_NAME_DEF_STMT (tem);
26668 : /* When the component is loaded from memory without sign-
26669 : or zero-extension we can move it to a vector register and/or
26670 : insert it via vpinsr with a memory operand. */
26671 436736 : if (gimple_assign_load_p (def)
26672 130183 : && tree_nop_conversion_p (TREE_TYPE (op),
26673 130183 : TREE_TYPE (gimple_assign_lhs (def)))
26674 690920 : && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
26675 5356 : || TARGET_SSE4_1))
26676 : ;
26677 : /* When the component is extracted from a vector it is already
26678 : in a vector register. */
26679 314003 : else if (is_gimple_assign (def)
26680 120796 : && gimple_assign_rhs_code (def) == BIT_FIELD_REF
26681 316851 : && VECTOR_TYPE_P (TREE_TYPE
26682 : (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
26683 : ;
26684 : else
26685 : {
26686 311570 : if (fp)
26687 : {
26688 : /* Scalar FP values residing in x87 registers need to be
26689 : spilled and reloaded. */
26690 13436 : auto mode2 = TYPE_MODE (TREE_TYPE (op));
26691 13436 : if (IS_STACK_MODE (mode2))
26692 : {
26693 971 : int cost
26694 : = (ix86_cost->hard_register.fp_store[mode2 == SFmode
26695 971 : ? 0 : 1]
26696 971 : + ix86_cost->sse_load[sse_store_index (mode2)]);
26697 971 : stmt_cost += COSTS_N_INSNS (cost) / 2;
26698 : }
26699 13436 : m_num_sse_needed[where]++;
26700 : }
26701 : else
26702 : {
26703 298134 : m_num_gpr_needed[where]++;
26704 :
26705 298134 : stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
26706 : }
26707 : }
26708 : }
26709 995278 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26710 691707 : if (TREE_CODE (op) == SSA_NAME)
26711 470257 : TREE_VISITED (op) = 0;
26712 : }
26713 6670767 : if (stmt_cost == -1)
26714 5273280 : stmt_cost = ix86_default_vector_cost (kind, mode);
26715 :
26716 6670767 : if (kind == vec_perm && vectype
26717 178126 : && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26718 : /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body. */
26719 6674243 : && count != 0)
26720 : {
26721 3476 : bool real_perm = true;
26722 3476 : unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
26723 :
26724 3476 : if (node
26725 3473 : && SLP_TREE_LOAD_PERMUTATION (node).exists ()
26726 : /* Loop vectorization will have 4 times vec_perm
26727 : with index as {0, 0, 0, 0}.
26728 : But it actually generates
26729 : vec_perm_expr <vect, vect, 0, 0, 0, 0>
26730 : vec_perm_expr <vect, vect, 1, 1, 1, 1>
26731 : vec_perm_expr <vect, vect, 2, 2, 2, 2>
26732 : Need to be handled separately. */
26733 6304 : && is_a <bb_vec_info> (m_vinfo))
26734 : {
26735 39 : unsigned half = nunits / 2;
26736 39 : unsigned i = 0;
26737 39 : bool allsame = true;
26738 39 : unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
26739 39 : bool cross_lane_p = false;
26740 198 : for (i = 0 ; i != SLP_TREE_LANES (node); i++)
26741 : {
26742 197 : unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
26743 : /* allsame is just a broadcast. */
26744 197 : if (tmp != first)
26745 92 : allsame = false;
26746 :
26747 : /* 4 times vec_perm with number of lanes multiple of nunits. */
26748 197 : tmp = tmp & (nunits - 1);
26749 197 : unsigned index = i & (nunits - 1);
26750 197 : if ((index < half && tmp >= half)
26751 197 : || (index >= half && tmp < half))
26752 65 : cross_lane_p = true;
26753 :
26754 197 : if (!allsame && cross_lane_p)
26755 : break;
26756 : }
26757 :
26758 39 : if (i == SLP_TREE_LANES (node))
26759 : real_perm = false;
26760 : }
26761 :
26762 : if (real_perm)
26763 : {
26764 3475 : m_num_avx256_vec_perm[where] += count;
26765 3475 : if (dump_file && (dump_flags & TDF_DETAILS))
26766 : {
26767 231 : fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
26768 231 : if (stmt_info)
26769 228 : print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
26770 231 : fprintf (dump_file, " \n");
26771 : }
26772 : }
26773 : }
26774 :
26775 : /* Penalize DFmode vector operations for Bonnell. */
26776 6670767 : if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
26777 6670829 : && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
26778 12 : stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
26779 :
26780 : /* Statements in an inner loop relative to the loop being
26781 : vectorized are weighted more heavily. The value here is
26782 : arbitrary and could potentially be improved with analysis. */
26783 6670767 : retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
26784 :
26785 : /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
26786 : for Silvermont as it has out of order integer pipeline and can execute
26787 : 2 scalar instruction per tick, but has in order SIMD pipeline. */
26788 6670767 : if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
26789 6670767 : || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
26790 1811 : && stmt_info && stmt_info->stmt)
26791 : {
26792 1595 : tree lhs_op = gimple_get_lhs (stmt_info->stmt);
26793 1595 : if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
26794 1198 : retval = (retval * 17) / 10;
26795 : }
26796 :
26797 6670767 : m_costs[where] += retval;
26798 :
26799 6670767 : return retval;
26800 : }
26801 :
26802 : void
26803 1694942 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26804 : {
26805 1694942 : unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26806 1694942 : unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26807 :
26808 : /* Any better way to have target available fp registers, currently use SSE_REGS. */
26809 1694942 : unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26810 6779768 : for (unsigned i = 0; i != 3; i++)
26811 : {
26812 5084826 : if (m_num_gpr_needed[i] > target_avail_regs)
26813 706 : m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26814 : /* Only measure sse registers pressure. */
26815 5084826 : if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26816 92 : m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26817 : }
26818 1694942 : }
26819 :
: /* Finalize the cost of this vectorization attempt: reject attempts
:    considered unprofitable, suggest an unroll factor and an epilogue
:    vector mode, and account for register pressure.  Chains to
:    vector_costs::finish_cost at the end.  */
26820 : void
26821 1694942 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
26822 : {
26823 1694942 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
26824 377938 : if (loop_vinfo && !m_costing_for_scalar)
26825 : {
26826 : /* We are currently not asking the vectorizer to compare costs
26827 : between different vector mode sizes. When using predication
26828 : that will end up always choosing the preferred mode size even
26829 : if there's a smaller mode covering all lanes. Test for this
26830 : situation and artificially reject the larger mode attempt.
26831 : ??? We currently lack masked ops for sub-SSE sized modes,
26832 : so we could restrict this rejection to AVX and AVX512 modes
26833 : but error on the safe side for now. */
26834 82636 : if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
26835 22 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26836 15 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26837 82646 : && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
26838 20 : > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
26839 8 : m_costs[vect_body] = INT_MAX;
26840 :
26841 : /* We'd like to avoid using masking if there's an in-order reduction
26842 : to vectorize because that will also perform in-order adds of
26843 : masked elements (as neutral value, of course) here, but there
26844 : is currently no way to indicate to try un-masked with the same
26845 : mode. */
26846 :
: /* Check whether add_stmt_cost recorded any reduction statements;
:    only then is suggesting an unroll factor worthwhile.  */
26847 82636 : bool any_reduc_p = false;
26848 328400 : for (int i = 0; i != X86_REDUC_LAST; i++)
26849 246541 : if (m_num_reduc[i])
26850 : {
26851 : any_reduc_p = true;
26852 : break;
26853 : }
26854 :
26855 82636 : if (any_reduc_p
26856 : /* Not much gain for loop with gather and scatter. */
26857 777 : && m_prefer_unroll
26858 627 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
26859 : {
: /* An explicitly set limit takes precedence over the tuning
:    default.  */
26860 956 : unsigned unroll_factor
26861 478 : = OPTION_SET_P (ix86_vect_unroll_limit)
26862 478 : ? ix86_vect_unroll_limit
26863 478 : : ix86_cost->vect_unroll_limit;
26864 :
26865 478 : if (unroll_factor > 1)
26866 : {
26867 1912 : for (int i = 0 ; i != X86_REDUC_LAST; i++)
26868 : {
26869 1434 : if (m_num_reduc[i])
26870 : {
: /* Cap the unroll factor by the tuning threshold for this
:    reduction kind, divided among the reductions already
:    present in the loop body.  */
26871 478 : unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
26872 : m_num_reduc[i]);
26873 1434 : unroll_factor = MIN (unroll_factor, tmp);
26874 : }
26875 : }
26876 :
: /* Round the suggestion up to a power of two.  */
26877 956 : m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
26878 : }
26879 : }
26880 :
26881 : }
26882 :
: /* Add estimated spill costs where more GPR/SSE registers are live
:    than the target provides.  */
26883 1694942 : ix86_vect_estimate_reg_pressure ();
26884 :
: /* Reject the attempt outright when it uses AVX256 cross-lane
:    permutations on a tuning that wants to avoid them.  */
26885 6779768 : for (int i = 0; i != 3; i++)
26886 5084826 : if (m_num_avx256_vec_perm[i]
26887 444 : && TARGET_AVX256_AVOID_VEC_PERM)
26888 7 : m_costs[i] = INT_MAX;
26889 :
26890 : /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
26891 : an AVX2 and an SSE epilogue for AVX512 vectorized loops. */
26892 1694942 : if (loop_vinfo
26893 377938 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26894 46982 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
26895 1695766 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26896 14 : m_suggested_epilogue_mode = V16QImode;
26897 : /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
26898 : enable a 64bit SSE epilogue. */
26899 1694942 : if (loop_vinfo
26900 377938 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26901 46982 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
26902 1697416 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
26903 91 : m_suggested_epilogue_mode = V8QImode;
26904 :
26905 : /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
26906 : a masked epilogue if that doesn't seem detrimental. */
26907 1694942 : if (loop_vinfo
26908 377938 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26909 354447 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
26910 : /* Avoid a masked epilog if cascaded epilogues eventually get us
26911 : to one with VF 1 as that means no scalar epilog at all. */
26912 52309 : && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
26913 52309 : / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
26914 35 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26915 52308 : && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
26916 1695032 : && !OPTION_SET_P (param_vect_partial_vector_usage))
26917 : {
26918 84 : bool avoid = false;
26919 84 : if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26920 68 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
26921 : {
26922 68 : unsigned int peel_niter
26923 : = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
26924 68 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
26925 0 : peel_niter += 1;
26926 : /* When we know the number of scalar iterations of the epilogue,
26927 : avoid masking when a single vector epilog iteration handles
26928 : it in full. */
26929 68 : if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
26930 68 : % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
26931 : avoid = true;
26932 : }
26933 83 : if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
26934 7 : for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
26935 : {
26936 2 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
26937 : ;
26938 2 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
26939 : ;
26940 : else
26941 : {
26942 1 : int loop_depth
26943 2 : = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
26944 1 : DDR_LOOP_NEST (ddr));
26945 2 : if (DDR_NUM_DIST_VECTS (ddr) == 1
26946 1 : && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
26947 : {
26948 : /* Avoid the case when there's an outer loop that might
26949 : traverse a multi-dimensional array with the inner
26950 : loop just executing the masked epilogue with a
26951 : read-write where the next outer iteration might
26952 : read from the masked part of the previous write,
26953 : 'n' filling half a vector.
26954 : for (j = 0; j < m; ++j)
26955 : for (i = 0; i < n; ++i)
26956 : a[j][i] = c * a[j][i]; */
26957 : avoid = true;
26958 : break;
26959 : }
26960 : }
26961 : }
26962 : /* Avoid using masking if there's an in-order reduction
26963 : to vectorize because that will also perform in-order adds of
26964 : masked elements (as neutral value, of course). */
26965 84 : if (!avoid)
26966 : {
26967 331 : for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
26968 86 : if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
26969 86 : && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
26970 : == FOLD_LEFT_REDUCTION))
26971 : {
26972 : avoid = true;
26973 : break;
26974 : }
26975 : }
26976 82 : if (!avoid)
26977 : {
: /* Use the same mode, but masked, for the epilogue.  */
26978 81 : m_suggested_epilogue_mode = loop_vinfo->vector_mode;
26979 81 : m_masked_epilogue = 1;
26980 : }
26981 : }
26982 :
26983 1694942 : vector_costs::finish_cost (scalar_costs);
26984 1694942 : }
26985 :
26986 : /* Validate target specific memory model bits in VAL. */
26987 :
26988 : static unsigned HOST_WIDE_INT
26989 411093 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
26990 : {
26991 411093 : enum memmodel model = memmodel_from_int (val);
26992 411093 : bool strong;
26993 :
26994 411093 : if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
26995 : |MEMMODEL_MASK)
26996 411089 : || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
26997 : {
26998 4 : warning (OPT_Winvalid_memory_model,
26999 : "unknown architecture specific memory model");
27000 4 : return MEMMODEL_SEQ_CST;
27001 : }
27002 411089 : strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
27003 411089 : if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
27004 : {
27005 0 : warning (OPT_Winvalid_memory_model,
27006 : "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
27007 : "memory model");
27008 0 : return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
27009 : }
27010 411089 : if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
27011 : {
27012 0 : warning (OPT_Winvalid_memory_model,
27013 : "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
27014 : "memory model");
27015 0 : return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
27016 : }
27017 : return val;
27018 : }
27019 :
27020 : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
27021 : CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
27022 : CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
27023 : or number of vecsize_mangle variants that should be emitted. */
27024 :
27025 : static int
27026 7593 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
27027 : struct cgraph_simd_clone *clonei,
27028 : tree base_type, int num,
27029 : bool explicit_p)
27030 : {
27031 7593 : int ret = 1;
27032 :
: /* An explicit simdlen must be a power of two in [2, 1024].  */
27033 7593 : if (clonei->simdlen
27034 7593 : && (clonei->simdlen < 2
27035 1321 : || clonei->simdlen > 1024
27036 1321 : || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
27037 : {
27038 0 : if (explicit_p)
27039 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27040 : "unsupported simdlen %wd", clonei->simdlen.to_constant ());
27041 0 : return 0;
27042 : }
27043 :
: /* Only void or a non-aggregate 8/16/32/64-bit integer or SF/DF float
:    return type is supported.  */
27044 7593 : tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
27045 7593 : if (TREE_CODE (ret_type) != VOID_TYPE)
27046 6801 : switch (TYPE_MODE (ret_type))
27047 : {
27048 6801 : case E_QImode:
27049 6801 : case E_HImode:
27050 6801 : case E_SImode:
27051 6801 : case E_DImode:
27052 6801 : case E_SFmode:
27053 6801 : case E_DFmode:
27054 : /* case E_SCmode: */
27055 : /* case E_DCmode: */
27056 6801 : if (!AGGREGATE_TYPE_P (ret_type))
27057 : break;
27058 : /* FALLTHRU */
27059 2 : default:
27060 2 : if (explicit_p)
27061 2 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27062 : "unsupported return type %qT for simd", ret_type);
27063 2 : return 0;
27064 : }
27065 :
27066 7591 : tree t;
27067 7591 : int i;
27068 7591 : tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
: /* For a definition (or an unprototyped declaration) walk
:    DECL_ARGUMENTS, otherwise the prototype's TYPE_ARG_TYPES.  */
27069 7591 : bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
27070 :
: /* Apply the same type restrictions to every argument.  */
27071 7591 : for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
27072 20438 : t && t != void_list_node; t = TREE_CHAIN (t), i++)
27073 : {
27074 16678 : tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
27075 12852 : switch (TYPE_MODE (arg_type))
27076 : {
27077 12833 : case E_QImode:
27078 12833 : case E_HImode:
27079 12833 : case E_SImode:
27080 12833 : case E_DImode:
27081 12833 : case E_SFmode:
27082 12833 : case E_DFmode:
27083 : /* case E_SCmode: */
27084 : /* case E_DCmode: */
27085 12833 : if (!AGGREGATE_TYPE_P (arg_type))
27086 : break;
27087 : /* FALLTHRU */
27088 41 : default:
: /* Uniform arguments are passed unchanged, so any type is ok.  */
27089 41 : if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
27090 : break;
27091 5 : if (explicit_p)
27092 5 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27093 : "unsupported argument type %qT for simd", arg_type);
27094 : return 0;
27095 : }
27096 : }
27097 :
27098 7586 : if (!TREE_PUBLIC (node->decl) || !explicit_p)
27099 : {
27100 : /* If the function isn't exported, we can pick up just one ISA
27101 : for the clones. */
27102 114 : if (TARGET_AVX512F)
27103 0 : clonei->vecsize_mangle = 'e';
27104 114 : else if (TARGET_AVX2)
27105 1 : clonei->vecsize_mangle = 'd';
27106 113 : else if (TARGET_AVX)
27107 88 : clonei->vecsize_mangle = 'c';
27108 : else
27109 25 : clonei->vecsize_mangle = 'b';
27110 : ret = 1;
27111 : }
27112 : else
27113 : {
: /* Exported functions get all four ISA variants; NUM selects the
:    one being set up by this call.  */
27114 7472 : clonei->vecsize_mangle = "bcde"[num];
27115 7472 : ret = 4;
27116 : }
27117 7586 : clonei->mask_mode = VOIDmode;
: /* Register widths per mangling letter: 'b' xmm; 'c' xmm int with
:    ymm float; 'd' ymm; 'e' zmm, which also uses a mask mode.  */
27118 7586 : switch (clonei->vecsize_mangle)
27119 : {
27120 1893 : case 'b':
27121 1893 : clonei->vecsize_int = 128;
27122 1893 : clonei->vecsize_float = 128;
27123 1893 : break;
27124 1956 : case 'c':
27125 1956 : clonei->vecsize_int = 128;
27126 1956 : clonei->vecsize_float = 256;
27127 1956 : break;
27128 1869 : case 'd':
27129 1869 : clonei->vecsize_int = 256;
27130 1869 : clonei->vecsize_float = 256;
27131 1869 : break;
27132 1868 : case 'e':
27133 1868 : clonei->vecsize_int = 512;
27134 1868 : clonei->vecsize_float = 512;
27135 1868 : if (TYPE_MODE (base_type) == QImode)
27136 19 : clonei->mask_mode = DImode;
27137 : else
27138 1849 : clonei->mask_mode = SImode;
27139 : break;
27140 : }
: /* With no explicit simdlen, derive it from the vector size and the
:    width of the characteristic type.  */
27141 7586 : if (clonei->simdlen == 0)
27142 : {
27143 6265 : if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
27144 3297 : clonei->simdlen = clonei->vecsize_int;
27145 : else
27146 2968 : clonei->simdlen = clonei->vecsize_float;
27147 6265 : clonei->simdlen = clonei->simdlen
27148 12530 : / GET_MODE_BITSIZE (TYPE_MODE (base_type));
27149 : }
27150 1321 : else if (clonei->simdlen > 16)
27151 : {
27152 : /* For compatibility with ICC, use the same upper bounds
27153 : for simdlen. In particular, for CTYPE below, use the return type,
27154 : unless the function returns void, in that case use the characteristic
27155 : type. If it is possible for given SIMDLEN to pass CTYPE value
27156 : in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
27157 : for 64-bit code), accept that SIMDLEN, otherwise warn and don't
27158 : emit corresponding clone. */
27159 12 : tree ctype = ret_type;
27160 12 : if (VOID_TYPE_P (ret_type))
27161 0 : ctype = base_type;
27162 24 : int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
27163 12 : if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
27164 8 : cnt /= clonei->vecsize_int;
27165 : else
27166 4 : cnt /= clonei->vecsize_float;
27167 12 : if (cnt > (TARGET_64BIT ? 16 : 8))
27168 : {
27169 0 : if (explicit_p)
27170 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27171 : "unsupported simdlen %wd",
27172 : clonei->simdlen.to_constant ());
27173 0 : return 0;
27174 : }
27175 : }
27176 : return ret;
27177 : }
27178 :
27179 : /* If SIMD clone NODE can't be used in a vectorized loop
27180 : in current function, return -1, otherwise return a badness of using it
27181 : (0 if it is most desirable from vecsize_mangle point of view, 1
27182 : slightly less desirable, etc.). */
27183 :
27184 : static int
27185 1768 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
27186 : {
27187 1768 : switch (node->simdclone->vecsize_mangle)
27188 : {
27189 621 : case 'b':
27190 621 : if (!TARGET_SSE2)
27191 : return -1;
27192 621 : if (!TARGET_AVX)
27193 : return 0;
27194 520 : return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
27195 627 : case 'c':
27196 627 : if (!TARGET_AVX)
27197 : return -1;
27198 582 : return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
27199 332 : case 'd':
27200 332 : if (!TARGET_AVX2)
27201 : return -1;
27202 139 : return TARGET_AVX512F ? 1 : 0;
27203 188 : case 'e':
27204 188 : if (!TARGET_AVX512F)
27205 130 : return -1;
27206 : return 0;
27207 0 : default:
27208 0 : gcc_unreachable ();
27209 : }
27210 : }
27211 :
/* This function adjusts the unroll factor based on
   the hardware capabilities. For ex, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important. This function decides the
   unroll factor using number of memory references
   (value 32 is used) as a heuristic. */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  /* Unroll small size loop when unroll factor is not explicitly
     specified.  */
  if (ix86_unroll_only_small_loops && !loop->unroll)
    {
      /* Only loops small enough to fit the per-cost-table instruction
	 budget are unrolled; everything else keeps factor 1.  */
      if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
	return MIN (nunroll, ix86_cost->small_unroll_factor);
      else
	return 1;
    }

  /* Only the bdver-style tunings opt into the memory-reference
     heuristic below.  */
  if (!TARGET_ADJUST_UNROLL)
    return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures. */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	  if (const_rtx x = *iter)
	    if (MEM_P (x))
	      {
		machine_mode mode = GET_MODE (x);
		/* A reference wider than four machine words counts
		   double.  */
		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
		if (n_words > 4)
		  mem_count += 2;
		else
		  mem_count += 1;
	      }
  free (bbs);

  /* Cap the factor so the unrolled body has roughly at most 32 memory
     references.  */
  if (mem_count && mem_count <=32)
    return MIN (nunroll, 32 / mem_count);

  return nunroll;
}
27266 :
27267 :
27268 : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
27269 :
27270 : static bool
27271 418570 : ix86_float_exceptions_rounding_supported_p (void)
27272 : {
27273 : /* For x87 floating point with standard excess precision handling,
27274 : there is no adddf3 pattern (since x87 floating point only has
27275 : XFmode operations) so the default hook implementation gets this
27276 : wrong. */
27277 418570 : return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
27278 : }
27279 :
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.

   Build trees for expanding C11 atomic compound assignment on FP:
   *HOLD saves the FP environment and disables traps, *CLEAR clears
   pending exception flags, *UPDATE restores the saved environment and
   re-raises accumulated exceptions via __atomic_feraiseexcept.
   Emits x87 and/or SSE (MXCSR) sequences depending on which FP units
   are enabled.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* No FP unit at all: nothing to save or restore.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the exception bits read back from the x87 status word
     and/or MXCSR, for the final feraiseexcept call.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* The x87 environment saved by fnstenv is seven 32-bit words.  */
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      /* *HOLD: fnstenv to save the environment, then fnclex to clear
	 pending exceptions.  */
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
			 NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
		      hold_fnclex);
      /* *CLEAR: just fnclex.  */
      *clear = build_call_expr (fnclex, 0);
      /* *UPDATE: capture the status word with fnstsw into
	 EXCEPTIONS_VAR, then restore the saved environment.  */
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
			    fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
				exceptions_var, exceptions_x87,
				NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
			sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      /* Save the original MXCSR, and compute a modified copy with all
	 exceptions masked (OR 0x1f80 sets the mask bits) and the
	 exception status flags cleared (AND ~0x3f).  */
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
				      mxcsr_orig_var, stmxcsr_hold_call,
				      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
				  mxcsr_orig_var,
				  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
			     build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
				     mxcsr_mod_var, hold_mod_val,
				     NULL_TREE, NULL_TREE);
      /* *HOLD: load the modified MXCSR (appended after the x87 part
	 when both units are in use).  */
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
			      hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
			 ldmxcsr_hold_call);
      if (*hold)
	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
	*hold = hold_all;
      /* *CLEAR: reload the masked/cleared MXCSR copy.  */
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
			 ldmxcsr_clear_call);
      else
	*clear = ldmxcsr_clear_call;
      /* *UPDATE: OR the current MXCSR (which holds exception flags in
	 its low bits) into EXCEPTIONS_VAR, then restore the original
	 MXCSR.  */
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
					  stxmcsr_update_call);
      if (*update)
	{
	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
					exceptions_var, exceptions_sse);
	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
					   exceptions_var, exceptions_mod);
	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
			    exceptions_assign);
	}
      else
	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
			  exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
			ldmxcsr_update_call);
    }
  /* Finally, raise whatever exceptions were accumulated.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
						    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
		    atomic_feraiseexcept_call);
}
27380 :
27381 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries. For
   x86-64, common symbol is local only for non-PIE binaries or linker
   supports copy reloc in PIE binaries. */

static bool
ix86_binds_local_p (const_tree exp)
{
  /* -mdirect-extern-access may be overridden per declaration by the
     "nodirect_extern_access" attribute.  */
  bool direct_extern_access
    = (ix86_direct_extern_access
       && !(VAR_OR_FUNCTION_DECL_P (exp)
	    && lookup_attribute ("nodirect_extern_access",
				 DECL_ATTRIBUTES (exp))));
  /* Remember that at least one symbol must be accessed indirectly.  */
  if (!direct_extern_access)
    ix86_has_no_direct_extern_access = true;
  /* Extern protected data may be treated as local only when direct
     access is allowed and either we are not generating PIC, or (64-bit
     only) the linker supports copy relocations in PIEs.  */
  return default_binds_local_p_3 (exp, flag_shlib != 0, true,
				  direct_extern_access,
				  (direct_extern_access
				   && (!flag_pic
				       || (TARGET_64BIT
					   && HAVE_LD_PIE_COPYRELOC != 0))));
}
27403 :
27404 : /* If flag_pic or ix86_direct_extern_access is false, then neither
27405 : local nor global relocs should be placed in readonly memory. */
27406 :
27407 : static int
27408 5130748 : ix86_reloc_rw_mask (void)
27409 : {
27410 5130748 : return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
27411 : }
27412 : #endif
27413 :
27414 : /* Return true iff ADDR can be used as a symbolic base address. */
27415 :
27416 : static bool
27417 3154 : symbolic_base_address_p (rtx addr)
27418 : {
27419 0 : if (SYMBOL_REF_P (addr))
27420 : return true;
27421 :
27422 3130 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
27423 0 : return true;
27424 :
27425 : return false;
27426 : }
27427 :
27428 : /* Return true iff ADDR can be used as a base address. */
27429 :
27430 : static bool
27431 4718 : base_address_p (rtx addr)
27432 : {
27433 0 : if (REG_P (addr))
27434 : return true;
27435 :
27436 2936 : if (symbolic_base_address_p (addr))
27437 0 : return true;
27438 :
27439 : return false;
27440 : }
27441 :
/* If MEM is in the form of [(base+symbase)+offset], extract the three
   parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   return false. */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip a CONST wrapper around the whole address.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Bare base (register or symbol): zero symbase and offset.  */
  if (base_address_p (addr))
    {
      *base = addr;
      *symbase = const0_rtx;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && base_address_p (XEXP (addr, 0)))
    {
      rtx addend = XEXP (addr, 1);

      if (GET_CODE (addend) == CONST)
	addend = XEXP (addend, 0);

      /* base + constant offset.  */
      if (CONST_INT_P (addend))
	{
	  *base = XEXP (addr, 0);
	  *symbase = const0_rtx;
	  *offset = addend;
	  return true;
	}

      /* Also accept REG + symbolic ref, with or without a CONST_INT
	 offset.  */
      if (REG_P (XEXP (addr, 0)))
	{
	  /* reg + symbol.  */
	  if (symbolic_base_address_p (addend))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = addend;
	      *offset = const0_rtx;
	      return true;
	    }

	  /* reg + (symbol + constant).  */
	  if (GET_CODE (addend) == PLUS
	      && symbolic_base_address_p (XEXP (addend, 0))
	      && CONST_INT_P (XEXP (addend, 1)))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = XEXP (addend, 0);
	      *offset = XEXP (addend, 1);
	      return true;
	    }
	}
    }

  /* Any other address shape is not decomposable here.  */
  return false;
}
27508 :
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple. LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
				    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
      symbase_1, symbase_2, offset_1, offset_2;

  /* Pick out the MEM and REG operands according to direction.  */
  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  /* Both moves must reference the same register number.  */
  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
27560 :
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
			optimization_type opt_type)
{
  switch (op)
    {
    /* These expand to long inline sequences, so allow them only when
       optimizing for speed.  */
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
    case lround_optab:
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      /* Without SSE4.1 (no roundss/roundsd), the SSE-math expansion of
	 rint needs -fno-trapping-math and is only worth it for speed;
	 all other configurations always support it.  */
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && !TARGET_SSE4_1
	  && mode1 != HFmode)
	return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      /* With SSE4.1 (or for HFmode) these map to single instructions;
	 otherwise only expand them when optimizing for speed.  */
      if ((SSE_FLOAT_MODE_P (mode1)
	   && TARGET_SSE_MATH
	   && TARGET_SSE4_1)
	  || mode1 == HFmode)
	return true;
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rsqrt_optab:
      /* Reciprocal square root approximation is a speed optimization
	 and is further gated by use_rsqrt_p.  */
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);

    default:
      return true;
    }
}
27608 :
27609 : /* Address space support.
27610 :
27611 : This is not "far pointers" in the 16-bit sense, but an easy way
27612 : to use %fs and %gs segment prefixes. Therefore:
27613 :
27614 : (a) All address spaces have the same modes,
   (b) All address spaces have the same address forms,
27616 : (c) While %fs and %gs are technically subsets of the generic
27617 : address space, they are probably not subsets of each other.
27618 : (d) Since we have no access to the segment base register values
27619 : without resorting to a system call, we cannot convert a
27620 : non-default address space to a default address space.
27621 : Therefore we do not claim %fs or %gs are subsets of generic.
27622 :
27623 : Therefore we can (mostly) use the default hooks. */
27624 :
27625 : /* All use of segmentation is assumed to make address 0 valid. */
27626 :
27627 : static bool
27628 67738141 : ix86_addr_space_zero_address_valid (addr_space_t as)
27629 : {
27630 67738141 : return as != ADDR_SPACE_GENERIC;
27631 : }
27632 :
27633 : static void
27634 778808 : ix86_init_libfuncs (void)
27635 : {
27636 778808 : if (TARGET_64BIT)
27637 : {
27638 763850 : set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
27639 763850 : set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
27640 : }
27641 : else
27642 : {
27643 14958 : set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
27644 14958 : set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
27645 : }
27646 :
27647 : #if TARGET_MACHO
27648 : darwin_rename_builtins ();
27649 : #endif
27650 778808 : }
27651 :
/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random. */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise. */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases. */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  /* Pure x87 math computes in long double; pure SSE2 math in
	     the declared type (so promote only to float).  */
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it. */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      /* -fexcess-precision=16 requires SSE math; it cannot be honored
	 when x87 math is (also) in use.  */
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
27707 :
27708 : /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
27709 : bool
27710 350014 : ix86_bitint_type_info (int n, struct bitint_info *info)
27711 : {
27712 350014 : if (n <= 8)
27713 5535 : info->limb_mode = QImode;
27714 344479 : else if (n <= 16)
27715 1893 : info->limb_mode = HImode;
27716 342586 : else if (n <= 32 || (!TARGET_64BIT && n > 64))
27717 45555 : info->limb_mode = SImode;
27718 : else
27719 297031 : info->limb_mode = DImode;
27720 350014 : info->abi_limb_mode = info->limb_mode;
27721 350014 : info->big_endian = false;
27722 350014 : info->extended = bitint_ext_undef;
27723 350014 : return true;
27724 : }
27725 :
27726 : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
27727 : or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
27728 : based on long double bits, go with the default one for the others. */
27729 :
27730 : static machine_mode
27731 3656139 : ix86_c_mode_for_floating_type (enum tree_index ti)
27732 : {
27733 3656139 : if (ti == TI_LONG_DOUBLE_TYPE)
27734 610252 : return (TARGET_LONG_DOUBLE_64 ? DFmode
27735 610220 : : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
27736 3045887 : return default_mode_for_floating_type (ti);
27737 : }
27738 :
27739 : /* Returns modified FUNCTION_TYPE for cdtor callabi. */
27740 : tree
27741 13938 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
27742 : {
27743 13938 : if (TARGET_64BIT
27744 71 : || TARGET_RTD
27745 14009 : || ix86_function_type_abi (fntype) != MS_ABI)
27746 13938 : return fntype;
27747 : /* For 32-bit MS ABI add thiscall attribute. */
27748 0 : tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
27749 0 : TYPE_ATTRIBUTES (fntype));
27750 0 : return build_type_attribute_variant (fntype, attribs);
27751 : }
27752 :
27753 : /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
27754 : decrements by exactly 2 no matter what the position was, there is no pushb.
27755 :
27756 : But as CIE data alignment factor on this arch is -4 for 32bit targets
27757 : and -8 for 64bit targets, we need to make sure all stack pointer adjustments
27758 : are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
27759 :
27760 : poly_int64
27761 272626357 : ix86_push_rounding (poly_int64 bytes)
27762 : {
27763 352223118 : return ROUND_UP (bytes, UNITS_PER_WORD);
27764 : }
27765 :
/* Use 8 bits of metadata starting at bit 48 for LAM_U48,
   and 6 bits of metadata starting at bit 57 for LAM_U57. */
27768 : #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
27769 : ? 48 \
27770 : : (ix86_lam_type == lam_u57 ? 57 : 0))
27771 : #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
27772 : ? 8 \
27773 : : (ix86_lam_type == lam_u57 ? 6 : 0))
27774 :
27775 : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
27776 : bool
27777 6233777 : ix86_memtag_can_tag_addresses ()
27778 : {
27779 6233777 : return ix86_lam_type != lam_none && TARGET_LP64;
27780 : }
27781 :
27782 : /* Implement TARGET_MEMTAG_TAG_BITSIZE. */
27783 : unsigned char
27784 450 : ix86_memtag_tag_bitsize ()
27785 : {
27786 450 : return IX86_HWASAN_TAG_SIZE;
27787 : }
27788 :
/* Implement TARGET_MEMTAG_SET_TAG.
   Return UNTAGGED with TAG inserted into its address-tag bits,
   preferring TARGET as the result register.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits. */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Shift the tag into its bit position and OR it into the pointer.  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
27809 :
/* Implement TARGET_MEMTAG_EXTRACT_TAG.
   Return the tag of TAGGED_POINTER as a QImode value, preferring
   TARGET for the intermediate shift result.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  /* Shift the tag bits down to the low end of the word.  */
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
				 GEN_INT (IX86_HWASAN_SHIFT), target,
				 /* unsignedp = */0,
				 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57. */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
			     gen_int_mode (and_imm, QImode)));
    }
  else
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
27831 :
/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER.
   Clear the tag bits of TAGGED_POINTER, keeping bit 63 intact,
   preferring TARGET as the result register.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone. */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
				+ (HOST_WIDE_INT_1U << 63) - 1),
			       Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
					   tag_mask, target, true,
					   OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
27846 :
/* Implement TARGET_MEMTAG_ADD_TAG.
   Return BASE advanced by OFFSET bytes with its tag incremented by
   TAG_OFFSET (wrapping within the tag width).  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     need to mask the most significant bit off. */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
  emit_move_insn (base_addr,
		  ix86_memtag_untagged_pointer (base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  /* Re-insert the wrapped tag, then apply the byte offset.  */
  emit_move_insn (tagged_addr,
		  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
27870 :
27871 : /* Implement TARGET_HAVE_CCMP. */
27872 : static bool
27873 8036359 : ix86_have_ccmp ()
27874 : {
27875 8036359 : return (bool) TARGET_APX_CCMP;
27876 : }
27877 :
27878 : /* Implement TARGET_MODE_CAN_TRANSFER_BITS. */
27879 : static bool
27880 4547143 : ix86_mode_can_transfer_bits (machine_mode mode)
27881 : {
27882 4547143 : if (GET_MODE_CLASS (mode) == MODE_FLOAT
27883 4500650 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
27884 111510 : switch (GET_MODE_INNER (mode))
27885 : {
27886 53569 : case E_SFmode:
27887 53569 : case E_DFmode:
27888 : /* These suffer from normalization upon load when not using SSE. */
27889 53569 : return !(ix86_fpmath & FPMATH_387);
27890 : default:
27891 : return true;
27892 : }
27893 :
27894 : return true;
27895 : }
27896 :
27897 : /* Implement TARGET_REDZONE_CLOBBER. */
27898 : static rtx
27899 2 : ix86_redzone_clobber ()
27900 : {
27901 2 : cfun->machine->asm_redzone_clobber_seen = true;
27902 2 : if (ix86_using_red_zone ())
27903 : {
27904 2 : rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
27905 2 : rtx mem = gen_rtx_MEM (BLKmode, base);
27906 2 : set_mem_size (mem, RED_ZONE_SIZE);
27907 2 : return mem;
27908 : }
27909 : return NULL_RTX;
27910 : }
27911 :
27912 : /* Target-specific selftests. */
27913 :
27914 : #if CHECKING_P
27915 :
27916 : namespace selftest {
27917 :
/* Verify that hard regs are dumped as expected (in compact mode). */

static void
ix86_test_dumping_hard_regs ()
{
  /* Compact dumps print hard regs by name: regno 0 is "ax",
     regno 1 is "dx" on x86.  */
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}
27926 :
/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code. */

static void
ix86_test_dumping_memory_blockage ()
{
  /* Reset the insn chain so uids in the expected dump are stable.  */
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS. The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode. */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       " (unspec:BLK [\n"
       " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}
27950 :
/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names. */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx. */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      /* Destination: (mem/c:SI (plus:DI (frame) (const_int -4))).  */
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed. */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated. */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed. */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i". */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0". */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4". */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32. */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      /* Source: hard register 5 with a REG_EXPR naming the same "i".  */
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR. */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}
28020 :
/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name. */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", where requires TARGET_SSE
     to exist. */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j". */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES. */
  {
    /* "(expr_list:REG_CALL_DECL". */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    /* Exactly two notes are expected.  */
    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE. */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}
28069 :
28070 : /* Verify that the RTL loader copes with a dump from print_rtx_function.
28071 : This test is target-specific since the dump contains target-specific
28072 : hard reg names. */
28073 :
static void
ix86_test_loading_full_dump ()
{
  /* Parse a full-function dump produced by print_rtx_function.  */
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  /* The dump names the function "times_two"; the loader must have
     created cfun->decl accordingly.  */
  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  /* Spot-check a few insns by uid.  uid 1 is a NOTE.  */
  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  /* uid 7 is an INSN wrapping a PARALLEL.  */
  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  /* uid 15 is an INSN wrapping a USE.  */
  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx: hard reg 0, SImode.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}
28097 :
28098 : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
28099 : In particular, verify that it correctly loads the 2nd operand.
28100 : This test is target-specific since these are machine-specific
28101 : operands (and enums). */
28102 :
static void
ix86_test_loading_unspec ()
{
  /* Parse a dump containing one UNSPEC insn followed by one
     UNSPEC_VOLATILE insn.  */
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  The first insn sets a MEM from
     (unspec:BLK ... UNSPEC_MEMORY_BLOCKAGE).  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  /* Verify the 2nd operand (the machine-specific unspec number) was
     loaded correctly.  */
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality: the loader must reuse one object rather than creating
     two distinct SCRATCHes.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE(-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}
28153 :
28154 : /* Run all target-specific selftests. */
28155 :
static void
ix86_run_selftests (void)
{
  /* Tests of dumping RTL with target-specific hard regs.  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
28169 :
28170 : } // namespace selftest
28171 :
28172 : #endif /* CHECKING_P */
28173 :
28174 : static const scoped_attribute_specs *const ix86_attribute_table[] =
28175 : {
28176 : &ix86_gnu_attribute_table
28177 : };
28178 :
28179 : /* Initialize the GCC target structure. */
28180 : #undef TARGET_RETURN_IN_MEMORY
28181 : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28182 :
28183 : #undef TARGET_LEGITIMIZE_ADDRESS
28184 : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
28185 :
28186 : #undef TARGET_ATTRIBUTE_TABLE
28187 : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28188 : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
28189 : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
28190 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28191 : # undef TARGET_MERGE_DECL_ATTRIBUTES
28192 : # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28193 : #endif
28194 :
28195 : #undef TARGET_INVALID_CONVERSION
28196 : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
28197 :
28198 : #undef TARGET_INVALID_UNARY_OP
28199 : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
28200 :
28201 : #undef TARGET_INVALID_BINARY_OP
28202 : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
28203 :
28204 : #undef TARGET_COMP_TYPE_ATTRIBUTES
28205 : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28206 :
28207 : #undef TARGET_INIT_BUILTINS
28208 : #define TARGET_INIT_BUILTINS ix86_init_builtins
28209 : #undef TARGET_BUILTIN_DECL
28210 : #define TARGET_BUILTIN_DECL ix86_builtin_decl
28211 : #undef TARGET_EXPAND_BUILTIN
28212 : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28213 :
28214 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28215 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
28216 : ix86_builtin_vectorized_function
28217 :
28218 : #undef TARGET_VECTORIZE_BUILTIN_GATHER
28219 : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
28220 :
28221 : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
28222 : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
28223 :
28224 : #undef TARGET_BUILTIN_RECIPROCAL
28225 : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
28226 :
28227 : #undef TARGET_ASM_FUNCTION_EPILOGUE
28228 : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
28229 :
28230 : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
28231 : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
28232 : ix86_print_patchable_function_entry
28233 :
28234 : #undef TARGET_ENCODE_SECTION_INFO
28235 : #ifndef SUBTARGET_ENCODE_SECTION_INFO
28236 : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28237 : #else
28238 : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28239 : #endif
28240 :
28241 : #undef TARGET_ASM_OPEN_PAREN
28242 : #define TARGET_ASM_OPEN_PAREN ""
28243 : #undef TARGET_ASM_CLOSE_PAREN
28244 : #define TARGET_ASM_CLOSE_PAREN ""
28245 :
28246 : #undef TARGET_ASM_BYTE_OP
28247 : #define TARGET_ASM_BYTE_OP ASM_BYTE
28248 :
28249 : #undef TARGET_ASM_ALIGNED_HI_OP
28250 : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28251 : #undef TARGET_ASM_ALIGNED_SI_OP
28252 : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28253 : #ifdef ASM_QUAD
28254 : #undef TARGET_ASM_ALIGNED_DI_OP
28255 : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28256 : #endif
28257 :
28258 : #undef TARGET_PROFILE_BEFORE_PROLOGUE
28259 : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
28260 :
28261 : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
28262 : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
28263 :
28264 : #undef TARGET_ASM_UNALIGNED_HI_OP
28265 : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28266 : #undef TARGET_ASM_UNALIGNED_SI_OP
28267 : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28268 : #undef TARGET_ASM_UNALIGNED_DI_OP
28269 : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28270 :
28271 : #undef TARGET_PRINT_OPERAND
28272 : #define TARGET_PRINT_OPERAND ix86_print_operand
28273 : #undef TARGET_PRINT_OPERAND_ADDRESS
28274 : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
28275 : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
28276 : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
28277 : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
28278 : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
28279 :
28280 : #undef TARGET_SCHED_INIT_GLOBAL
28281 : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
28282 : #undef TARGET_SCHED_ADJUST_COST
28283 : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28284 : #undef TARGET_SCHED_ISSUE_RATE
28285 : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28286 : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28287 : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28288 : ia32_multipass_dfa_lookahead
28289 : #undef TARGET_SCHED_MACRO_FUSION_P
28290 : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
28291 : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
28292 : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
28293 :
28294 : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28295 : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28296 :
28297 : #undef TARGET_MEMMODEL_CHECK
28298 : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
28299 :
28300 : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
28301 : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
28302 :
28303 : #ifdef HAVE_AS_TLS
28304 : #undef TARGET_HAVE_TLS
28305 : #define TARGET_HAVE_TLS true
28306 : #endif
28307 : #undef TARGET_CANNOT_FORCE_CONST_MEM
28308 : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28309 : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28310 : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28311 :
28312 : #undef TARGET_DELEGITIMIZE_ADDRESS
28313 : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28314 :
28315 : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
28316 : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
28317 :
28318 : #undef TARGET_MS_BITFIELD_LAYOUT_P
28319 : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28320 :
28321 : #if TARGET_MACHO
28322 : #undef TARGET_BINDS_LOCAL_P
28323 : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28324 : #else
28325 : #undef TARGET_BINDS_LOCAL_P
28326 : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
28327 : #endif
28328 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28329 : #undef TARGET_BINDS_LOCAL_P
28330 : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28331 : #endif
28332 :
28333 : #undef TARGET_ASM_OUTPUT_MI_THUNK
28334 : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28335 : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28336 : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28337 :
28338 : #undef TARGET_ASM_FILE_START
28339 : #define TARGET_ASM_FILE_START x86_file_start
28340 :
28341 : #undef TARGET_OPTION_OVERRIDE
28342 : #define TARGET_OPTION_OVERRIDE ix86_option_override
28343 :
28344 : #undef TARGET_REGISTER_MOVE_COST
28345 : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
28346 : #undef TARGET_MEMORY_MOVE_COST
28347 : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
28348 : #undef TARGET_RTX_COSTS
28349 : #define TARGET_RTX_COSTS ix86_rtx_costs
28350 : #undef TARGET_INSN_COST
28351 : #define TARGET_INSN_COST ix86_insn_cost
28352 : #undef TARGET_ADDRESS_COST
28353 : #define TARGET_ADDRESS_COST ix86_address_cost
28354 :
28355 : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
28356 : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
28357 : ix86_use_by_pieces_infrastructure_p
28358 :
28359 : #undef TARGET_OVERLAP_OP_BY_PIECES_P
28360 : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
28361 :
28362 : #undef TARGET_FLAGS_REGNUM
28363 : #define TARGET_FLAGS_REGNUM FLAGS_REG
28364 : #undef TARGET_FIXED_CONDITION_CODE_REGS
28365 : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28366 : #undef TARGET_CC_MODES_COMPATIBLE
28367 : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28368 :
28369 : #undef TARGET_MACHINE_DEPENDENT_REORG
28370 : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28371 :
28372 : #undef TARGET_BUILD_BUILTIN_VA_LIST
28373 : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28374 :
28375 : #undef TARGET_FOLD_BUILTIN
28376 : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
28377 :
28378 : #undef TARGET_GIMPLE_FOLD_BUILTIN
28379 : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
28380 :
28381 : #undef TARGET_COMPARE_VERSION_PRIORITY
28382 : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
28383 :
28384 : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
28385 : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
28386 : ix86_generate_version_dispatcher_body
28387 :
28388 : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
28389 : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
28390 : ix86_get_function_versions_dispatcher
28391 :
28392 : #undef TARGET_ENUM_VA_LIST_P
28393 : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
28394 :
28395 : #undef TARGET_FN_ABI_VA_LIST
28396 : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28397 :
28398 : #undef TARGET_CANONICAL_VA_LIST_TYPE
28399 : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28400 :
28401 : #undef TARGET_EXPAND_BUILTIN_VA_START
28402 : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28403 :
28404 : #undef TARGET_MD_ASM_ADJUST
28405 : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
28406 :
28407 : #undef TARGET_C_EXCESS_PRECISION
28408 : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
28409 : #undef TARGET_C_BITINT_TYPE_INFO
28410 : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
28411 : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
28412 : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
28413 : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
28414 : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
28415 : #undef TARGET_PROMOTE_PROTOTYPES
28416 : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28417 : #undef TARGET_PUSH_ARGUMENT
28418 : #define TARGET_PUSH_ARGUMENT ix86_push_argument
28419 : #undef TARGET_SETUP_INCOMING_VARARGS
28420 : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28421 : #undef TARGET_MUST_PASS_IN_STACK
28422 : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28423 : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
28424 : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
28425 : #undef TARGET_FUNCTION_ARG_ADVANCE
28426 : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
28427 : #undef TARGET_FUNCTION_ARG
28428 : #define TARGET_FUNCTION_ARG ix86_function_arg
28429 : #undef TARGET_INIT_PIC_REG
28430 : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
28431 : #undef TARGET_USE_PSEUDO_PIC_REG
28432 : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
28433 : #undef TARGET_FUNCTION_ARG_BOUNDARY
28434 : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
28435 : #undef TARGET_PASS_BY_REFERENCE
28436 : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28437 : #undef TARGET_INTERNAL_ARG_POINTER
28438 : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28439 : #undef TARGET_UPDATE_STACK_BOUNDARY
28440 : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28441 : #undef TARGET_GET_DRAP_RTX
28442 : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28443 : #undef TARGET_STRICT_ARGUMENT_NAMING
28444 : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28445 : #undef TARGET_STATIC_CHAIN
28446 : #define TARGET_STATIC_CHAIN ix86_static_chain
28447 : #undef TARGET_TRAMPOLINE_INIT
28448 : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
28449 : #undef TARGET_RETURN_POPS_ARGS
28450 : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
28451 :
28452 : #undef TARGET_WARN_FUNC_RETURN
28453 : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
28454 :
28455 : #undef TARGET_LEGITIMATE_COMBINED_INSN
28456 : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
28457 :
28458 : #undef TARGET_ASAN_SHADOW_OFFSET
28459 : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
28460 :
28461 : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
28462 : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28463 :
28464 : #undef TARGET_SCALAR_MODE_SUPPORTED_P
28465 : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28466 :
28467 : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
28468 : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
28469 : ix86_libgcc_floating_mode_supported_p
28470 :
28471 : #undef TARGET_VECTOR_MODE_SUPPORTED_P
28472 : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28473 :
28474 : #undef TARGET_C_MODE_FOR_SUFFIX
28475 : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
28476 :
28477 : #ifdef HAVE_AS_TLS
28478 : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
28479 : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
28480 : #endif
28481 :
28482 : #ifdef SUBTARGET_INSERT_ATTRIBUTES
28483 : #undef TARGET_INSERT_ATTRIBUTES
28484 : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
28485 : #endif
28486 :
28487 : #undef TARGET_MANGLE_TYPE
28488 : #define TARGET_MANGLE_TYPE ix86_mangle_type
28489 :
28490 : #undef TARGET_EMIT_SUPPORT_TINFOS
28491 : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
28492 :
28493 : #undef TARGET_STACK_PROTECT_GUARD
28494 : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
28495 :
28496 : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
28497 : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
28498 : ix86_stack_protect_runtime_enabled_p
28499 :
28500 : #if !TARGET_MACHO
28501 : #undef TARGET_STACK_PROTECT_FAIL
28502 : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
28503 : #endif
28504 :
28505 : #undef TARGET_FUNCTION_VALUE
28506 : #define TARGET_FUNCTION_VALUE ix86_function_value
28507 :
28508 : #undef TARGET_FUNCTION_VALUE_REGNO_P
28509 : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
28510 :
28511 : #undef TARGET_ZERO_CALL_USED_REGS
28512 : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
28513 :
28514 : #undef TARGET_PROMOTE_FUNCTION_MODE
28515 : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
28516 :
28517 : #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
28518 : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
28519 :
28520 : #undef TARGET_MEMBER_TYPE_FORCES_BLK
28521 : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
28522 :
28523 : #undef TARGET_INSTANTIATE_DECLS
28524 : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
28525 :
28526 : #undef TARGET_SECONDARY_RELOAD
28527 : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
28528 : #undef TARGET_SECONDARY_MEMORY_NEEDED
28529 : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
28530 : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
28531 : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
28532 :
28533 : #undef TARGET_CLASS_MAX_NREGS
28534 : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
28535 :
28536 : #undef TARGET_PREFERRED_RELOAD_CLASS
28537 : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
28538 : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
28539 : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
28540 : /* When this hook returns true for MODE, the compiler allows
28541 : registers explicitly used in the rtl to be used as spill registers
28542 : but prevents the compiler from extending the lifetime of these
28543 : registers. */
28544 : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
28545 : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
28546 : #undef TARGET_CLASS_LIKELY_SPILLED_P
28547 : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
28548 : #undef TARGET_CALLEE_SAVE_COST
28549 : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
28550 :
28551 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
28552 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
28553 : ix86_builtin_vectorization_cost
28554 : #undef TARGET_VECTORIZE_VEC_PERM_CONST
28555 : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
28556 : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
28557 : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
28558 : ix86_preferred_simd_mode
28559 : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
28560 : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
28561 : ix86_split_reduction
28562 : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
28563 : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
28564 : ix86_autovectorize_vector_modes
28565 : #undef TARGET_VECTORIZE_GET_MASK_MODE
28566 : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
28567 : #undef TARGET_VECTORIZE_CREATE_COSTS
28568 : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
28569 :
28570 : #undef TARGET_SET_CURRENT_FUNCTION
28571 : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
28572 :
28573 : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
28574 : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
28575 :
28576 : #undef TARGET_OPTION_SAVE
28577 : #define TARGET_OPTION_SAVE ix86_function_specific_save
28578 :
28579 : #undef TARGET_OPTION_RESTORE
28580 : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
28581 :
28582 : #undef TARGET_OPTION_POST_STREAM_IN
28583 : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
28584 :
28585 : #undef TARGET_OPTION_PRINT
28586 : #define TARGET_OPTION_PRINT ix86_function_specific_print
28587 :
28588 : #undef TARGET_CAN_INLINE_P
28589 : #define TARGET_CAN_INLINE_P ix86_can_inline_p
28590 :
28591 : #undef TARGET_LEGITIMATE_ADDRESS_P
28592 : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
28593 :
28594 : #undef TARGET_REGISTER_PRIORITY
28595 : #define TARGET_REGISTER_PRIORITY ix86_register_priority
28596 :
28597 : #undef TARGET_REGISTER_USAGE_LEVELING_P
28598 : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
28599 :
28600 : #undef TARGET_LEGITIMATE_CONSTANT_P
28601 : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
28602 :
28603 : #undef TARGET_COMPUTE_FRAME_LAYOUT
28604 : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
28605 :
28606 : #undef TARGET_FRAME_POINTER_REQUIRED
28607 : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
28608 :
28609 : #undef TARGET_CAN_ELIMINATE
28610 : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
28611 :
28612 : #undef TARGET_EXTRA_LIVE_ON_ENTRY
28613 : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
28614 :
28615 : #undef TARGET_ASM_CODE_END
28616 : #define TARGET_ASM_CODE_END ix86_code_end
28617 :
28618 : #undef TARGET_CONDITIONAL_REGISTER_USAGE
28619 : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
28620 :
28621 : #undef TARGET_CANONICALIZE_COMPARISON
28622 : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
28623 :
28624 : #undef TARGET_LOOP_UNROLL_ADJUST
28625 : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
28626 :
28627 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
28628 : #undef TARGET_SPILL_CLASS
28629 : #define TARGET_SPILL_CLASS ix86_spill_class
28630 :
28631 : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
28632 : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
28633 : ix86_simd_clone_compute_vecsize_and_simdlen
28634 :
28635 : #undef TARGET_SIMD_CLONE_ADJUST
28636 : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
28637 :
28638 : #undef TARGET_SIMD_CLONE_USABLE
28639 : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
28640 :
28641 : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
28642 : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
28643 :
28644 : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
28645 : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
28646 : ix86_float_exceptions_rounding_supported_p
28647 :
28648 : #undef TARGET_MODE_EMIT
28649 : #define TARGET_MODE_EMIT ix86_emit_mode_set
28650 :
28651 : #undef TARGET_MODE_NEEDED
28652 : #define TARGET_MODE_NEEDED ix86_mode_needed
28653 :
28654 : #undef TARGET_MODE_AFTER
28655 : #define TARGET_MODE_AFTER ix86_mode_after
28656 :
28657 : #undef TARGET_MODE_ENTRY
28658 : #define TARGET_MODE_ENTRY ix86_mode_entry
28659 :
28660 : #undef TARGET_MODE_EXIT
28661 : #define TARGET_MODE_EXIT ix86_mode_exit
28662 :
28663 : #undef TARGET_MODE_PRIORITY
28664 : #define TARGET_MODE_PRIORITY ix86_mode_priority
28665 :
28666 : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
28667 : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
28668 :
28669 : #undef TARGET_OFFLOAD_OPTIONS
28670 : #define TARGET_OFFLOAD_OPTIONS \
28671 : ix86_offload_options
28672 :
28673 : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
28674 : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
28675 :
28676 : #undef TARGET_OPTAB_SUPPORTED_P
28677 : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
28678 :
28679 : #undef TARGET_HARD_REGNO_SCRATCH_OK
28680 : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
28681 :
28682 : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
28683 : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
28684 :
28685 : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
28686 : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
28687 :
28688 : #undef TARGET_INIT_LIBFUNCS
28689 : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
28690 :
28691 : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
28692 : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
28693 :
28694 : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
28695 : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
28696 :
28697 : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
28698 : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
28699 :
28700 : #undef TARGET_HARD_REGNO_NREGS
28701 : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
28702 : #undef TARGET_HARD_REGNO_MODE_OK
28703 : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
28704 :
28705 : #undef TARGET_MODES_TIEABLE_P
28706 : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
28707 :
28708 : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
28709 : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
28710 : ix86_hard_regno_call_part_clobbered
28711 :
28712 : #undef TARGET_INSN_CALLEE_ABI
28713 : #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
28714 :
28715 : #undef TARGET_CAN_CHANGE_MODE_CLASS
28716 : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
28717 :
28718 : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
28719 : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
28720 :
28721 : #undef TARGET_STATIC_RTX_ALIGNMENT
28722 : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
28723 : #undef TARGET_CONSTANT_ALIGNMENT
28724 : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
28725 :
28726 : #undef TARGET_EMPTY_RECORD_P
28727 : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
28728 :
28729 : #undef TARGET_WARN_PARAMETER_PASSING_ABI
28730 : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
28731 :
28732 : #undef TARGET_GET_MULTILIB_ABI_NAME
28733 : #define TARGET_GET_MULTILIB_ABI_NAME \
28734 : ix86_get_multilib_abi_name
28735 :
28736 : #undef TARGET_IFUNC_REF_LOCAL_OK
28737 : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
28738 :
28739 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
28740 : # undef TARGET_ASM_RELOC_RW_MASK
28741 : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
28742 : #endif
28743 :
28744 : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
28745 : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
28746 :
28747 : #undef TARGET_MEMTAG_ADD_TAG
28748 : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
28749 :
28750 : #undef TARGET_MEMTAG_SET_TAG
28751 : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
28752 :
28753 : #undef TARGET_MEMTAG_EXTRACT_TAG
28754 : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
28755 :
28756 : #undef TARGET_MEMTAG_UNTAGGED_POINTER
28757 : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
28758 :
28759 : #undef TARGET_MEMTAG_TAG_BITSIZE
28760 : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
28761 :
28762 : #undef TARGET_GEN_CCMP_FIRST
28763 : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
28764 :
28765 : #undef TARGET_GEN_CCMP_NEXT
28766 : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
28767 :
28768 : #undef TARGET_HAVE_CCMP
28769 : #define TARGET_HAVE_CCMP ix86_have_ccmp
28770 :
28771 : #undef TARGET_MODE_CAN_TRANSFER_BITS
28772 : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
28773 :
28774 : #undef TARGET_REDZONE_CLOBBER
28775 : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
28776 :
28777 : static bool
28778 96365 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
28779 : {
28780 : #ifdef OPTION_GLIBC
28781 96365 : if (OPTION_GLIBC)
28782 96365 : return (built_in_function)fcode == BUILT_IN_MEMPCPY;
28783 : else
28784 : return false;
28785 : #else
28786 : return false;
28787 : #endif
28788 : }
28789 :
28790 : #undef TARGET_LIBC_HAS_FAST_FUNCTION
28791 : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
28792 :
/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR: return the maximum error
   (in ulps) of libm function CFN for MODE.  BOUNDARY_P selects the
   behavior near domain boundaries.  On glibc, additionally account for
   libmvec, whose error bound can exceed glibc's scalar one.  */
static unsigned
ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
			      bool boundary_p)
{
#ifdef OPTION_GLIBC
  bool glibc_p = OPTION_GLIBC;
#else
  bool glibc_p = false;
#endif
  if (glibc_p)
    {
      /* If __FAST_MATH__ is defined, glibc provides libmvec.  The flag
	 tests below mirror the conditions under which the vectorizer
	 may use libmvec (-ffast-math-style flags).  */
      unsigned int libmvec_ret = 0;
      if (!flag_trapping_math
	  && flag_unsafe_math_optimizations
	  && flag_finite_math_only
	  && !flag_signed_zeros
	  && !flag_errno_math)
	switch (cfn)
	  {
	  CASE_CFN_COS:
	  CASE_CFN_COS_FN:
	  CASE_CFN_SIN:
	  CASE_CFN_SIN_FN:
	    if (!boundary_p)
	      {
		/* With non-default rounding modes, libmvec provides
		   complete garbage in results.  E.g.
		   _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
		   returns 0.00333309174f rather than 1.40129846e-45f.  */
		if (flag_rounding_math)
		  return ~0U;
		/* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
		   claims libmvec maximum error is 4ulps.
		   My own random testing indicates 2ulps for SFmode and
		   0.5ulps for DFmode, but let's go with the 4ulps.  */
		libmvec_ret = 4;
	      }
	    break;
	  default:
	    break;
	  }
      /* The final answer is the worse of the scalar glibc bound and
	 the libmvec bound computed above.  */
      unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
							      boundary_p);
      return MAX (ret, libmvec_ret);
    }
  return default_libm_function_max_error (cfn, mode, boundary_p);
}
28841 :
28842 : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
28843 : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
28844 :
28845 : #if TARGET_MACHO
28846 : static bool
28847 : ix86_cannot_copy_insn_p (rtx_insn *insn)
28848 : {
28849 : if (TARGET_64BIT)
28850 : return false;
28851 :
28852 : rtx set = single_set (insn);
28853 : if (set)
28854 : {
28855 : rtx src = SET_SRC (set);
28856 : if (GET_CODE (src) == UNSPEC
28857 : && XINT (src, 1) == UNSPEC_SET_GOT)
28858 : return true;
28859 : }
28860 : return false;
28861 : }
28862 :
28863 : #undef TARGET_CANNOT_COPY_INSN_P
28864 : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
28865 :
28866 : #endif
28867 :
28868 : #if CHECKING_P
28869 : #undef TARGET_RUN_TARGET_SELFTESTS
28870 : #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
28871 : #endif /* #if CHECKING_P */
28872 :
28873 : #undef TARGET_DOCUMENTATION_NAME
28874 : #define TARGET_DOCUMENTATION_NAME "x86"
28875 :
28876 : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28877 : sbitmap
28878 737546 : ix86_get_separate_components (void)
28879 : {
28880 737546 : HOST_WIDE_INT offset, to_allocate;
28881 737546 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28882 737546 : bitmap_clear (components);
28883 737546 : struct machine_function *m = cfun->machine;
28884 :
28885 737546 : offset = m->frame.stack_pointer_offset;
28886 737546 : to_allocate = offset - m->frame.sse_reg_save_offset;
28887 :
28888 : /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
28889 : Experiments show that APX PPX can speed up the prologue. If the function
28890 : does not exit early during actual execution, then using APX PPX is faster.
28891 : If the function always exits early during actual execution, then shrink
28892 : wrap separate reduces the number of MOV (PUSH/POP) instructions actually
28893 : executed, thus speeding up execution.
28894 : foo:
28895 : movl $1, %eax
28896 : testq %rdi, %rdi
28897 : jne.L60
28898 : ret ---> early return.
28899 : .L60:
28900 : subq $88, %rsp ---> belong to prologue.
28901 : xorl %eax, %eax
28902 : movq %rbx, 40 (%rsp) ---> belong to prologue.
28903 : movq 8 (%rdi), %rbx
28904 : movq %rbp, 48 (%rsp) ---> belong to prologue.
28905 : movq %rdi, %rbp
28906 : testq %rbx, %rbx
28907 : jne.L61
28908 : movq 40 (%rsp), %rbx
28909 : movq 48 (%rsp), %rbp
28910 : addq $88, %rsp
28911 : ret
28912 : .L61:
28913 : movq %r12, 56 (%rsp) ---> belong to prologue.
28914 : movq %r13, 64 (%rsp) ---> belong to prologue.
28915 : movq %r14, 72 (%rsp) ---> belong to prologue.
28916 : ... ...
28917 :
28918 : Disable shrink wrap separate when PPX is enabled. */
28919 737546 : if ((TARGET_APX_PPX && !crtl->calls_eh_return)
28920 737079 : || cfun->machine->func_type != TYPE_NORMAL
28921 : || TARGET_SEH
28922 736981 : || crtl->stack_realign_needed
28923 727320 : || m->call_ms2sysv)
28924 : return components;
28925 :
28926 : /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
28927 : Disable shrink wrap separate when MOV is prohibited. */
28928 725398 : if (save_regs_using_push_pop (to_allocate))
28929 : return components;
28930 :
28931 32673411 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28932 32322084 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
28933 : {
28934 : /* Skip registers with large offsets, where a pseudo may be needed. */
28935 602593 : if (IN_RANGE (offset, -0x8000, 0x7fff))
28936 601520 : bitmap_set_bit (components, regno);
28937 648467 : offset += UNITS_PER_WORD;
28938 : }
28939 :
28940 : /* Don't mess with the following registers. */
28941 351327 : if (frame_pointer_needed)
28942 6337 : bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
28943 :
28944 351327 : if (crtl->drap_reg)
28945 129 : bitmap_clear_bit (components, REGNO (crtl->drap_reg));
28946 :
28947 351327 : if (pic_offset_table_rtx)
28948 29860 : bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
28949 :
28950 : return components;
28951 : }
28952 :
28953 : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
28954 : sbitmap
28955 9415806 : ix86_components_for_bb (basic_block bb)
28956 : {
28957 9415806 : bitmap in = DF_LIVE_IN (bb);
28958 9415806 : bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
28959 9415806 : bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
28960 :
28961 9415806 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28962 9415806 : bitmap_clear (components);
28963 :
28964 9415806 : function_abi_aggregator callee_abis;
28965 9415806 : rtx_insn *insn;
28966 110427935 : FOR_BB_INSNS (bb, insn)
28967 101012129 : if (CALL_P (insn))
28968 3080298 : callee_abis.note_callee_abi (insn_callee_abi (insn));
28969 9415806 : HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
28970 :
28971 : /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
28972 875669958 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28973 866254152 : if (!fixed_regs[regno]
28974 866254152 : && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
28975 440299364 : || bitmap_bit_p (in, regno)
28976 414583942 : || bitmap_bit_p (gen, regno)
28977 402028717 : || bitmap_bit_p (kill, regno)))
28978 38535652 : bitmap_set_bit (components, regno);
28979 :
28980 9415806 : return components;
28981 : }
28982 :
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  */
void
ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
{
  /* Nothing to do for x86: no component ever needs to be disqualified
     on a particular edge, so this hook is a deliberate no-op.  */
}
28989 :
28990 : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
28991 : void
28992 168833 : ix86_emit_prologue_components (sbitmap components)
28993 : {
28994 168833 : HOST_WIDE_INT cfa_offset;
28995 168833 : struct machine_function *m = cfun->machine;
28996 :
28997 168833 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
28998 168833 : - m->frame.stack_pointer_offset;
28999 15701469 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29000 15532636 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29001 : {
29002 769764 : if (bitmap_bit_p (components, regno))
29003 194584 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
29004 821003 : cfa_offset -= UNITS_PER_WORD;
29005 : }
29006 168833 : }
29007 :
29008 : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29009 : void
29010 150912 : ix86_emit_epilogue_components (sbitmap components)
29011 : {
29012 150912 : HOST_WIDE_INT cfa_offset;
29013 150912 : struct machine_function *m = cfun->machine;
29014 150912 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
29015 150912 : - m->frame.stack_pointer_offset;
29016 :
29017 14034816 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29018 13883904 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29019 : {
29020 690120 : if (bitmap_bit_p (components, regno))
29021 : {
29022 260929 : rtx reg = gen_rtx_REG (word_mode, regno);
29023 260929 : rtx mem;
29024 260929 : rtx_insn *insn;
29025 :
29026 260929 : mem = choose_baseaddr (cfa_offset, NULL);
29027 260929 : mem = gen_frame_mem (word_mode, mem);
29028 260929 : insn = emit_move_insn (reg, mem);
29029 :
29030 260929 : RTX_FRAME_RELATED_P (insn) = 1;
29031 260929 : add_reg_note (insn, REG_CFA_RESTORE, reg);
29032 : }
29033 748013 : cfa_offset -= UNITS_PER_WORD;
29034 : }
29035 150912 : }
29036 :
29037 : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29038 : void
29039 44753 : ix86_set_handled_components (sbitmap components)
29040 : {
29041 4162029 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29042 4117276 : if (bitmap_bit_p (components, regno))
29043 : {
29044 105000 : cfun->machine->reg_is_wrapped_separately[regno] = true;
29045 105000 : cfun->machine->use_fast_prologue_epilogue = true;
29046 105000 : cfun->machine->frame.save_regs_using_mov = true;
29047 : }
29048 44753 : }
29049 :
29050 : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
29051 : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
29052 : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
29053 : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
29054 : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
29055 : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
29056 : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
29057 : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
29058 : ix86_emit_prologue_components
29059 : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
29060 : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
29061 : ix86_emit_epilogue_components
29062 : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
29063 : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
29064 :
/* The x86 target hook vector, assembled from the TARGET_* macro
   overrides defined throughout this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;
29066 :
29067 : #include "gt-i386.h"
|