Line data Source code
1 : /* Subroutines used for code generation on IA-32.
2 : Copyright (C) 1988-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #define INCLUDE_STRING
21 : #define IN_TARGET_CODE 1
22 :
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "memmodel.h"
30 : #include "gimple.h"
31 : #include "cfghooks.h"
32 : #include "cfgloop.h"
33 : #include "df.h"
34 : #include "tm_p.h"
35 : #include "stringpool.h"
36 : #include "expmed.h"
37 : #include "optabs.h"
38 : #include "regs.h"
39 : #include "emit-rtl.h"
40 : #include "recog.h"
41 : #include "cgraph.h"
42 : #include "diagnostic.h"
43 : #include "cfgbuild.h"
44 : #include "alias.h"
45 : #include "fold-const.h"
46 : #include "attribs.h"
47 : #include "calls.h"
48 : #include "stor-layout.h"
49 : #include "varasm.h"
50 : #include "output.h"
51 : #include "insn-attr.h"
52 : #include "flags.h"
53 : #include "except.h"
54 : #include "explow.h"
55 : #include "expr.h"
56 : #include "cfgrtl.h"
57 : #include "common/common-target.h"
58 : #include "langhooks.h"
59 : #include "reload.h"
60 : #include "gimplify.h"
61 : #include "dwarf2.h"
62 : #include "tm-constrs.h"
63 : #include "cselib.h"
64 : #include "sched-int.h"
65 : #include "opts.h"
66 : #include "tree-pass.h"
67 : #include "context.h"
68 : #include "pass_manager.h"
69 : #include "target-globals.h"
70 : #include "gimple-iterator.h"
71 : #include "gimple-fold.h"
72 : #include "tree-vectorizer.h"
73 : #include "shrink-wrap.h"
74 : #include "builtins.h"
75 : #include "rtl-iter.h"
76 : #include "tree-iterator.h"
77 : #include "dbgcnt.h"
78 : #include "case-cfn-macros.h"
79 : #include "dojump.h"
80 : #include "fold-const-call.h"
81 : #include "tree-vrp.h"
82 : #include "tree-ssanames.h"
83 : #include "selftest.h"
84 : #include "selftest-rtl.h"
85 : #include "print-rtl.h"
86 : #include "intl.h"
87 : #include "ifcvt.h"
88 : #include "symbol-summary.h"
89 : #include "sreal.h"
90 : #include "ipa-cp.h"
91 : #include "ipa-prop.h"
92 : #include "ipa-fnsummary.h"
93 : #include "wide-int-bitmask.h"
94 : #include "tree-vector-builder.h"
95 : #include "debug.h"
96 : #include "dwarf2out.h"
97 : #include "i386-options.h"
98 : #include "i386-builtins.h"
99 : #include "i386-expand.h"
100 : #include "i386-features.h"
101 : #include "function-abi.h"
102 : #include "rtl-error.h"
103 : #include "gimple-pretty-print.h"
104 :
105 : /* This file should be included last. */
106 : #include "target-def.h"
107 :
/* Forward declarations for routines defined later in this file.  */
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);


/* Default stack-checking limit when the target headers do not provide
   one; -1 presumably means "no limit" — confirm against i386.h users.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Index 4 is used for any mode other than QI/HI/SI/DImode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
123 :
124 :
/* Cost table selected by -mtune; used for scheduling/tuning decisions.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Cost table selected by -mtune or -Os; used for rtx/insn cost queries.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from i386.h.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  The entry groups below
   follow the hard register numbering in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  Note k0 gets ALL_MASK_REGS, the rest MASK_REGS.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
181 :
/* The "default" register map used in 32bit mode, mapping GCC hard
   register numbers to the register numbers emitted in debug info.  */

unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers (not addressable in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
212 :
/* The "default" register map used in 64bit mode, mapping GCC hard
   register numbers to the register numbers emitted in debug info.  */

unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
242 :
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
325 :
/* Define parameter passing and return registers.  */

/* SysV AMD64 ABI integer argument registers, in passing order.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Microsoft x64 ABI integer argument registers, in passing order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Similar to Clang's preserve_none function parameter passing.
   NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p.  */

static int const x86_64_preserve_none_int_parameter_registers[6] =
{
  R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
};

/* Integer registers used for function return values.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
350 :
/* Define the structure for the machine field in struct function.
   A singly linked list of stack slots, one node per (mode, n) pair.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;	/* presumably a machine_mode packed into a short
			   — confirm against assign_386_stack_local.  */
  unsigned short n;	/* slot number distinguishing slots of same mode.  */
  rtx rtl;		/* the MEM rtx for this stack slot.  */
  struct stack_local_entry *next;	/* next entry in the list.  */
};
359 :
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling-ABI specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
398 :
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of 64-bit eightbyte classes per argument.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* True once ext_80387_constants_table has been initialized.  */
static bool ext_80387_constants_init;
427 :
428 :
/* Forward declarations for routines defined later in this file.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

/* x86 condition-code encodings, in hardware encoding order
   (O, NO, B, NB, E, NE, ...).  */
typedef enum ix86_flags_cc
{
  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
} ix86_cc;

/* Default-flag-value strings indexed by ix86_cc, apparently for the
   APX CCMP/CTEST "{dfv=...}" assembly syntax — confirm against the
   emitters that index this table.  */
static const char *ix86_ccmp_dfv_mapping[] =
{
  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
};


/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
463 :
464 : /* Return true if a red-zone is in use. We can't use red-zone when
465 : there are local indirect jumps, like "indirect_jump" or "tablejump",
466 : which jumps to another place in the function, since "call" in the
467 : indirect thunk pushes the return address onto stack, destroying
468 : red-zone.
469 :
470 : NB: Don't use red-zone for functions with no_caller_saved_registers
471 : and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
472 : for 31 GPRs or 15 GPRs + 16 XMM registers.
473 :
474 : TODO: If we can reserve the first 2 WORDs, for PUSH and, another
475 : for CALL, in red-zone, we can allow local indirect jumps with
476 : indirect thunk. */
477 :
478 : bool
479 9841732 : ix86_using_red_zone (void)
480 : {
481 9841732 : return (TARGET_RED_ZONE
482 8901924 : && !TARGET_64BIT_MS_ABI
483 8599446 : && ((!TARGET_APX_EGPR && !TARGET_SSE)
484 8576447 : || (cfun->machine->call_saved_registers
485 8576447 : != TYPE_NO_CALLER_SAVED_REGISTERS))
486 18441117 : && (!cfun->machine->has_local_indirect_jump
487 59127 : || cfun->machine->indirect_branch_type == indirect_branch_keep));
488 : }
489 :
490 : /* Return true, if profiling code should be emitted before
491 : prologue. Otherwise it returns false.
492 : Note: For x86 with "hotfix" it is sorried. */
493 : static bool
494 4467021 : ix86_profile_before_prologue (void)
495 : {
496 4467021 : return flag_fentry != 0;
497 : }
498 :
/* Update register usage after having seen the compiler flags.  This
   adjusts fixed/call-used register sets and the accessible_reg_set to
   match the selected target features (64-bit, MMX, SSE, x87, AVX512F,
   APX) and the function's calling convention.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers,
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
	  == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  Entries > 1 encode an
	 ABI-dependent choice resolved here against C_MASK.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers (zmm16-31 and the
     mask registers).  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers (r16-r31).  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }
}
575 :
/* Canonicalize a comparison from one we don't have to one we do have.
   CODE is the comparison code (passed as int), OP0/OP1 its operands;
   when OP0_PRESERVE_VALUE is true the value of *OP0 must not change,
   so no transformation is applied.  All three rewrites below swap the
   condition and/or operands to match existing insn patterns.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	  return;
	}
    }

  /* SUB (a, b) underflows precisely when a < b.  Convert
     (compare (minus (a b)) a) to (compare (a b))
     to match *sub<mode>_3 pattern.  */
  if (!op0_preserve_value
      && (*code == GTU || *code == LEU)
      && GET_CODE (*op0) == MINUS
      && rtx_equal_p (XEXP (*op0, 0), *op1))
    {
      *op1 = XEXP (*op0, 1);
      *op0 = XEXP (*op0, 0);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }

  /* Swap operands of GTU comparison to canonicalize
     addcarry/subborrow comparison.  */
  if (!op0_preserve_value
      && *code == GTU
      && GET_CODE (*op0) == PLUS
      && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
      && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
      && GET_CODE (*op1) == ZERO_EXTEND)
    {
      std::swap (*op0, *op1);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }
}
629 :
/* Hook to determine if one function can safely inline another.
   CALLER and CALLEE are FUNCTION_DECLs; returns true when the callee's
   target options are compatible with (a subset of) the caller's.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Lets hope
     user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A missing target-option node means the default options.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not.  So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
	   && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
722 :
723 : /* Return true if this goes in large data/bss. */
724 :
725 : static bool
726 79961836 : ix86_in_large_data_p (tree exp)
727 : {
728 79961836 : if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
729 79961598 : && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
730 : return false;
731 :
732 1110 : if (exp == NULL_TREE)
733 : return false;
734 :
735 : /* Functions are never large data. */
736 1110 : if (TREE_CODE (exp) == FUNCTION_DECL)
737 : return false;
738 :
739 : /* Automatic variables are never large data. */
740 262 : if (VAR_P (exp) && !is_global_var (exp))
741 : return false;
742 :
743 262 : if (VAR_P (exp) && DECL_SECTION_NAME (exp))
744 : {
745 51 : const char *section = DECL_SECTION_NAME (exp);
746 51 : if (strcmp (section, ".ldata") == 0
747 51 : || strcmp (section, ".lbss") == 0)
748 : return true;
749 : return false;
750 : }
751 : else
752 : {
753 211 : HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
754 :
755 : /* If this is an incomplete type with size 0, then we can't put it
756 : in data because it might be too big when completed. Also,
757 : int_size_in_bytes returns -1 if size can vary or is larger than
758 : an integer in which case also it is safer to assume that it goes in
759 : large data. */
760 211 : if (size <= 0 || size > ix86_section_threshold)
761 : return true;
762 : }
763 :
764 : return false;
765 : }
766 :
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  Large-model data goes into the .l* variants
   of the usual sections; everything else falls back to the default
   ELF selection.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories cannot occur on this target.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
834 :
835 : /* Select a set of attributes for section NAME based on the properties
836 : of DECL and whether or not RELOC indicates that DECL's initializer
837 : might contain runtime relocations. */
838 :
839 : static unsigned int ATTRIBUTE_UNUSED
840 66667127 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
841 : {
842 66667127 : unsigned int flags = default_section_type_flags (decl, name, reloc);
843 :
844 66667127 : if (ix86_in_large_data_p (decl))
845 7 : flags |= SECTION_LARGE;
846 :
847 66667127 : if (decl == NULL_TREE
848 370 : && (strcmp (name, ".ldata.rel.ro") == 0
849 370 : || strcmp (name, ".ldata.rel.ro.local") == 0))
850 0 : flags |= SECTION_RELRO;
851 :
852 66667127 : if (strcmp (name, ".lbss") == 0
853 66667123 : || startswith (name, ".lbss.")
854 133334247 : || startswith (name, ".gnu.linkonce.lb."))
855 7 : flags |= SECTION_BSS;
856 :
857 66667127 : return flags;
858 : }
859 :
860 : /* Build up a unique section name, expressed as a
861 : STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
862 : RELOC indicates whether the initial value of EXP requires
863 : link-time relocations. */
864 :
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      /* Map the decl's section category to the matching large-model
	 ("l"-prefixed) section name; one-only decls get the short
	 .gnu.linkonce-style prefix instead.  */
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data sections never appear for the large/medium models.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  /* Final name is [.gnu.linkonce]<prefix>.<decl-name>.  */
	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  /* Not a large-data decl (or an unhandled category): use the generic
     ELF unique-section logic.  */
  default_unique_section (decl, reloc);
}
923 :
924 : /* Return true if TYPE has no_callee_saved_registers or preserve_none
925 : attribute. */
926 :
927 : bool
928 7484577 : ix86_type_no_callee_saved_registers_p (const_tree type)
929 : {
930 14969154 : return (lookup_attribute ("no_callee_saved_registers",
931 7484577 : TYPE_ATTRIBUTES (type)) != NULL
932 14969023 : || lookup_attribute ("preserve_none",
933 7484446 : TYPE_ATTRIBUTES (type)) != NULL);
934 : }
935 :
936 : #ifdef COMMON_ASM_OP
937 :
938 : #ifndef LARGECOMM_SECTION_ASM_OP
939 : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
940 : #endif
941 :
942 : /* This says how to output assembler code to declare an
943 : uninitialized external linkage data object.
944 :
945 : For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
946 : large objects. */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  /* Under the medium/large code models, commons bigger than the
     -mlarge-data-threshold value go into the large BSS section and are
     declared with .largecomm instead of the normal common directive.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  /* Emit "<name>,<size>,<alignment-in-bytes>" after the directive.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
965 : #endif
966 :
967 : /* Utility function for targets to use in implementing
968 : ASM_OUTPUT_ALIGNED_BSS. */
969 :
void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  /* Large objects under the medium/large code models go into .lbss;
     everything else goes into the ordinary BSS section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte, so the label refers to allocated space
     even for a zero-sized object.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
990 :
991 : /* Decide whether we must probe the stack before any space allocation
992 : on this target. It's essentially TARGET_STACK_PROBE except when
993 : -fstack-check causes the stack to be already probed differently. */
994 :
995 : bool
996 865886 : ix86_target_stack_probe (void)
997 : {
998 : /* Do not probe the stack twice if static stack checking is enabled. */
999 865886 : if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
1000 : return false;
1001 :
1002 865886 : return TARGET_STACK_PROBE;
1003 : }
1004 :
1005 : /* Decide whether we can make a sibling call to a function. DECL is the
1006 : declaration of the function being targeted by the call and EXP is the
1007 : CALL_EXPR representing the call. */
1008 :
static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  /* True when DECL is known but may be preempted (PLT/GOT call).  */
  bool bind_global = decl && !targetm.binds_local_p (decl);

  /* Naked functions have no prologue/epilogue, so a tail jump cannot be
     set up correctly.  */
  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  /* For a direct call use the decl; for an indirect call dig the
     function type out of the CALL_EXPR.  */
  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Sibling call isn't OK if callee has no callee-saved registers
     and the calling function has callee-saved registers.  */
  if ((cfun->machine->call_saved_registers
       != TYPE_NO_CALLEE_SAVED_REGISTERS)
      && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
      && ix86_type_no_callee_saved_registers_p (type))
    return false;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      /* x87 stack returns must match exactly in both directions.  */
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute ("indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1153 :
1154 : /* This function determines from TYPE the calling-convention. */
1155 :
unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  /* 64-bit code has a single calling convention.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      /* The base convention attributes are mutually exclusive; take the
	 first one found.  */
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      /* An explicit base convention overrides the defaults below.  */
      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  /* -mrtd makes stdcall the default for non-variadic functions.  */
  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  /* Otherwise default to cdecl, except for non-variadic MS-ABI methods,
     which default to thiscall.  */
  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
1203 :
1204 : /* Return 0 if the attributes for two types are incompatible, 1 if they
1205 : are compatible, and 2 if they are nearly compatible (which causes a
1206 : warning to be generated). */
1207 :
static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  /* Only function/method types carry calling-convention attributes.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  /* The calling conventions must match exactly.  */
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  /* So must the number of register-passed arguments.  */
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  if (ix86_type_no_callee_saved_registers_p (type1)
      != ix86_type_no_callee_saved_registers_p (type2))
    return 0;

  /* The preserve_none attribute implies a different calling convention
     only on 64-bit targets, so it only needs to match there.  */
  if (TARGET_64BIT
      && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
	  != lookup_attribute ("preserve_none",
			       TYPE_ATTRIBUTES (type2))))
    return 0;

  return 1;
}
1239 :
1240 : /* Return the regparm value for a function with the indicated TYPE and DECL.
1241 : DECL may be NULL when calling function indirectly
1242 : or considering a libcall. */
1243 :
static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* 64-bit ABIs have a fixed number of parameter registers.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  /* An explicit regparm(N) attribute wins outright.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so fewer registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      /* Only widen, never narrow, the user-visible setting.  */
	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
1332 :
1333 : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1334 : DFmode (2) arguments in SSE registers for a function with the
1335 : indicated TYPE and DECL. DECL may be NULL when calling function
1336 : indirectly or considering a libcall. Return -1 if any FP parameter
   should be rejected by error.  This is used in situations where we imply the
   SSE calling convention but the function is called from another function with
1339 : SSE disabled. Otherwise return 0. */
1340 :
static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* This convention only exists in 32-bit mode.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  /* The attribute is unusable without SSE; diagnose only when
	     the caller asked for it (WARN).  */
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* Without a known decl we cannot prove the callee is local.  */
  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.
	     See PR66047  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
1401 :
1402 : /* Return true if EAX is live at the start of the function. Used by
1403 : ix86_expand_prologue to determine if we need special help before
1404 : calling allocate_stack_worker. */
1405 :
1406 : static bool
1407 7090 : ix86_eax_live_at_start_p (void)
1408 : {
1409 : /* Cheat. Don't bother working forward from ix86_function_regparm
1410 : to the function type to whether an actual argument is located in
1411 : eax. Instead just look at cfg info, which is still close enough
1412 : to correct at this point. This gives false positives for broken
1413 : functions that might use uninitialized data that happens to be
1414 : allocated in eax, but who cares? */
1415 7090 : return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1416 : }
1417 :
1418 : static bool
1419 159550 : ix86_keep_aggregate_return_pointer (tree fntype)
1420 : {
1421 159550 : tree attr;
1422 :
1423 159550 : if (!TARGET_64BIT)
1424 : {
1425 159550 : attr = lookup_attribute ("callee_pop_aggregate_return",
1426 159550 : TYPE_ATTRIBUTES (fntype));
1427 159550 : if (attr)
1428 0 : return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1429 :
1430 : /* For 32-bit MS-ABI the default is to keep aggregate
1431 : return pointer. */
1432 159550 : if (ix86_function_type_abi (fntype) == MS_ABI)
1433 : return true;
1434 : }
1435 : return KEEP_AGGREGATE_RETURN_POINTER != 0;
1436 : }
1437 :
1438 : /* Value is the number of bytes of arguments automatically
1439 : popped when returning from a subroutine call.
1440 : FUNDECL is the declaration node of the function (as a tree),
1441 : FUNTYPE is the data type of the function (as a tree),
1442 : or for a library call it is an identifier node for the subroutine name.
1443 : SIZE is the number of bytes of arguments passed on the stack.
1444 :
1445 : On the 80386, the RTD insn may be used to pop them if the number
1446 : of args is fixed, but if the number is variable then the caller
1447 : must pop them all. RTD can't be used for library calls now
1448 : because the library is compiled with the Unix compiler.
1449 : Use of RTD is a selectable option, since it is incompatible with
1450 : standard Unix calling sequences. If the option is not selected,
1451 : the caller must always pop the args.
1452 :
1453 : The attribute stdcall is equivalent to RTD on a per module basis. */
1454 :
static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  /* stdcall/fastcall/thiscall callees pop their own (fixed-count)
     arguments; variadic functions cannot, since the callee does not
     know the argument size.  */
  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      /* Only when no arguments go in registers is the hidden pointer
	 actually on the stack.  */
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
1482 :
1483 : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1484 :
static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      /* Look through subregs of hard registers, remembering the byte
	 offset so the class check below uses the right hard reg.  */
      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard registers can clash with constraints; pseudos will
	 be allocated later.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* The operand is acceptable if at least one preferred alternative
	 admits it: anything goes, it matches another operand, or the
	 hard reg fits the alternative's register class.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		  (recog_data.operand[i],
		   recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
1559 :
1560 : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1561 :
1562 : static unsigned HOST_WIDE_INT
1563 4574 : ix86_asan_shadow_offset (void)
1564 : {
1565 4574 : return SUBTARGET_SHADOW_OFFSET;
1566 : }
1567 :
1568 : /* Argument support functions. */
1569 :
1570 : /* Return true when register may be used to pass function parameters. */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  /* The first SSE_REGPARM_MAX SSE registers can carry arguments.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  /* 32-bit: the low integer registers up to REGPARM_MAX, plus the
     first MMX_REGPARM_MAX MMX registers when MMX is enabled.  */
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  /* Select the integer parameter register table for the active ABI /
     register-saving convention.  */
  if (cfun
      && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
1610 :
1611 : /* Return if we do not know how to pass ARG solely in registers. */
1612 :
1613 : static bool
1614 427359253 : ix86_must_pass_in_stack (const function_arg_info &arg)
1615 : {
1616 427359253 : if (must_pass_in_stack_var_size_or_pad (arg))
1617 : return true;
1618 :
1619 : /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1620 : The layout_type routine is crafty and tries to trick us into passing
1621 : currently unsupported vector types on the stack by using TImode. */
1622 1766503 : return (!TARGET_64BIT && arg.mode == TImode
1623 427359216 : && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1624 : }
1625 :
1626 : /* It returns the size, in bytes, of the area reserved for arguments passed
1627 : in registers for the function represented by fndecl dependent to the used
1628 : abi format. */
1629 : int
1630 10615491 : ix86_reg_parm_stack_space (const_tree fndecl)
1631 : {
1632 10615491 : enum calling_abi call_abi = SYSV_ABI;
1633 10615491 : if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1634 10299472 : call_abi = ix86_function_abi (fndecl);
1635 : else
1636 316019 : call_abi = ix86_function_type_abi (fndecl);
1637 10615491 : if (TARGET_64BIT && call_abi == MS_ABI)
1638 119238 : return 32;
1639 : return 0;
1640 : }
1641 :
1642 : /* We add this as a workaround in order to use libc_has_function
1643 : hook in i386.md. */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  /* Forward to the generic target hook, passing NULL_TREE for the
     optional type argument.  */
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
1649 :
1650 : /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1651 : specifying the call abi used. */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  /* Without attributes the default ABI applies.  */
  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  /* ms_abi / sysv_abi attributes override the default, in either
     direction.  */
  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      /* Diagnose ms_abi on X32 once per compilation.  */
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}
1678 :
1679 : enum calling_abi
1680 224030231 : ix86_function_abi (const_tree fndecl)
1681 : {
1682 224030231 : return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1683 : }
1684 :
1685 : /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1686 : specifying the call abi used. */
1687 : enum calling_abi
1688 2071341419 : ix86_cfun_abi (void)
1689 : {
1690 2071341419 : return cfun ? cfun->machine->call_abi : ix86_abi;
1691 : }
1692 :
1693 : bool
1694 5014605 : ix86_function_ms_hook_prologue (const_tree fn)
1695 : {
1696 5014605 : if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1697 : {
1698 8 : if (decl_function_context (fn) != NULL_TREE)
1699 0 : error_at (DECL_SOURCE_LOCATION (fn),
1700 : "%<ms_hook_prologue%> attribute is not compatible "
1701 : "with nested function");
1702 : else
1703 : return true;
1704 : }
1705 : return false;
1706 : }
1707 :
1708 : bool
1709 119273622 : ix86_function_naked (const_tree fn)
1710 : {
1711 119273622 : if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1712 : return true;
1713 :
1714 : return false;
1715 : }
1716 :
1717 : /* Write the extra assembler code needed to declare a function properly. */
1718 :
void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  /* For ms_hook_prologue functions, emit a block of 0xcc filler bytes
     (32 on 64-bit, 16 on 32-bit) before the label.  */
  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  assemble_function_label_raw (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push %ebp
	     movl.s %esp, %ebp */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
	}
    }
}
1761 :
1762 : /* Output a user-defined label. In AT&T syntax, registers are prefixed
1763 : with %, so labels require no punctuation. In Intel syntax, registers
1764 : are unprefixed, so labels may clash with registers or other operators,
1765 : and require quoting. */
1766 : void
1767 34914475 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1768 : {
1769 34914475 : if (ASSEMBLER_DIALECT == ASM_ATT)
1770 34913592 : fprintf (file, "%s%s", prefix, label);
1771 : else
1772 883 : fprintf (file, "\"%s%s\"", prefix, label);
1773 34914475 : }
1774 :
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  /* Record FNDECL's ABI (or the default ABI when FNDECL is NULL) as the
     calling ABI of the function currently being compiled.  */
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1783 :
1784 : /* Return 1 if pseudo register should be created and used to hold
1785 : GOT address for PIC code. */
1786 : bool
1787 172832897 : ix86_use_pseudo_pic_reg (void)
1788 : {
1789 172832897 : if ((TARGET_64BIT
1790 161823371 : && (ix86_cmodel == CM_SMALL_PIC
1791 : || TARGET_PECOFF))
1792 167058148 : || !flag_pic)
1793 168072558 : return false;
1794 : return true;
1795 : }
1796 :
/* Initialize large model PIC register.

   Emits, at the current insn point, the three-insn sequence that
   materializes the GOT address into pic_offset_table_rtx, using the
   hard register TMP_REGNO as scratch.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  /* The large PIC model is 64-bit only.  */
  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  /* The set_rip/set_got_offset insns below refer to LABEL; keep it from
     being removed as unused.  */
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  /* The scratch must not alias the PIC register being initialized.  */
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* pic = address of LABEL (RIP-relative), tmp = GOT offset from LABEL,
     pic += tmp — presumably yielding the GOT base; see the set_rip_rex64
     and set_got_offset_rex64 patterns in i386.md.  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* LABEL has served its purpose: demote it to a deleted-label note so
     later passes do not treat it as a jump target, but preserve its name
     for assembler/debug output.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
1820 :
/* Create and initialize PIC register if required.

   When a pseudo PIC register is in use (see ix86_use_pseudo_pic_reg),
   builds the initialization sequence and inserts it on the edge out of
   the entry block so it executes exactly once, before any use.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      /* NOTE(review): the flush-queue note forces pending CFA adjustments
	 out at this insn — presumably because set_got perturbs the stack;
	 confirm against the set_got pattern.  */
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  /* Run the whole sequence once on function entry.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
1860 :
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.

   FNDECL is the callee's declaration when known; CALLER is nonzero when
   CUM describes the caller's side of the call (stored into cum->caller;
   consumers are outside this chunk).  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (cum, 0, sizeof (*cum));

  /* Determine the call ABI and the type that may carry a
     "preserve_none" attribute.  Prefer the resolved cgraph symbol (which
     looks through aliases) when one exists for FNDECL.  */
  tree preserve_none_type;
  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (target->decl);
	  cum->call_abi = ix86_function_abi (target->decl);
	  preserve_none_type = TREE_TYPE (target->decl);
	}
      else
	{
	  cum->call_abi = ix86_function_abi (fndecl);
	  preserve_none_type = TREE_TYPE (fndecl);
	}
    }
  else
    {
      cum->call_abi = ix86_function_type_abi (fntype);
      preserve_none_type = fntype;
    }
  cum->preserve_none_abi
    = (preserve_none_type
       && (lookup_attribute ("preserve_none",
			     TYPE_ATTRIBUTES (preserve_none_type))
	   != nullptr));

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Enable the -Wpsabi warnings by default; they are cleared below for
     32-bit variadic functions.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesytem is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->decl = fndecl;

  /* warn_empty starts out set whenever -Wabi is off or the function is
     variadic; otherwise it is set only when an empty (TYPE_EMPTY_P)
     argument is followed by a non-empty one — NOTE(review): the precise
     meaning of the flag is defined by its consumers outside this chunk.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
2018 :
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The midde-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If INT_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only vector types whose TYPE_MODE is not already a vector mode need
     fixing up.  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		/* Warn once (via file-local statics) per kind of ISA
		   mismatch; arguments and return values are tracked
		   separately.  */
		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		/* SSE/MMX cases fall through here: the natural vector
		   mode is usable.  */
		return mode;
	      }

	  /* A matching vector mode must exist for the sizes admitted
	     above.  */
	  gcc_unreachable ();
	}
    }

  return mode;
}
2160 :
2161 : /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2162 : this may not agree with the mode that the type system has chosen for the
2163 : register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2164 : go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2165 :
2166 : static rtx
2167 36294137 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2168 : unsigned int regno)
2169 : {
2170 36294137 : rtx tmp;
2171 :
2172 36294137 : if (orig_mode != BLKmode)
2173 36294109 : tmp = gen_rtx_REG (orig_mode, regno);
2174 : else
2175 : {
2176 28 : tmp = gen_rtx_REG (mode, regno);
2177 28 : tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2178 28 : tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2179 : }
2180 :
2181 36294137 : return tmp;
2182 : }
2183 :
2184 : /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2185 : of this code is to classify each 8bytes of incoming argument by the register
2186 : class and assign registers accordingly. */
2187 :
2188 : /* Return the union class of CLASS1 and CLASS2.
2189 : See the x86-64 PS ABI for details. */
2190 :
2191 : static enum x86_64_reg_class
2192 62808408 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2193 : {
2194 : /* Rule #1: If both classes are equal, this is the resulting class. */
2195 61586067 : if (class1 == class2)
2196 : return class1;
2197 :
2198 : /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2199 : the other class. */
2200 54346915 : if (class1 == X86_64_NO_CLASS)
2201 : return class2;
2202 55152160 : if (class2 == X86_64_NO_CLASS)
2203 : return class1;
2204 :
2205 : /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2206 2365353 : if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2207 : return X86_64_MEMORY_CLASS;
2208 :
2209 : /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2210 2020313 : if ((class1 == X86_64_INTEGERSI_CLASS
2211 189412 : && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2212 2019107 : || (class2 == X86_64_INTEGERSI_CLASS
2213 982284 : && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2214 : return X86_64_INTEGERSI_CLASS;
2215 2015291 : if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2216 829324 : || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2217 : return X86_64_INTEGER_CLASS;
2218 :
2219 : /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2220 : MEMORY is used. */
2221 140672 : if (class1 == X86_64_X87_CLASS
2222 : || class1 == X86_64_X87UP_CLASS
2223 140672 : || class1 == X86_64_COMPLEX_X87_CLASS
2224 : || class2 == X86_64_X87_CLASS
2225 139767 : || class2 == X86_64_X87UP_CLASS
2226 59748 : || class2 == X86_64_COMPLEX_X87_CLASS)
2227 80924 : return X86_64_MEMORY_CLASS;
2228 :
2229 : /* Rule #6: Otherwise class SSE is used. */
2230 : return X86_64_SSE_CLASS;
2231 : }
2232 :
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned. As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   ZERO_WIDTH_BITFIELDS communicates with the diagnostic wrapper below:
   it is set to 1 when a non-C++ zero-width bitfield is skipped (unless
   the caller passed 2, which forces such fields to be classified).

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
		   int &zero_width_bitfields)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
	return 0;
    }

  /* Aggregates (and multi-word _BitInt) are classified field by field,
     recursively, and the per-word classes merged.  */
  if (type && (AGGREGATE_TYPE_P (type)
	       || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      if (integer_zerop (DECL_SIZE (field)))
			{
			  /* C++ zero-width bitfields never participate;
			     for C, record that one was seen so the
			     wrapper can diagnose the GCC 12.1 ABI
			     change, unless forced (== 2) to classify.  */
			  if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
			    continue;
			  if (zero_width_bitfields != 2)
			    {
			      zero_width_bitfields = 1;
			      continue;
			    }
			}
		      /* Mark every 8-byte word the bitfield overlaps as
			 INTEGER.  */
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      /* Recurse into the field; a field that must go in
			 memory (num == 0) forces the whole record there.  */
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 512,
					       zero_width_bitfields);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset,
				     zero_width_bitfields);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    /* Replicate the element classification across all words.  */
	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.
	   */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset, zero_width_bitfields);
		  if (!num)
		    return 0;
		  for (i = 0; i < num && i < words; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	case BITINT_TYPE:
	  /* _BitInt(N) for N > 64 is passed as structure containing
	     (N + 63) / 64 64-bit elements.  */
	  if (words > 2)
	    return 0;
	  classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	  return 2;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with %<long double%>"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

	/* Analyze last 128 bits only.  */
	size = (size - 1) & 0x7f;

	if (size < 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size < 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size < 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size < 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case E_CTImode:
      return 0;
    case E_HFmode:
    case E_BFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSEHF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_SFmode:
      /* SFmode at an 8-byte boundary keeps the partial SSESF class so
	 two floats can later be packed into one SSE register.  */
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_HCmode:
    case E_BCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  classes[1] = X86_64_SSEHF_CLASS;
	  return 2;
	}
    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with %<complex float%>"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case E_TCmode:
      /* This modes is larger than 16 bytes.  */
      return 0;
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 32-byte vectors: one SSE word followed by three SSEUP words.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      /* 64-byte vectors: one SSE word followed by seven SSEUP words.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V2DFmode:
    case E_V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_BLKmode:
    case E_VOIDmode:
      return 0;
    default:
      /* Remaining integer-element vector modes of at most 16 bytes are
	 classified as one or two INTEGER words.  */
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
2697 :
/* Wrapper around classify_argument with the extra zero_width_bitfields
   argument, to diagnose GCC 12.1 ABI differences for C.

   MODE/TYPE/CLASSES/BIT_OFFSET are as for the 5-argument worker; the
   return value is the number of register classes filled in (0 means
   pass in memory).  */

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  /* zero_width_bitfields: 0 = none seen, 2 = reclassify pretending they
     are ignored, 3 = classification difference detected (values set by
     the worker and by the comparison below).  */
  int zero_width_bitfields = 0;
  /* Emit the -Wpsabi note at most once per compilation.  */
  static bool warned = false;
  int n = classify_argument (mode, type, classes, bit_offset,
			     zero_width_bitfields);
  if (!zero_width_bitfields || warned || !warn_psabi)
    return n;
  enum x86_64_reg_class alt_classes[MAX_CLASSES];
  /* Re-run the classification with zero-width bit-fields treated the
     GCC 12.1 way and compare the two results.  */
  zero_width_bitfields = 2;
  if (classify_argument (mode, type, alt_classes, bit_offset,
			 zero_width_bitfields) != n)
    zero_width_bitfields = 3;
  else
    for (int i = 0; i < n; i++)
      if (classes[i] != alt_classes[i])
	{
	  zero_width_bitfields = 3;
	  break;
	}
  if (zero_width_bitfields == 3)
    {
      warned = true;
      const char *url
	= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";

      inform (input_location,
	      "the ABI of passing C structures with zero-width bit-fields"
	      " has changed in GCC %{12.1%}", url);
    }
  return n;
}
2735 :
/* Examine the argument and return set number of register required in each
   class.  Return true iff parameter should be passed in memory.

   MODE/TYPE describe the argument; IN_RETURN is nonzero when classifying
   a return value rather than a parameter.  On success *INT_NREGS and
   *SSE_NREGS receive the number of general-purpose and SSE registers
   needed respectively.  */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  /* Zero classes means the value could not be classified into
     registers, so it lives in memory.  */
  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      /* SSEUP extends a preceding SSE class and needs no extra
	 register of its own.  */
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      /* x87 classes are only usable for return values; as parameters
	 such values go in memory.  */
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}
2779 :
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE is the natural mode, ORIG_MODE the original argument mode.
   IN_RETURN is nonzero for return values.  NINTREGS/NSSEREGS are the
   numbers of still-available integer and SSE registers, INTREG points
   at the next integer parameter register number, and SSE_REGNO is the
   index of the next SSE parameter register.  Returns NULL when the
   value must be passed in memory (or when errors were diagnosed),
   otherwise a REG or PARALLEL describing the register assignment.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     of these diagnostics is emitted at most once per compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled");
	      else
		error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }

  /* Whole 16-, 32- and 64-byte vectors (SSE followed only by SSEUP
     classes) live in a single SSE register.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* An 80-bit long double occupies one x87 register pair.  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  /* Two consecutive integer registers can carry the value directly.  */
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  Each class
     occupies one 8-byte slot; slot I starts at byte offset I*8.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      /* Partial final slot: pick the smallest integer mode
		 that covers the remaining bytes.  */
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested 24 bytes we
		   don't have mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSEHF_CLASS:
	  tmpmode = (mode == BFmode ? BFmode : HFmode);
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* Remember the slot offset before I is advanced past any
	     SSEUP classes absorbed into this register.  */
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
3055 :
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  BYTES/WORDS are the argument's size in bytes
   and in word-sized units.  Used for the 32-bit ABIs.

   Return a number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* bytes < 0 means variable-size; such values never go in
	 registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      /* Only count the advance as register words if the argument
	 actually still fits in the remaining registers.  */
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE register candidates; aggregates never go in SSE regs
	 here.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit vectors advance over MMX registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
3194 :
/* Advance CUM over an argument of mode MODE and type TYPE for the
   x86-64 SYSV ABI.  WORDS is the argument size in word units; NAMED
   is false for arguments matching an ellipsis.  Return the number of
   integer registers consumed.  */

static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  /* Pass in registers only when the classification succeeds and
     enough registers of both kinds remain.  */
  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      /* Memory case: align the stack offset to the argument's
	 boundary and advance past it.  */
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}
3223 :
3224 : static int
3225 446989 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3226 : HOST_WIDE_INT words)
3227 : {
3228 : /* Otherwise, this should be passed indirect. */
3229 446989 : gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3230 :
3231 446989 : cum->words += words;
3232 446989 : if (cum->nregs > 0)
3233 : {
3234 289355 : cum->nregs -= 1;
3235 289355 : cum->regno += 1;
3236 289355 : return 1;
3237 : }
3238 : return 0;
3239 : }
3240 :
/* Update the data in CUM to advance over argument ARG.  Dispatches to
   the ABI-specific advance routine (MS x64, SYSV x86-64 or 32-bit) and
   records whether any outgoing argument ended up on the stack.  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  int nregs;

  /* The argument of interrupt handler is a special case and is
     handled in ix86_function_arg.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    return;

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* Classify via the type's natural mode when a type is available.  */
  if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	nregs = function_arg_advance_ms_64 (cum, bytes, words);
      else
	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
					 arg.named);
    }
  else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);

  if (!nregs)
    {
      /* Track if there are outgoing arguments on stack.  */
      if (cum->caller)
	cfun->machine->outgoing_args_on_stack = true;
    }
}
3283 :
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).

   This is the 32-bit variant; BYTES/WORDS give the argument's size.  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Variable-size aggregates (bytes < 0) always go on the stack.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* Non-aggregate vector/SSE candidates go in SSE registers when
	 one is still available.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256/512-bit vectors likewise use SSE (AVX) registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit vectors go in MMX registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}
3439 :
/* Return the register (or PARALLEL) for an argument of mode MODE and
   type TYPE under the x86-64 SYSV ABI, or NULL to pass on the stack.
   ORIG_MODE is the pre-promotion mode; NAMED is false for arguments
   matching an ellipsis.  */

static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  /* The preserve_none calling convention uses a different set of
     integer parameter registers.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &parm_regs[cum->regno],
			      cum->sse_regno);
}
3491 :
/* Return the register (or PARALLEL) for an argument under the
   Microsoft x64 ABI, or NULL_RTX to pass on the stack.  MODE/ORIG_MODE,
   TYPE, NAMED and BYTES are as for function_arg_64.  */

static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes no larger than 64 bits are passed in
     anything but integer regs.  Larger floating point types are excluded
     as the Windows ABI requires vreg args can be shadowed in GPRs (for
     red zone / varargs).  */
  if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
3544 :
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt/exception handlers receive their arguments at fixed
     frame offsets rather than in the normal argument registers.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
3613 :
3614 : /* A C expression that indicates when an argument must be passed by
3615 : reference. If nonzero for an argument, a copy of that argument is
3616 : made in memory and a pointer to the argument is passed instead of
3617 : the argument itself. The pointer is passed in whatever way is
3618 : appropriate for passing a pointer to that type. */
3619 :
static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      /* With no cumulative-args context (e.g. libcalls), fall back to
	 the translation unit's default ABI.  */
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

	  if (tree type = arg.type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  Use the type size, not the
		     mode size, so padded aggregates are judged correctly.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      /* SysV: only variable-sized types (int_size_in_bytes == -1) go by
	 reference; everything else is classified by construct_container.  */
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
	return true;
    }

  /* 32-bit ABIs never pass by (implicit) reference.  */
  return false;
}
3657 :
3658 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3659 : passing ABI. XXX: This function is obsolete and is only used for
3660 : checking psABI compatibility with previous versions of GCC. */
3661 :
static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  /* SSE vector modes and 128-bit scalar FP/decimal/complex modes were
     treated as 128-bit aligned, unless the user explicitly lowered the
     alignment below 128 with an align attribute.  */
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
3708 :
3709 : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3710 : XXX: This function is obsolete and is only used for checking psABI
3711 : compatibility with previous versions of GCC. */
3712 :
static unsigned int
ix86_compat_function_arg_boundary (machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  /* Historical behavior capped argument alignment at BIGGEST_ALIGNMENT.  */
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
3743 :
3744 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3745 : passing ABI. */
3746 :
static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  /* On ia32, long double (XFmode/XCmode) is only 4-byte aligned for
     argument passing regardless of its TYPE_ALIGN.  */
  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    /* Non-aggregate: alignment already known to be >= 128 here.  */
    return TYPE_ALIGN (type) >= 128;

  return false;
}
3796 :
3797 : /* Gives the alignment boundary, in bits, of an argument with the
3798 : specified mode and type. */
3799 :
static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types take no space and get the minimum boundary.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      /* Warn at most once per compilation about the GCC 4.6 psABI
	 alignment change.  */
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* Anything below 128-bit alignment collapses to the default
	     parameter boundary on ia32.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
3852 :
3853 : /* Return true if N is a possible register number of function value. */
3854 :
static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      /* %eax/%rax is a return register in every supported ABI.  */
      return true;
    case DX_REG:
      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    case DI_REG:
    case SI_REG:
      /* Only used for returns under the 64-bit SysV ABI.  */
      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.cc would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
3891 :
3892 : /* Check whether the register REGNO should be zeroed on X86.
3893 : When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3894 : together, no need to zero it again.
3895 : When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3896 :
3897 : static bool
3898 1296 : zero_call_used_regno_p (const unsigned int regno,
3899 : bool all_sse_zeroed,
3900 : bool need_zero_mmx)
3901 : {
3902 763 : return GENERAL_REGNO_P (regno)
3903 763 : || (!all_sse_zeroed && SSE_REGNO_P (regno))
3904 383 : || MASK_REGNO_P (regno)
3905 1671 : || (need_zero_mmx && MMX_REGNO_P (regno));
3906 : }
3907 :
3908 : /* Return the machine_mode that is used to zero register REGNO. */
3909 :
3910 : static machine_mode
3911 921 : zero_call_used_regno_mode (const unsigned int regno)
3912 : {
3913 : /* NB: We only need to zero the lower 32 bits for integer registers
3914 : and the lower 128 bits for vector registers since destination are
3915 : zero-extended to the full register width. */
3916 921 : if (GENERAL_REGNO_P (regno))
3917 : return SImode;
3918 : else if (SSE_REGNO_P (regno))
3919 380 : return V4SFmode;
3920 : else if (MASK_REGNO_P (regno))
3921 : return HImode;
3922 : else if (MMX_REGNO_P (regno))
3923 0 : return V2SImode;
3924 : else
3925 0 : gcc_unreachable ();
3926 : }
3927 :
3928 : /* Generate a rtx to zero all vector registers together if possible,
3929 : otherwise, return NULL. */
3930 :
static rtx
zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* vzeroall requires AVX.  */
  if (!TARGET_AVX)
    return NULL;

  /* vzeroall clobbers every vector register reachable in the current
     mode, so it is only usable when all of them are requested in
     NEED_ZEROED_HARDREGS; otherwise we would clobber live values.  */
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((LEGACY_SSE_REGNO_P (regno)
	 || (TARGET_64BIT
	     && (REX_SSE_REGNO_P (regno)
		 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
	&& !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      return NULL;

  return gen_avx_vzeroall ();
}
3947 :
3948 : /* Generate insns to zero all st registers together.
3949 : Return true when zeroing instructions are generated.
3950 : Assume the number of st registers that are zeroed is num_of_st,
3951 : we will emit the following sequence to zero them together:
3952 : fldz; \
3953 : fldz; \
3954 : ...
3955 : fldz; \
3956 : fstp %%st(0); \
3957 : fstp %%st(0); \
3958 : ...
3959 : fstp %%st(0);
3960 : i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3961 : mark stack slots empty.
3962 :
3963 : How to compute the num_of_st:
3964 : There is no direct mapping from stack registers to hard register
3965 : numbers. If one stack register needs to be cleared, we don't know
3966 : where in the stack the value remains. So, if any stack register
3967 : needs to be cleared, the whole stack should be cleared. However,
3968 : x87 stack registers that hold the return value should be excluded.
3969 : x87 returns in the top (two for complex values) register, so
3970 : num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3971 : return the value of num_of_st. */
3972 :
3973 :
static int
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{

  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return 0;

  /* NUM_OF_ST is first used only as a found-flag: the loop breaks on the
     first stack/MMX register present in NEED_ZEROED_HARDREGS.  */
  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	num_of_st++;
	break;
      }

  if (num_of_st == 0)
    return 0;

  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
		     && (STACK_REG_P (crtl->return_rtx)));

  bool complex_return = false;
  complex_return = (crtl->return_rtx
		    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  /* Recompute NUM_OF_ST as the real number of slots to clear: the whole
     stack (8), minus the one or two top slots that carry the x87 return
     value (see the function comment above).  */
  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  /* Push NUM_OF_ST zeros ...  */
  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  /* ... then pop them all again so the stack slots are marked empty.  */
  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return num_of_st;
}
4022 :
4023 :
4024 : /* When the routine exit in MMX mode, if any ST register needs
4025 : to be zeroed, we should clear all MMX registers except the
4026 : RET_MMX_REGNO that holds the return value. */
static bool
zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
		       unsigned int ret_mmx_regno)
{
  /* Only needed when some x87 stack register was requested; the MMX and
     x87 registers share the same physical register file.  */
  bool need_zero_all_mm = false;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (STACK_REGNO_P (regno)
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	need_zero_all_mm = true;
	break;
      }

  if (!need_zero_all_mm)
    return false;

  /* Clear every MMX register except the one holding the return value.  */
  machine_mode mode = V2SImode;
  for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    if (regno != ret_mmx_regno)
      {
	rtx reg = gen_rtx_REG (mode, regno);
	emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
      }
  return true;
}
4052 :
4053 : /* TARGET_ZERO_CALL_USED_REGS. */
4054 : /* Generate a sequence of instructions that zero registers specified by
4055 : NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
4056 : zeroed. */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      /* Record the cleared stack slots, excluding the one or two top
	 registers that carry an x87 return value (all_st_zeroed_num is
	 8 when nothing is returned in x87, 7/6 otherwise).  */
      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* Prefer xor, which clobbers the flags, unless mov 0 is
	     desirable for this tuning.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
4159 :
4160 : /* Define how to find the value returned by a function.
4161 : VALTYPE is the data type of the value (as a tree).
4162 : If the precise function being called is known, FUNC is its FUNCTION_DECL;
4163 : otherwise, FUNC is 0. */
4164 :
static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  error ("SSE register return with SSE2 disabled");
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled");

      /* _Complex _Float16 is returned as a single SImode chunk.  */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
4247 :
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	/* Scalar/complex FP and decimal FP return in %xmm0.  */
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	/* 80-bit extended FP returns in %st(0).  */
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4303 :
4304 : static rtx
4305 0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4306 : const_tree fntype, const_tree fn, const_tree valtype)
4307 : {
4308 0 : unsigned int regno;
4309 :
4310 : /* Floating point return values in %st(0)
4311 : (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4312 0 : if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4313 0 : && (GET_MODE_SIZE (mode) > 8
4314 0 : || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4315 : {
4316 0 : regno = FIRST_FLOAT_REG;
4317 0 : return gen_rtx_REG (orig_mode, regno);
4318 : }
4319 : else
4320 0 : return function_value_32(orig_mode, mode, fntype,fn);
4321 : }
4322 :
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
		      const_tree valtype)
{
  /* Default: return in %rax.  */
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  /* 16-byte integral/float vectors (e.g. __m128) go in %xmm0;
	     other 16-byte types keep %rax (and are normally returned
	     in memory per ix86_return_in_memory).  */
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	case 2:
	  /* Small aggregates return in %rax; scalar FP in %xmm0.  */
	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
	    break;
	  if (mode == HFmode || mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
4357 :
4358 : static rtx
4359 107408134 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4360 : machine_mode orig_mode, machine_mode mode)
4361 : {
4362 107408134 : const_tree fn, fntype;
4363 :
4364 107408134 : fn = NULL_TREE;
4365 107408134 : if (fntype_or_decl && DECL_P (fntype_or_decl))
4366 3525315 : fn = fntype_or_decl;
4367 3525315 : fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4368 :
4369 107408134 : if (ix86_function_type_abi (fntype) == MS_ABI)
4370 : {
4371 767095 : if (TARGET_64BIT)
4372 767095 : return function_value_ms_64 (orig_mode, mode, valtype);
4373 : else
4374 0 : return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4375 : }
4376 106641039 : else if (TARGET_64BIT)
4377 102732627 : return function_value_64 (orig_mode, mode, valtype);
4378 : else
4379 3908412 : return function_value_32 (orig_mode, mode, fntype, fn);
4380 : }
4381 :
4382 : static rtx
4383 107302432 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4384 : {
4385 107302432 : machine_mode mode, orig_mode;
4386 :
4387 107302432 : orig_mode = TYPE_MODE (valtype);
4388 107302432 : mode = type_natural_mode (valtype, NULL, true);
4389 107302432 : return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4390 : }
4391 :
4392 : /* Pointer function arguments and return values are promoted to
4393 : word_mode for normal functions. */
4394 :
4395 : static machine_mode
4396 31955484 : ix86_promote_function_mode (const_tree type, machine_mode mode,
4397 : int *punsignedp, const_tree fntype,
4398 : int for_return)
4399 : {
4400 31955484 : if (cfun->machine->func_type == TYPE_NORMAL
4401 31954461 : && type != NULL_TREE
4402 31920624 : && POINTER_TYPE_P (type))
4403 : {
4404 15958595 : *punsignedp = POINTERS_EXTEND_UNSIGNED;
4405 15958595 : return word_mode;
4406 : }
4407 15996889 : return default_promote_function_mode (type, mode, punsignedp, fntype,
4408 15996889 : for_return);
4409 : }
4410 :
4411 : /* Return true if a structure, union or array with MODE containing FIELD
4412 : should be accessed using BLKmode. */
4413 :
4414 : static bool
4415 149179411 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4416 : {
4417 : /* Union with XFmode must be in BLKmode. */
4418 149179411 : return (mode == XFmode
4419 149348864 : && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4420 129585 : || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4421 : }
4422 :
/* Return the location in which a libcall returns a value of mode MODE.
   Libcalls have no type tree, so delegate with a NULL type.  */

rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
4428 :
4429 : /* Return true iff type is returned in memory. */
4430 :
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  /* SysV: in memory iff the classifier cannot fit the value in
	     return registers.  */
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      /* long double fits in %st(0) regardless of its 12/16-byte size.  */
      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
4515 :
4516 : /* Implement TARGET_PUSH_ARGUMENT. */
4517 :
4518 : static bool
4519 9315521 : ix86_push_argument (unsigned int npush)
4520 : {
4521 : /* If SSE2 is available, use vector move to put large argument onto
4522 : stack. NB: In 32-bit mode, use 8-byte vector move. */
4523 11733868 : return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4524 9051130 : && TARGET_PUSH_ARGS
4525 18366553 : && !ACCUMULATE_OUTGOING_ARGS);
4526 : }
4527 :
4528 :
4529 : /* Create the va_list data type. */
4530 :
static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* Build the four-field __va_list_tag record mandated by the x86-64
     SysV psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Remember the counter fields for va_arg expansion.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* Tag the record so canonical_va_list_type can identify it (see the
     comment in ix86_build_builtin_va_list).  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
4576 :
4577 : /* Setup the builtin va_list data type and for 64-bit the additional
4578 : calling convention specific va_list data types. */
4579 :
4580 : static tree
4581 285422 : ix86_build_builtin_va_list (void)
4582 : {
4583 285422 : if (TARGET_64BIT)
4584 : {
4585 : /* Initialize ABI specific va_list builtin types.
4586 :
4587 : In lto1, we can encounter two va_list types:
4588 : - one as a result of the type-merge across TUs, and
4589 : - the one constructed here.
4590 : These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4591 : a type identity check in canonical_va_list_type based on
4592 : TYPE_MAIN_VARIANT (which we used to have) will not work.
4593 : Instead, we tag each va_list_type_node with its unique attribute, and
4594 : look for the attribute in the type identity check in
4595 : canonical_va_list_type.
4596 :
4597 : Tagging sysv_va_list_type_node directly with the attribute is
4598 : problematic since it's a array of one record, which will degrade into a
4599 : pointer to record when used as parameter (see build_va_arg comments for
4600 : an example), dropping the attribute in the process. So we tag the
4601 : record instead. */
4602 :
4603 : /* For SYSV_ABI we use an array of one record. */
4604 278262 : sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4605 :
4606 : /* For MS_ABI we use plain pointer to argument area. */
4607 278262 : tree char_ptr_type = build_pointer_type (char_type_node);
4608 278262 : tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4609 278262 : TYPE_ATTRIBUTES (char_ptr_type));
4610 278262 : ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4611 :
4612 278262 : return ((ix86_abi == MS_ABI)
4613 278262 : ? ms_va_list_type_node
4614 278262 : : sysv_va_list_type_node);
4615 : }
4616 : else
4617 : {
4618 : /* For i386 we use plain pointer to argument area. */
4619 7160 : return build_pointer_type (char_type_node);
4620 : }
4621 : }
4622 :
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Emit code that
   spills the unnamed GP and SSE parameter registers into the register
   save area so va_arg can later read them from memory.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save at all: no registers need spilling.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Spill only from the first unnamed GP register (cum->regno) up to the
     number of GP slots the function's va_list usage actually needs,
     capped at the ABI register-parameter limit.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* preserve_none functions use a different sequence of integer
     parameter registers.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      /* Skip the SSE spills entirely when AL is zero.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      /* Same capping logic as for the GP registers, 16 bytes per slot.  */
      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  /* SSE slots are laid out immediately after the GP save area.  */
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
4713 :
4714 : static void
4715 5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4716 : {
4717 5652 : alias_set_type set = get_varargs_alias_set ();
4718 5652 : int i;
4719 :
4720 : /* Reset to zero, as there might be a sysv vaarg used
4721 : before. */
4722 5652 : ix86_varargs_gpr_size = 0;
4723 5652 : ix86_varargs_fpr_size = 0;
4724 :
4725 14154 : for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4726 : {
4727 8502 : rtx reg, mem;
4728 :
4729 8502 : mem = gen_rtx_MEM (Pmode,
4730 8502 : plus_constant (Pmode, virtual_incoming_args_rtx,
4731 8502 : i * UNITS_PER_WORD));
4732 8502 : MEM_NOTRAP_P (mem) = 1;
4733 8502 : set_mem_alias_set (mem, set);
4734 :
4735 8502 : reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4736 8502 : emit_move_insn (mem, reg);
4737 : }
4738 5652 : }
4739 :
4740 : static void
4741 21512 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4742 : const function_arg_info &arg,
4743 : int *, int no_rtl)
4744 : {
4745 21512 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4746 21512 : CUMULATIVE_ARGS next_cum;
4747 21512 : tree fntype;
4748 :
4749 : /* This argument doesn't appear to be used anymore. Which is good,
4750 : because the old code here didn't suppress rtl generation. */
4751 21512 : gcc_assert (!no_rtl);
4752 :
4753 21512 : if (!TARGET_64BIT)
4754 154 : return;
4755 :
4756 21358 : fntype = TREE_TYPE (current_function_decl);
4757 :
4758 : /* For varargs, we do not want to skip the dummy va_dcl argument.
4759 : For stdargs, we do want to skip the last named argument. */
4760 21358 : next_cum = *cum;
4761 21358 : if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4762 121 : || arg.type != NULL_TREE)
4763 21383 : && stdarg_p (fntype))
4764 21262 : ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4765 :
4766 21358 : if (cum->call_abi == MS_ABI)
4767 5652 : setup_incoming_varargs_ms_64 (&next_cum);
4768 : else
4769 15706 : setup_incoming_varargs_64 (&next_cum);
4770 : }
4771 :
4772 : /* Checks if TYPE is of kind va_list char *. */
4773 :
4774 : static bool
4775 73096 : is_va_list_char_pointer (tree type)
4776 : {
4777 73096 : tree canonic;
4778 :
4779 : /* For 32-bit it is always true. */
4780 73096 : if (!TARGET_64BIT)
4781 : return true;
4782 72934 : canonic = ix86_canonical_va_list_type (type);
4783 72934 : return (canonic == ms_va_list_type_node
4784 72934 : || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4785 : }
4786 :
/* Implement va_start.  Initializes the va_list VALIST: either the
   simple char-pointer form (MS ABI / 32-bit style), or the four-field
   SYSV record (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area).  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Build the copy in a detached sequence and splice it in
	     right at the function entry point.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  /* Point the va_list at the stack arguments via the
	     split-stack pseudo instead of internal_arg_pointer.  */
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* Pick the four fields out of the __va_list_tag record built by
     ix86_build_builtin_va_list_64.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  /* gp_offset = bytes of GP save area consumed by named arguments.  */
  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* fp_offset = consumed SSE slots (16 bytes each), biased past the
     full GP area (8 * X86_64_REGPARM_MAX).  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      /* When no GP slots were saved, bias the base back by the full GP
	 area size so the GP-biased fp_offset still indexes the SSE
	 slots correctly.  */
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
4917 :
/* Implement va_arg.  Gimplify a VA_ARG_EXPR reading a value of TYPE
   from the va_list VALIST, appending statements to PRE_P/POST_P.
   For the SYSV 64-bit va_list this emits the in-register fast path
   (reading from the register save area) plus the overflow-area
   fallback.  Returns the dereferenced result expression.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Identity map for construct_container's integer register slots.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four fields of the SYSV __va_list_tag record.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* By-reference arguments: fetch the pointer, dereference at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  /* container == NULL means the value is never passed in registers and
     only the overflow-area code below is emitted.  */
  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false: registers exhausted, fall through to the overflow
	 area.  lab_over: join point after the register path.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      bool container_in_reg = false;
      if (REG_P (container))
	container_in_reg = true;
      else if (GET_CODE (container) == PARALLEL
	       && GET_MODE (container) == BLKmode
	       && XVECLEN (container, 0) == 1)
	{
	  /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
	     expression in a TImode register.  In this case, temp isn't
	     needed.  Otherwise, the TImode variable will be put in the
	     GPR save area which guarantees only 8-byte alignment.  */
	  rtx x = XVECEXP (container, 0, 0);
	  if (GET_CODE (x) == EXPR_LIST
	      && REG_P (XEXP (x, 0))
	      && XEXP (x, 1) == const0_rtx)
	    container_in_reg = true;
	}

      /* A temporary is needed when the save area cannot provide the
	 alignment the type requires (GP slots give 8 bytes, SSE slots
	 16).  */
      need_temp = (!container_in_reg
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !container_in_reg)
	{
	  /* Verify that all registers are strictly consecutive */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      /* Without a temporary, the value is read directly from the save
	 area through ADDR.  */
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  /* Copy each register piece of the value into the suitably
	     aligned temporary.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  /* Final piece extends past the end of the type:
		     narrow it to an integer mode of the remaining
		     size, or copy byte-wise via QImode.  */
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      /* Source is the matching GP (8-byte) or SSE (16-byte)
		 slot in the save area.  */
	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* No integer mode of exactly cur_size bytes exists;
		     fall back to memcpy.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      /* Advance the offset counters past the registers just consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* addr = (ovf + align - 1) & -align.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance ovf past the argument, rounded up to a whole number of
     words.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  /* Read the value at ADDR with the alignment actually guaranteed.  */
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5242 :
5243 : /* Return true if OPNUM's MEM should be matched
5244 : in movabs* patterns. */
5245 :
5246 : bool
5247 505 : ix86_check_movabs (rtx insn, int opnum)
5248 : {
5249 505 : rtx set, mem;
5250 :
5251 505 : set = PATTERN (insn);
5252 505 : if (GET_CODE (set) == PARALLEL)
5253 0 : set = XVECEXP (set, 0, 0);
5254 505 : gcc_assert (GET_CODE (set) == SET);
5255 505 : mem = XEXP (set, opnum);
5256 505 : while (SUBREG_P (mem))
5257 0 : mem = SUBREG_REG (mem);
5258 505 : gcc_assert (MEM_P (mem));
5259 505 : return volatile_ok || !MEM_VOLATILE_P (mem);
5260 : }
5261 :
5262 : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */
5263 : bool
5264 197535 : ix86_check_movs (rtx insn, int idx)
5265 : {
5266 197535 : rtx pat = PATTERN (insn);
5267 197535 : gcc_assert (GET_CODE (pat) == PARALLEL);
5268 :
5269 197535 : rtx set = XVECEXP (pat, 0, idx);
5270 197535 : gcc_assert (GET_CODE (set) == SET);
5271 :
5272 197535 : rtx dst = SET_DEST (set);
5273 197535 : gcc_assert (MEM_P (dst));
5274 :
5275 197535 : rtx src = SET_SRC (set);
5276 197535 : gcc_assert (MEM_P (src));
5277 :
5278 197535 : return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
5279 395070 : && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
5280 0 : || Pmode == word_mode));
5281 : }
5282 :
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  /* Walk every sub-rtx of the pattern looking for a disqualifying MEM.  */
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}
5296 :
5297 : /* Initialize the table of extra 80387 mathematical constants. */
5298 :
5299 : static void
5300 2383 : init_ext_80387_constants (void)
5301 : {
5302 2383 : static const char * cst[5] =
5303 : {
5304 : "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5305 : "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5306 : "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5307 : "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5308 : "3.1415926535897932385128089594061862044", /* 4: fldpi */
5309 : };
5310 2383 : int i;
5311 :
5312 14298 : for (i = 0; i < 5; i++)
5313 : {
5314 11915 : real_from_string (&ext_80387_constants_table[i], cst[i]);
5315 : /* Ensure each constant is rounded to XFmode precision. */
5316 11915 : real_convert (&ext_80387_constants_table[i],
5317 23830 : XFmode, &ext_80387_constants_table[i]);
5318 : }
5319 :
5320 2383 : ext_80387_constants_init = 1;
5321 2383 : }
5322 :
5323 : /* Return non-zero if the constant is something that
5324 : can be loaded with a special instruction. */
5325 :
5326 : int
5327 5064640 : standard_80387_constant_p (rtx x)
5328 : {
5329 5064640 : machine_mode mode = GET_MODE (x);
5330 :
5331 5064640 : const REAL_VALUE_TYPE *r;
5332 :
5333 5064640 : if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5334 : return -1;
5335 :
5336 4604607 : if (x == CONST0_RTX (mode))
5337 : return 1;
5338 2117412 : if (x == CONST1_RTX (mode))
5339 : return 2;
5340 :
5341 1231622 : r = CONST_DOUBLE_REAL_VALUE (x);
5342 :
5343 : /* For XFmode constants, try to find a special 80387 instruction when
5344 : optimizing for size or on those CPUs that benefit from them. */
5345 1231622 : if (mode == XFmode
5346 795963 : && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5347 2027585 : && !flag_rounding_math)
5348 : {
5349 788165 : int i;
5350 :
5351 788165 : if (! ext_80387_constants_init)
5352 2376 : init_ext_80387_constants ();
5353 :
5354 4718478 : for (i = 0; i < 5; i++)
5355 3939144 : if (real_identical (r, &ext_80387_constants_table[i]))
5356 8831 : return i + 3;
5357 : }
5358 :
5359 : /* Load of the constant -0.0 or -1.0 will be split as
5360 : fldz;fchs or fld1;fchs sequence. */
5361 1222791 : if (real_isnegzero (r))
5362 : return 8;
5363 1206414 : if (real_identical (r, &dconstm1))
5364 301732 : return 9;
5365 :
5366 : return 0;
5367 : }
5368 :
5369 : /* Return the opcode of the special instruction to be used to load
5370 : the constant X. */
5371 :
5372 : const char *
5373 54500 : standard_80387_constant_opcode (rtx x)
5374 : {
5375 54500 : switch (standard_80387_constant_p (x))
5376 : {
5377 : case 1:
5378 : return "fldz";
5379 33957 : case 2:
5380 33957 : return "fld1";
5381 1 : case 3:
5382 1 : return "fldlg2";
5383 10 : case 4:
5384 10 : return "fldln2";
5385 12 : case 5:
5386 12 : return "fldl2e";
5387 2 : case 6:
5388 2 : return "fldl2t";
5389 192 : case 7:
5390 192 : return "fldpi";
5391 0 : case 8:
5392 0 : case 9:
5393 0 : return "#";
5394 0 : default:
5395 0 : gcc_unreachable ();
5396 : }
5397 : }
5398 :
5399 : /* Return the CONST_DOUBLE representing the 80387 constant that is
5400 : loaded by the specified special instruction. The argument IDX
5401 : matches the return value from standard_80387_constant_p. */
5402 :
5403 : rtx
5404 24 : standard_80387_constant_rtx (int idx)
5405 : {
5406 24 : int i;
5407 :
5408 24 : if (! ext_80387_constants_init)
5409 7 : init_ext_80387_constants ();
5410 :
5411 24 : switch (idx)
5412 : {
5413 24 : case 3:
5414 24 : case 4:
5415 24 : case 5:
5416 24 : case 6:
5417 24 : case 7:
5418 24 : i = idx - 3;
5419 24 : break;
5420 :
5421 0 : default:
5422 0 : gcc_unreachable ();
5423 : }
5424 :
5425 24 : return const_double_from_real_value (ext_80387_constants_table[i],
5426 24 : XFmode);
5427 : }
5428 :
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  /* All-zeros is handled for every SSE mode.  */
  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  /* All-ones: an integer -1, a vector of all-one bits, or a float
     vector whose bit pattern is all ones (only considered when either
     mode is a float vector mode).  */
  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	   || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
	  && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      /* All-ones is only valid when the target supports the vector
	 width in question.  */
      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  /* All-ones in the low half or quarter, zero-extended to the full
     vector width.  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
5484 :
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].

   INSN is the move instruction (queried for its attribute mode, which
   selects between integer and FP flavors of the zeroing/all-ones
   idioms).  The caller must already have checked via
   standard_sse_constant_p that operands[1] is such a constant; any
   other constant reaches gcc_unreachable.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* Case 1: all-bits-zero -> xor the destination with itself.  The
     exact mnemonic depends on the insn's attribute mode and on whether
     the destination is an extended (xmm16+) register, which has no
     legacy/VEX encoding and needs EVEX forms (vpxord/vpxorq) or, when
     AVX512VL is unavailable, a full zmm-width operation via %g0.  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0";
	      else
		return "vpxord\t%g0, %g0, %g0";
	    }
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX xorpd exists only with AVX512DQ; otherwise fall
		 back to the integer-domain vpxorq.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0";
		  else
		    return "vxorpd\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0";
		  else
		    return "vpxorq\t%g0, %g0, %g0";
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0";

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX xorps likewise requires AVX512DQ.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0";
		  else
		    return "vxorps\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0";
		  else
		    return "vpxord\t%g0, %g0, %g0";
		}
	    }
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  gcc_unreachable ();
	}
    }
  /* Case 2: all-bits-one -> pcmpeqd of a register with itself, or
     vpternlogd with immediate 0xFF for 512-bit / extended registers.  */
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	      else
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
	    }
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
   }
  /* Case 3: lower half all ones, upper half zero -> compare on the
     half-width register (%t0 = ymm part of a zmm, %x0 = xmm part of a
     ymm); writing the narrow register zeroes the upper bits.  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F);
	  return "vpcmpeqd\t%t0, %t0, %t0";
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0";
	}
      gcc_unreachable ();
    }
  /* Case 4: lower quarter all ones (xmm part of a zmm), rest zero.  */
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
5632 :
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.

   DST is the destination register of INSN.  The transformation is
   valid only when the constant source of INSN can be materialized by
   a special instruction in DST's register class:
     - for SSE registers, it must be the all-zeros constant
       (standard_sse_constant_p == 1);
     - for xmm16+ without AVX512VL, even all-zeros is rejected since
       the zeroing idiom has no usable encoding there;
     - for x87 stack registers, it must be one of the fld constants
       (standard_80387_constant_p >= 1).  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  /* Reject when there is no constant source, or when the constant is
     not loadable by a special instruction for DST's register class.  */
  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (!TARGET_AVX512VL
	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
5655 :
5656 : /* Predicate for pre-reload splitters with associated instructions,
5657 : which can match any time before the split1 pass (usually combine),
5658 : then are unconditionally split in that pass and should not be
5659 : matched again afterwards. */
5660 :
5661 : bool
5662 17678266 : ix86_pre_reload_split (void)
5663 : {
5664 17678266 : return (can_create_pseudo_p ()
5665 26809499 : && !(cfun->curr_properties & PROP_rtl_split_insns));
5666 : }
5667 :
/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
   TARGET_AVX512VL or it is a register to register move which can
   be done with zmm register move.

   OPERANDS are the move's destination/source; SIZE is the vector
   width in bytes (16/32/64) and selects the operand modifier used in
   the final template (%x/%t/%g); INSN_MODE is the insn attribute mode
   and MODE the machine mode of the operands.  The selected template
   is emitted via output_asm_insn and "" returned, so the caller's
   output template is empty.  */

static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
		 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  /* Unaligned access on either side forces the "u" (unaligned) forms.  */
  bool misaligned_p = (misaligned_operand (operands[0], mode)
		       || misaligned_operand (operands[1], mode));
  /* EVEX encoding is required for 512-bit moves and whenever an
     extended (xmm16+) register is involved.  */
  bool evex_reg_p = (size == 64
		     || EXT_REX_SSE_REG_P (operands[0])
		     || EXT_REX_SSE_REG_P (operands[1]));

  /* APX extended GPRs (r16+) in an address can only be encoded with
     EVEX-promoted instructions, which constrains the mnemonic.  */
  bool egpr_p = (TARGET_APX_EGPR
		 && (x86_extended_rex2reg_mentioned_p (operands[0])
		     || x86_extended_rex2reg_mentioned_p (operands[1])));
  bool egpr_vl = egpr_p && TARGET_AVX512VL;

  machine_mode scalar_mode;

  const char *opcode = NULL;
  enum
  {
    opcode_int,
    opcode_float,
    opcode_double
  } type = opcode_int;

  /* Derive the element mode and the int/float/double flavor from the
     insn attribute mode.  */
  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
	 AVX512VL is disabled, LRA can still generate reg to
	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
	 modes.  */
      if (memory_operand (operands[0], mode)
	  || memory_operand (operands[1], mode))
	gcc_unreachable ();
      /* Widen to a full zmm move; the final switch below then uses
	 the %g (zmm) operand modifier.  */
      size = 64;
      switch (type)
	{
	case opcode_int:
	  if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  break;
	case opcode_float:
	  opcode = misaligned_p ? "vmovups" : "vmovaps";
	  break;
	case opcode_double:
	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
	  break;
	}
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
	{
	/* 16-bit FP elements have no dedicated mov; use the integer
	   forms, preferring vmovdqu16 when AVX512BW is available.  */
	case E_HFmode:
	case E_BFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SFmode:
	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  break;
	case E_DFmode:
	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
	  break;
	case E_TFmode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
	{
	case E_QImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu8"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_HImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else if (egpr_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovups")
		      : "%vmovaps");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW && evex_reg_p
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_DImode:
	case E_TImode:
	case E_OImode:
	  if (evex_reg_p || egpr_vl)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else if (egpr_p)
	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_XImode:
	  /* 512-bit integer moves are always EVEX.  */
	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();

  /* Build the final template with the operand modifier that matches
     the (possibly widened) vector width.  */
  switch (size)
    {
    case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}
5890 :
/* Return the template of the TYPE_SSEMOV instruction to move
   operands[1] into operands[0].

   Vector-width modes are delegated to ix86_get_ssemov with the byte
   size; scalar modes return a fixed template directly.  The source
   and destination must share the same machine mode.  */

const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    /* 64-byte (zmm) moves.  */
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    /* 32-byte (ymm) moves.  */
    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    /* 16-byte (xmm) moves.  */
    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (GENERAL_REG_P (operands[0]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%1, %q0|%q0, %1}";
	  else
	    return "%vmovd\t{%1, %q0|%q0, %1}";
	}
      else if (GENERAL_REG_P (operands[1]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%q1, %0|%0, %q1}";
	  else
	    return "%vmovd\t{%q1, %0|%0, %q1}";
	}
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      /* %k forces the 32-bit name of a general register.  */
      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "%vmovd\t{%k1, %0|%0, %k1}";
      else
	return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      /* vmovw is an AVX512FP16 instruction; this path presumably only
	 triggers on targets where it is available.  */
      if (GENERAL_REG_P (operands[0]))
	return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "vmovw\t{%k1, %0|%0, %k1}";
      else
	return "vmovw\t{%1, %0|%0, %1}";

    case MODE_DF:
      /* Reg-to-reg AVX moves use the %d (duplicated source) modifier to
	 avoid a partial register dependency on the destination.  */
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsh\t{%d1, %0|%0, %d1}";
      else
	return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
5989 :
5990 : /* Returns true if OP contains a symbol reference */
5991 :
5992 : bool
5993 583614079 : symbolic_reference_mentioned_p (rtx op)
5994 : {
5995 583614079 : const char *fmt;
5996 583614079 : int i;
5997 :
5998 583614079 : if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
5999 : return true;
6000 :
6001 441156835 : fmt = GET_RTX_FORMAT (GET_CODE (op));
6002 748518509 : for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6003 : {
6004 597179772 : if (fmt[i] == 'E')
6005 : {
6006 2019905 : int j;
6007 :
6008 4042340 : for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6009 3326903 : if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6010 : return true;
6011 : }
6012 :
6013 595159867 : else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6014 : return true;
6015 : }
6016 :
6017 : return false;
6018 : }
6019 :
6020 : /* Return true if it is appropriate to emit `ret' instructions in the
6021 : body of a function. Do this only if the epilogue is simple, needing a
6022 : couple of insns. Prior to reloading, we can't tell how many registers
6023 : must be saved, so return false then. Return false if there is no frame
6024 : marker to de-allocate. */
6025 :
6026 : bool
6027 0 : ix86_can_use_return_insn_p (void)
6028 : {
6029 0 : if (ix86_function_ms_hook_prologue (current_function_decl))
6030 : return false;
6031 :
6032 0 : if (ix86_function_naked (current_function_decl))
6033 : return false;
6034 :
6035 : /* Don't use `ret' instruction in interrupt handler. */
6036 0 : if (! reload_completed
6037 0 : || frame_pointer_needed
6038 0 : || cfun->machine->func_type != TYPE_NORMAL)
6039 : return 0;
6040 :
6041 : /* Don't allow more than 32k pop, since that's all we can do
6042 : with one instruction. */
6043 0 : if (crtl->args.pops_args && crtl->args.size >= 32768)
6044 : return 0;
6045 :
6046 0 : struct ix86_frame &frame = cfun->machine->frame;
6047 0 : return (frame.stack_pointer_offset == UNITS_PER_WORD
6048 0 : && (frame.nregs + frame.nsseregs) == 0);
6049 : }
6050 :
6051 : /* Return stack frame size. get_frame_size () returns used stack slots
6052 : during compilation, which may be optimized out later. If stack frame
6053 : is needed, stack_frame_required should be true. */
6054 :
6055 : static HOST_WIDE_INT
6056 8193916 : ix86_get_frame_size (void)
6057 : {
6058 8193916 : if (cfun->machine->stack_frame_required)
6059 8124631 : return get_frame_size ();
6060 : else
6061 : return 0;
6062 : }
6063 :
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.

   Implements TARGET_FRAME_POINTER_REQUIRED.  Each check below is an
   independent reason the current function cannot omit %ebp/%rbp.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assumes that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
6109 :
/* Record that the current function accesses previous call frames.
   Called (via SETUP_FRAME_ADDRESSES) when __builtin_frame_address or
   __builtin_return_address is used with a nonzero level; the flag
   later forces a frame pointer in ix86_frame_pointer_required.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
6117 :
6118 : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
6119 : # define USE_HIDDEN_LINKONCE 1
6120 : #else
6121 : # define USE_HIDDEN_LINKONCE 0
6122 : #endif
6123 :
6124 : /* Label count for call and return thunks. It is used to make unique
6125 : labels in call and return thunks. */
6126 : static int indirectlabelno;
6127 :
6128 : /* True if call thunk function is needed. */
6129 : static bool indirect_thunk_needed = false;
6130 :
6131 : /* Bit masks of integer registers, which contain branch target, used
6132 : by call thunk functions. */
6133 : static HARD_REG_SET indirect_thunks_used;
6134 :
6135 : /* True if return thunk function is needed. */
6136 : static bool indirect_return_needed = false;
6137 :
6138 : /* True if return thunk function via CX is needed. */
6139 : static bool indirect_return_via_cx;
6140 :
6141 : #ifndef INDIRECT_LABEL
6142 : # define INDIRECT_LABEL "LIND"
6143 : #endif
6144 :
6145 : /* Indicate what prefix is needed for an indirect branch. */
6146 : enum indirect_thunk_prefix
6147 : {
6148 : indirect_thunk_prefix_none,
6149 : indirect_thunk_prefix_nt
6150 : };
6151 :
6152 : /* Return the prefix needed for an indirect branch INSN. */
6153 :
6154 : enum indirect_thunk_prefix
6155 68 : indirect_thunk_need_prefix (rtx_insn *insn)
6156 : {
6157 68 : enum indirect_thunk_prefix need_prefix;
6158 68 : if ((cfun->machine->indirect_branch_type
6159 68 : == indirect_branch_thunk_extern)
6160 68 : && ix86_notrack_prefixed_insn_p (insn))
6161 : {
6162 : /* NOTRACK prefix is only used with external thunk so that it
6163 : can be properly updated to support CET at run-time. */
6164 : need_prefix = indirect_thunk_prefix_nt;
6165 : }
6166 : else
6167 : need_prefix = indirect_thunk_prefix_none;
6168 68 : return need_prefix;
6169 : }
6170 :
/* Fills in the label name that should be used for the indirect thunk.

   NAME receives the thunk's symbol (at most 31 chars plus NUL).
   REGNO is the register holding the branch target, or INVALID_REGNUM
   when the target lives on the stack.  NEED_PREFIX selects the "_nt"
   (NOTRACK) naming variant.  RET_P distinguishes return thunks
   (__x86_return_thunk*) from call thunks (__x86_indirect_thunk*);
   return thunks via a register are only supported through %ecx/%rcx.  */

static void
indirect_thunk_name (char name[32], unsigned int regno,
		     enum indirect_thunk_prefix need_prefix,
		     bool ret_p)
{
  /* A return thunk through any register other than CX is invalid.  */
  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    gcc_unreachable ();

  if (USE_HIDDEN_LINKONCE)
    {
      const char *prefix;

      if (need_prefix == indirect_thunk_prefix_nt
	  && regno != INVALID_REGNUM)
	{
	  /* NOTRACK prefix is only used with external thunk via
	     register so that NOTRACK prefix can be added to indirect
	     branch via register to support CET at run-time.  */
	  prefix = "_nt";
	}
      else
	prefix = "";

      const char *ret = ret_p ? "return" : "indirect";

      if (regno != INVALID_REGNUM)
	{
	  /* Legacy registers get their canonical "e"/"r" prefixed
	     name, e.g. __x86_indirect_thunk_rax.  */
	  const char *reg_prefix;
	  if (LEGACY_INT_REGNO_P (regno))
	    reg_prefix = TARGET_64BIT ? "r" : "e";
	  else
	    reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
		   ret, prefix, reg_prefix, reg_names[regno]);
	}
      else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    }
  else
    {
      /* No hidden link-once support: use compiler-local labels
	 instead of public symbols.  */
      if (regno != INVALID_REGNUM)
	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
      else
	{
	  if (ret_p)
	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
	}
    }
}
6224 :
/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

	call L2
  L1:
	pause
	lfence
	jmp L1
  L2:
	lea WORD_SIZE(%sp), %sp
	ret

   The pause/lfence loop at L1 is a speculation trap: if the CPU
   speculates through the ret it lands in the loop and stalls until
   speculation is resolved.  */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  /* Without .cfi_* directives, record the advance-location
	     op by hand before the CFA-offset adjustment.  */
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the pushed return address with the branch
	 target so the final ret jumps there.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA: pop the pushed return address; the branch target is the
	 next word on the stack.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
  /* Optionally trap straight-line speculation past the ret.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
}
6317 :
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.

   Builds a minimal FUNCTION_DECL for the thunk, emits its label
   (COMDAT + hidden where supported), runs final_start_function so
   unwind info is generated, emits the thunk body via
   output_indirect_thunk, and tears the temporary cfun down again.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      /* Mach-O has no COMDAT; emit as a weak private-extern symbol.  */
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	/* One copy per program: COMDAT group plus hidden visibility.  */
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs ("\t.hidden\t", asm_out_file);
	assemble_name (asm_out_file, name);
	putc ('\n', asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear down the temporary function context.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
6393 :
/* Bit mask of integer registers for which a -fpic get-PC thunk must
   be emitted at end of compilation; bit REGNO is set when code was
   generated that calls the thunk loading into REGNO.  Consumed by
   ix86_code_end.  */
static int pic_labels_used;
6395 :
6396 : /* Fills in the label name that should be used for a pc thunk for
6397 : the given register. */
6398 :
6399 : static void
6400 37243 : get_pc_thunk_name (char name[32], unsigned int regno)
6401 : {
6402 37243 : gcc_assert (!TARGET_64BIT);
6403 :
6404 37243 : if (USE_HIDDEN_LINKONCE)
6405 37243 : sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6406 : else
6407 37243 : ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6408 37243 : }
6409 :
6410 :
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.

   Implements TARGET_ASM_CODE_END.  Emits all deferred helper
   functions for the translation unit: indirect-branch/return thunks
   recorded in indirect_return_needed, indirect_return_via_cx,
   indirect_thunk_needed and indirect_thunks_used, and the
   __x86.get_pc_thunk.REG functions for every register flagged in
   pic_labels_used.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);

  /* Per-register call thunks for r8-r15.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  /* Per-register call thunks for the APX r16-r31 registers.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  /* Legacy registers: call thunks plus the -fpic get-PC thunks.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      /* Build a minimal FUNCTION_DECL for the get-PC thunk.  */
      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				      NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Mach-O has no COMDAT; emit as a weak private-extern symbol.  */
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* One copy per program: COMDAT group plus hidden visibility.  */
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      /* The thunk body: load the caller's return address (now at the
	 top of the stack) into the target register and return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      /* Tear down the temporary function context.  */
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
6535 :
/* Emit code for the SET_GOT patterns.

   DEST is the register that will hold the GOT/PIC base; LABEL, when
   non-NULL, is a label at the point the PIC base is (re)materialized.
   Returns the (empty) template string expected by the insn-output
   machinery; all assembly is emitted via output_asm_insn.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* Fetch the PC via a call to the per-register pc-thunk; record that
	 the thunk for this register must be emitted at end of file.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-PIC: load the address of a fresh (or supplied) label.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  /* Turn the PC value into the GOT base by adding _GLOBAL_OFFSET_TABLE_.  */
  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
6605 :
6606 : /* Generate an "push" pattern for input ARG. */
6607 :
6608 : rtx
6609 1872107 : gen_push (rtx arg, bool ppx_p)
6610 : {
6611 1872107 : struct machine_function *m = cfun->machine;
6612 :
6613 1872107 : if (m->fs.cfa_reg == stack_pointer_rtx)
6614 1597627 : m->fs.cfa_offset += UNITS_PER_WORD;
6615 1872107 : m->fs.sp_offset += UNITS_PER_WORD;
6616 :
6617 1872107 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6618 28 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6619 :
6620 1872107 : rtx stack = gen_rtx_MEM (word_mode,
6621 1872107 : gen_rtx_PRE_DEC (Pmode,
6622 : stack_pointer_rtx));
6623 3744126 : return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6624 : }
6625 :
6626 : rtx
6627 23 : gen_pushfl (void)
6628 : {
6629 23 : struct machine_function *m = cfun->machine;
6630 23 : rtx flags, mem;
6631 :
6632 23 : if (m->fs.cfa_reg == stack_pointer_rtx)
6633 0 : m->fs.cfa_offset += UNITS_PER_WORD;
6634 23 : m->fs.sp_offset += UNITS_PER_WORD;
6635 :
6636 23 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6637 :
6638 23 : mem = gen_rtx_MEM (word_mode,
6639 23 : gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6640 :
6641 23 : return gen_pushfl2 (word_mode, mem, flags);
6642 : }
6643 :
6644 : /* Generate an "pop" pattern for input ARG. */
6645 :
6646 : rtx
6647 1455780 : gen_pop (rtx arg, bool ppx_p)
6648 : {
6649 1455780 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6650 24 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6651 :
6652 1455780 : rtx stack = gen_rtx_MEM (word_mode,
6653 1455780 : gen_rtx_POST_INC (Pmode,
6654 : stack_pointer_rtx));
6655 :
6656 2911472 : return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6657 : }
6658 :
6659 : rtx
6660 21 : gen_popfl (void)
6661 : {
6662 21 : rtx flags, mem;
6663 :
6664 21 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6665 :
6666 21 : mem = gen_rtx_MEM (word_mode,
6667 21 : gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6668 :
6669 21 : return gen_popfl1 (word_mode, flags, mem);
6670 : }
6671 :
6672 : /* Generate a "push2" pattern for input ARG. */
6673 : rtx
6674 19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6675 : {
6676 19 : struct machine_function *m = cfun->machine;
6677 19 : const int offset = UNITS_PER_WORD * 2;
6678 :
6679 19 : if (m->fs.cfa_reg == stack_pointer_rtx)
6680 14 : m->fs.cfa_offset += offset;
6681 19 : m->fs.sp_offset += offset;
6682 :
6683 19 : if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6684 0 : reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6685 :
6686 19 : if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6687 0 : reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6688 :
6689 19 : return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6690 4 : : gen_push2_di (mem, reg1, reg2);
6691 : }
6692 :
6693 : /* Return >= 0 if there is an unused call-clobbered register available
6694 : for the entire function. */
6695 :
6696 : static unsigned int
6697 0 : ix86_select_alt_pic_regnum (void)
6698 : {
6699 0 : if (ix86_use_pseudo_pic_reg ())
6700 : return INVALID_REGNUM;
6701 :
6702 0 : if (crtl->is_leaf
6703 0 : && !crtl->profile
6704 0 : && !ix86_current_function_calls_tls_descriptor)
6705 : {
6706 0 : int i, drap;
6707 : /* Can't use the same register for both PIC and DRAP. */
6708 0 : if (crtl->drap_reg)
6709 0 : drap = REGNO (crtl->drap_reg);
6710 : else
6711 : drap = -1;
6712 0 : for (i = 2; i >= 0; --i)
6713 0 : if (i != drap && !df_regs_ever_live_p (i))
6714 : return i;
6715 : }
6716 :
6717 : return INVALID_REGNUM;
6718 : }
6719 :
6720 : /* Return true if REGNO is used by the epilogue. */
6721 :
6722 : bool
6723 1656573810 : ix86_epilogue_uses (int regno)
6724 : {
6725 : /* If there are no caller-saved registers, we preserve all registers,
6726 : except for MMX and x87 registers which aren't supported when saving
6727 : and restoring registers. Don't explicitly save SP register since
6728 : it is always preserved. */
6729 1656573810 : return (epilogue_completed
6730 262311074 : && (cfun->machine->call_saved_registers
6731 262311074 : == TYPE_NO_CALLER_SAVED_REGISTERS)
6732 27140 : && !fixed_regs[regno]
6733 4857 : && !STACK_REGNO_P (regno)
6734 1656578667 : && !MMX_REGNO_P (regno));
6735 : }
6736 :
6737 : /* Return nonzero if register REGNO can be used as a scratch register
6738 : in peephole2. */
6739 :
6740 : static bool
6741 1241603 : ix86_hard_regno_scratch_ok (unsigned int regno)
6742 : {
6743 : /* If there are no caller-saved registers, we can't use any register
6744 : as a scratch register after epilogue and use REGNO as scratch
6745 : register only if it has been used before to avoid saving and
6746 : restoring it. */
6747 1241603 : return ((cfun->machine->call_saved_registers
6748 1241603 : != TYPE_NO_CALLER_SAVED_REGISTERS)
6749 1241603 : || (!epilogue_completed
6750 0 : && df_regs_ever_live_p (regno)));
6751 : }
6752 :
/* Return TRUE if we need to save REGNO.

   MAYBE_EH_RETURN requests that the EH return data registers be
   considered saved when the function calls __builtin_eh_return.
   IGNORE_OUTLINED causes registers handled by the ms2sysv out-of-line
   save/restore stubs to be reported as not needing an inline save.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
	 registers, except for MMX and x87 registers which aren't
	 supported when saving and restoring registers.  Don't
	 explicitly save SP register since it is always preserved.

	 Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
	{
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  /* Any hard register covered by the return value is exempt.  */
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is callee-saved except a needed frame pointer, which is
	 handled by the common checks below.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
	return false;
      break;
    }

  /* The PIC register needs special treatment: whether it must be saved
     depends on how the GOT pointer is materialized.  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	  _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	/* Save it unless an unused call-clobbered register can hold the
	   PIC base instead.  */
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  /* EH return data registers must be preserved for the unwinder.  */
  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  /* Registers saved/restored by the ms2sysv out-of-line stubs need no
     inline save when the caller asked to ignore outlined registers.  */
  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The DRAP register must be saved unless its save/restore has been
     explicitly suppressed.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save live call-saved registers, but the hard frame
     pointer is handled by the prologue when it is needed.  */
  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
6844 :
6845 : /* Return number of saved general prupose registers. */
6846 :
6847 : static int
6848 8118237 : ix86_nsaved_regs (void)
6849 : {
6850 8118237 : int nregs = 0;
6851 8118237 : int regno;
6852 :
6853 754996041 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6854 746877804 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6855 8186197 : nregs ++;
6856 8118237 : return nregs;
6857 : }
6858 :
6859 : /* Return number of saved SSE registers. */
6860 :
6861 : static int
6862 8153150 : ix86_nsaved_sseregs (void)
6863 : {
6864 8153150 : int nregs = 0;
6865 8153150 : int regno;
6866 :
6867 7353000 : if (!TARGET_64BIT_MS_ABI
6868 8153150 : && (cfun->machine->call_saved_registers
6869 7927744 : != TYPE_NO_CALLER_SAVED_REGISTERS))
6870 : return 0;
6871 21031299 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6872 20805156 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6873 1896395 : nregs ++;
6874 : return nregs;
6875 : }
6876 :
6877 : /* Given FROM and TO register numbers, say whether this elimination is
6878 : allowed. If stack alignment is needed, we can only replace argument
6879 : pointer with hard frame pointer, or replace frame pointer with stack
6880 : pointer. Otherwise, frame pointer elimination is automatically
6881 : handled and all other eliminations are valid. */
6882 :
6883 : static bool
6884 48080289 : ix86_can_eliminate (const int from, const int to)
6885 : {
6886 48080289 : if (stack_realign_fp)
6887 1704188 : return ((from == ARG_POINTER_REGNUM
6888 1704188 : && to == HARD_FRAME_POINTER_REGNUM)
6889 1704188 : || (from == FRAME_POINTER_REGNUM
6890 1704188 : && to == STACK_POINTER_REGNUM));
6891 : else
6892 86352244 : return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6893 : }
6894 :
6895 : /* Return the offset between two registers, one to be eliminated, and the other
6896 : its replacement, at the start of a routine. */
6897 :
6898 : HOST_WIDE_INT
6899 141078402 : ix86_initial_elimination_offset (int from, int to)
6900 : {
6901 141078402 : struct ix86_frame &frame = cfun->machine->frame;
6902 :
6903 141078402 : if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6904 10370397 : return frame.hard_frame_pointer_offset;
6905 130708005 : else if (from == FRAME_POINTER_REGNUM
6906 130708005 : && to == HARD_FRAME_POINTER_REGNUM)
6907 8082997 : return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6908 : else
6909 : {
6910 122625008 : gcc_assert (to == STACK_POINTER_REGNUM);
6911 :
6912 122625008 : if (from == ARG_POINTER_REGNUM)
6913 114542011 : return frame.stack_pointer_offset;
6914 :
6915 8082997 : gcc_assert (from == FRAME_POINTER_REGNUM);
6916 8082997 : return frame.stack_pointer_offset - frame.frame_pointer_offset;
6917 : }
6918 : }
6919 :
6920 : /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6921 : void
6922 0 : warn_once_call_ms2sysv_xlogues (const char *feature)
6923 : {
6924 0 : static bool warned_once = false;
6925 0 : if (!warned_once)
6926 : {
6927 0 : warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6928 : feature);
6929 0 : warned_once = true;
6930 : }
6931 0 : }
6932 :
6933 : /* Return the probing interval for -fstack-clash-protection. */
6934 :
6935 : static HOST_WIDE_INT
6936 489 : get_probe_interval (void)
6937 : {
6938 335 : if (flag_stack_clash_protection)
6939 405 : return (HOST_WIDE_INT_1U
6940 405 : << param_stack_clash_protection_probe_interval);
6941 : else
6942 : return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6943 : }
6944 :
6945 : /* When using -fsplit-stack, the allocation routines set a field in
6946 : the TCB to the bottom of the stack plus this much space, measured
6947 : in bytes. */
6948 :
6949 : #define SPLIT_STACK_AVAILABLE 256
6950 :
6951 : /* Return true if push2/pop2 can be generated. */
6952 :
6953 : static bool
6954 8118888 : ix86_can_use_push2pop2 (void)
6955 : {
6956 : /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6957 8118888 : unsigned int incoming_stack_boundary
6958 8118888 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6959 8118888 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6960 8118888 : return incoming_stack_boundary % 128 == 0;
6961 : }
6962 :
6963 : /* Helper function to determine whether push2/pop2 can be used in prologue or
6964 : epilogue for register save/restore. */
6965 : static bool
6966 8118237 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6967 : {
6968 8118237 : if (!ix86_can_use_push2pop2 ())
6969 : return false;
6970 8082286 : int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6971 8082286 : return TARGET_APX_PUSH2POP2
6972 2829 : && !cfun->machine->frame.save_regs_using_mov
6973 2817 : && cfun->machine->func_type == TYPE_NORMAL
6974 8085095 : && (nregs + aligned) >= 3;
6975 : }
6976 :
6977 : /* Check if push/pop should be used to save/restore registers. */
6978 : static bool
6979 8841911 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
6980 : {
6981 3183971 : return ((!to_allocate && cfun->machine->frame.nregs <= 1)
6982 5908147 : || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6983 : /* If static stack checking is enabled and done with probes,
6984 : the registers need to be saved before allocating the frame. */
6985 5907486 : || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6986 : /* If stack clash probing needs a loop, then it needs a
6987 : scratch register. But the returned register is only guaranteed
6988 : to be safe to use after register saves are complete. So if
6989 : stack clash protections are enabled and the allocated frame is
6990 : larger than the probe interval, then use pushes to save
6991 : callee saved registers. */
6992 14749317 : || (flag_stack_clash_protection
6993 335 : && !ix86_target_stack_probe ()
6994 335 : && to_allocate > get_probe_interval ()));
6995 : }
6996 :
6997 : /* Fill structure ix86_frame about frame of currently computed function. */
6998 :
6999 : static void
7000 8118237 : ix86_compute_frame_layout (void)
7001 : {
7002 8118237 : struct ix86_frame *frame = &cfun->machine->frame;
7003 8118237 : struct machine_function *m = cfun->machine;
7004 8118237 : unsigned HOST_WIDE_INT stack_alignment_needed;
7005 8118237 : HOST_WIDE_INT offset;
7006 8118237 : unsigned HOST_WIDE_INT preferred_alignment;
7007 8118237 : HOST_WIDE_INT size = ix86_get_frame_size ();
7008 8118237 : HOST_WIDE_INT to_allocate;
7009 :
7010 : /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
7011 : * ms_abi functions that call a sysv function. We now need to prune away
7012 : * cases where it should be disabled. */
7013 8118237 : if (TARGET_64BIT && m->call_ms2sysv)
7014 : {
7015 35225 : gcc_assert (TARGET_64BIT_MS_ABI);
7016 35225 : gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
7017 35225 : gcc_assert (!TARGET_SEH);
7018 35225 : gcc_assert (TARGET_SSE);
7019 35225 : gcc_assert (!ix86_using_red_zone ());
7020 :
7021 35225 : if (crtl->calls_eh_return)
7022 : {
7023 0 : gcc_assert (!reload_completed);
7024 0 : m->call_ms2sysv = false;
7025 0 : warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
7026 : }
7027 :
7028 35225 : else if (ix86_static_chain_on_stack)
7029 : {
7030 0 : gcc_assert (!reload_completed);
7031 0 : m->call_ms2sysv = false;
7032 0 : warn_once_call_ms2sysv_xlogues ("static call chains");
7033 : }
7034 :
7035 : /* Finally, compute which registers the stub will manage. */
7036 : else
7037 : {
7038 35225 : unsigned count = xlogue_layout::count_stub_managed_regs ();
7039 35225 : m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
7040 35225 : m->call_ms2sysv_pad_in = 0;
7041 : }
7042 : }
7043 :
7044 8118237 : frame->nregs = ix86_nsaved_regs ();
7045 8118237 : frame->nsseregs = ix86_nsaved_sseregs ();
7046 :
7047 : /* 64-bit MS ABI seem to require stack alignment to be always 16,
7048 : except for function prologues, leaf functions and when the defult
7049 : incoming stack boundary is overriden at command line or via
7050 : force_align_arg_pointer attribute.
7051 :
7052 : Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
7053 : at call sites, including profile function calls.
7054 :
7055 : For APX push2/pop2, the stack also requires 128b alignment. */
7056 8118237 : if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
7057 67 : && crtl->preferred_stack_boundary < 128)
7058 8118302 : || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
7059 225404 : && crtl->preferred_stack_boundary < 128)
7060 0 : && (!crtl->is_leaf || cfun->calls_alloca != 0
7061 0 : || ix86_current_function_calls_tls_descriptor
7062 0 : || (TARGET_MACHO && crtl->profile)
7063 0 : || ix86_incoming_stack_boundary < 128)))
7064 : {
7065 2 : crtl->preferred_stack_boundary = 128;
7066 2 : if (crtl->stack_alignment_needed < 128)
7067 1 : crtl->stack_alignment_needed = 128;
7068 : }
7069 :
7070 8118237 : stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7071 8118237 : preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7072 :
7073 8118237 : gcc_assert (!size || stack_alignment_needed);
7074 8918358 : gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7075 8118237 : gcc_assert (preferred_alignment <= stack_alignment_needed);
7076 :
7077 : /* The only ABI saving SSE regs should be 64-bit ms_abi or with
7078 : no_caller_saved_registers attribue. */
7079 8118237 : gcc_assert (TARGET_64BIT
7080 : || (cfun->machine->call_saved_registers
7081 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7082 : || !frame->nsseregs);
7083 8118237 : if (TARGET_64BIT && m->call_ms2sysv)
7084 : {
7085 35225 : gcc_assert (stack_alignment_needed >= 16);
7086 35225 : gcc_assert ((cfun->machine->call_saved_registers
7087 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7088 : || !frame->nsseregs);
7089 : }
7090 :
7091 : /* For SEH we have to limit the amount of code movement into the prologue.
7092 : At present we do this via a BLOCKAGE, at which point there's very little
7093 : scheduling that can be done, which means that there's very little point
7094 : in doing anything except PUSHs. */
7095 8118237 : if (TARGET_SEH)
7096 : m->use_fast_prologue_epilogue = false;
7097 8118237 : else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7098 : {
7099 7785220 : int count = frame->nregs;
7100 7785220 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
7101 :
7102 : /* The fast prologue uses move instead of push to save registers. This
7103 : is significantly longer, but also executes faster as modern hardware
7104 : can execute the moves in parallel, but can't do that for push/pop.
7105 :
7106 : Be careful about choosing what prologue to emit: When function takes
7107 : many instructions to execute we may use slow version as well as in
7108 : case function is known to be outside hot spot (this is known with
7109 : feedback only). Weight the size of function by number of registers
7110 : to save as it is cheap to use one or two push instructions but very
7111 : slow to use many of them.
7112 :
7113 : Calling this hook multiple times with the same frame requirements
7114 : must produce the same layout, since the RA might otherwise be
7115 : unable to reach a fixed point or might fail its final sanity checks.
7116 : This means that once we've assumed that a function does or doesn't
7117 : have a particular size, we have to stick to that assumption
7118 : regardless of how the function has changed since. */
7119 7785220 : if (count)
7120 2600395 : count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7121 7785220 : if (node->frequency < NODE_FREQUENCY_NORMAL
7122 7094868 : || (flag_branch_probabilities
7123 986 : && node->frequency < NODE_FREQUENCY_HOT))
7124 690677 : m->use_fast_prologue_epilogue = false;
7125 : else
7126 : {
7127 7094543 : if (count != frame->expensive_count)
7128 : {
7129 285493 : frame->expensive_count = count;
7130 285493 : frame->expensive_p = expensive_function_p (count);
7131 : }
7132 7094543 : m->use_fast_prologue_epilogue = !frame->expensive_p;
7133 : }
7134 : }
7135 :
7136 8118237 : frame->save_regs_using_mov
7137 8118237 : = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7138 :
7139 : /* Skip return address and error code in exception handler. */
7140 8118237 : offset = INCOMING_FRAME_SP_OFFSET;
7141 :
7142 : /* Skip pushed static chain. */
7143 8118237 : if (ix86_static_chain_on_stack)
7144 0 : offset += UNITS_PER_WORD;
7145 :
7146 : /* Skip saved base pointer. */
7147 8118237 : if (frame_pointer_needed)
7148 2732125 : offset += UNITS_PER_WORD;
7149 8118237 : frame->hfp_save_offset = offset;
7150 :
7151 : /* The traditional frame pointer location is at the top of the frame. */
7152 8118237 : frame->hard_frame_pointer_offset = offset;
7153 :
7154 : /* Register save area */
7155 8118237 : offset += frame->nregs * UNITS_PER_WORD;
7156 8118237 : frame->reg_save_offset = offset;
7157 :
7158 : /* Calculate the size of the va-arg area (not including padding, if any). */
7159 8118237 : frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7160 :
7161 : /* Also adjust stack_realign_offset for the largest alignment of
7162 : stack slot actually used. */
7163 8118237 : if (stack_realign_fp
7164 7811701 : || (cfun->machine->max_used_stack_alignment != 0
7165 133 : && (offset % cfun->machine->max_used_stack_alignment) != 0))
7166 : {
7167 : /* We may need a 16-byte aligned stack for the remainder of the
7168 : register save area, but the stack frame for the local function
7169 : may require a greater alignment if using AVX/2/512. In order
7170 : to avoid wasting space, we first calculate the space needed for
7171 : the rest of the register saves, add that to the stack pointer,
7172 : and then realign the stack to the boundary of the start of the
7173 : frame for the local function. */
7174 306601 : HOST_WIDE_INT space_needed = 0;
7175 306601 : HOST_WIDE_INT sse_reg_space_needed = 0;
7176 :
7177 306601 : if (TARGET_64BIT)
7178 : {
7179 304801 : if (m->call_ms2sysv)
7180 : {
7181 6415 : m->call_ms2sysv_pad_in = 0;
7182 6415 : space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7183 : }
7184 :
7185 298386 : else if (frame->nsseregs)
7186 : /* The only ABI that has saved SSE registers (Win64) also has a
7187 : 16-byte aligned default stack. However, many programs violate
7188 : the ABI, and Wine64 forces stack realignment to compensate. */
7189 6447 : space_needed = frame->nsseregs * 16;
7190 :
7191 304801 : sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7192 :
7193 : /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7194 : rounding to be pedantic. */
7195 304801 : space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7196 : }
7197 : else
7198 1800 : space_needed = frame->va_arg_size;
7199 :
7200 : /* Record the allocation size required prior to the realignment AND. */
7201 306601 : frame->stack_realign_allocate = space_needed;
7202 :
7203 : /* The re-aligned stack starts at frame->stack_realign_offset. Values
7204 : before this point are not directly comparable with values below
7205 : this point. Use sp_valid_at to determine if the stack pointer is
7206 : valid for a given offset, fp_valid_at for the frame pointer, or
7207 : choose_baseaddr to have a base register chosen for you.
7208 :
7209 : Note that the result of (frame->stack_realign_offset
7210 : & (stack_alignment_needed - 1)) may not equal zero. */
7211 306601 : offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7212 306601 : frame->stack_realign_offset = offset - space_needed;
7213 306601 : frame->sse_reg_save_offset = frame->stack_realign_offset
7214 306601 : + sse_reg_space_needed;
7215 306601 : }
7216 : else
7217 : {
7218 7811636 : frame->stack_realign_offset = offset;
7219 :
7220 7811636 : if (TARGET_64BIT && m->call_ms2sysv)
7221 : {
7222 28810 : m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7223 28810 : offset += xlogue_layout::get_instance ().get_stack_space_used ();
7224 : }
7225 :
7226 : /* Align and set SSE register save area. */
7227 7782826 : else if (frame->nsseregs)
7228 : {
7229 : /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7230 : required and the DRAP re-alignment boundary is at least 16 bytes,
7231 : then we want the SSE register save area properly aligned. */
7232 183182 : if (ix86_incoming_stack_boundary >= 128
7233 6400 : || (stack_realign_drap && stack_alignment_needed >= 16))
7234 183182 : offset = ROUND_UP (offset, 16);
7235 183182 : offset += frame->nsseregs * 16;
7236 : }
7237 7811636 : frame->sse_reg_save_offset = offset;
7238 7811636 : offset += frame->va_arg_size;
7239 : }
7240 :
7241 : /* Align start of frame for local function. When a function call
7242 : is removed, it may become a leaf function. But if argument may
7243 : be passed on stack, we need to align the stack when there is no
7244 : tail call. */
7245 8118237 : if (m->call_ms2sysv
7246 8083012 : || frame->va_arg_size != 0
7247 8003889 : || size != 0
7248 4372328 : || !crtl->is_leaf
7249 2046006 : || (!crtl->tail_call_emit
7250 1720791 : && cfun->machine->outgoing_args_on_stack)
7251 2045956 : || cfun->calls_alloca
7252 10162406 : || ix86_current_function_calls_tls_descriptor)
7253 6074482 : offset = ROUND_UP (offset, stack_alignment_needed);
7254 :
7255 : /* Frame pointer points here. */
7256 8118237 : frame->frame_pointer_offset = offset;
7257 :
7258 8118237 : offset += size;
7259 :
7260 : /* Add outgoing arguments area. Can be skipped if we eliminated
7261 : all the function calls as dead code.
7262 : Skipping is however impossible when function calls alloca. Alloca
7263 : expander assumes that last crtl->outgoing_args_size
7264 : of stack frame are unused. */
7265 8118237 : if (ACCUMULATE_OUTGOING_ARGS
7266 8736158 : && (!crtl->is_leaf || cfun->calls_alloca
7267 391782 : || ix86_current_function_calls_tls_descriptor))
7268 : {
7269 226139 : offset += crtl->outgoing_args_size;
7270 226139 : frame->outgoing_arguments_size = crtl->outgoing_args_size;
7271 : }
7272 : else
7273 7892098 : frame->outgoing_arguments_size = 0;
7274 :
7275 : /* Align stack boundary. Only needed if we're calling another function
7276 : or using alloca. */
7277 2744149 : if (!crtl->is_leaf || cfun->calls_alloca
7278 10858909 : || ix86_current_function_calls_tls_descriptor)
7279 5379363 : offset = ROUND_UP (offset, preferred_alignment);
7280 :
7281 : /* We've reached end of stack frame. */
7282 8118237 : frame->stack_pointer_offset = offset;
7283 :
7284 : /* Size prologue needs to allocate. */
7285 8118237 : to_allocate = offset - frame->sse_reg_save_offset;
7286 :
7287 8118237 : if (save_regs_using_push_pop (to_allocate))
7288 2561606 : frame->save_regs_using_mov = false;
7289 :
7290 8118237 : if (ix86_using_red_zone ()
7291 7092521 : && crtl->sp_is_unchanging
7292 6450241 : && crtl->is_leaf
7293 2645102 : && !cfun->machine->asm_redzone_clobber_seen
7294 2645089 : && !ix86_pc_thunk_call_expanded
7295 10763326 : && !ix86_current_function_calls_tls_descriptor)
7296 : {
7297 2645074 : frame->red_zone_size = to_allocate;
7298 2645074 : if (frame->save_regs_using_mov)
7299 139945 : frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7300 2645074 : if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7301 102783 : frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7302 : }
7303 : else
7304 5473163 : frame->red_zone_size = 0;
7305 8118237 : frame->stack_pointer_offset -= frame->red_zone_size;
7306 :
7307 : /* The SEH frame pointer location is near the bottom of the frame.
7308 : This is enforced by the fact that the difference between the
7309 : stack pointer and the frame pointer is limited to 240 bytes in
7310 : the unwind data structure. */
7311 8118237 : if (TARGET_SEH)
7312 : {
7313 : /* Force the frame pointer to point at or below the lowest register save
7314 : area, see the SEH code in config/i386/winnt.cc for the rationale. */
7315 : frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7316 :
7317 : /* If we can leave the frame pointer where it is, do so; however return
7318 : the establisher frame for __builtin_frame_address (0) or else if the
7319 : frame overflows the SEH maximum frame size.
7320 :
7321 : Note that the value returned by __builtin_frame_address (0) is quite
7322 : constrained, because setjmp is piggybacked on the SEH machinery with
7323 : recent versions of MinGW:
7324 :
7325 : # elif defined(__SEH__)
7326 : # if defined(__aarch64__) || defined(_ARM64_)
7327 : # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7328 : # elif (__MINGW_GCC_VERSION < 40702)
7329 : # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7330 : # else
7331 : # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7332 : # endif
7333 :
7334 : and the second argument passed to _setjmp, if not null, is forwarded
7335 : to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7336 : built an ExceptionRecord on the fly describing the setjmp buffer). */
7337 : const HOST_WIDE_INT diff
7338 : = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7339 : if (diff <= 255 && !crtl->accesses_prior_frames)
7340 : {
7341 : /* The resulting diff will be a multiple of 16 lower than 255,
7342 : i.e. at most 240 as required by the unwind data structure. */
7343 : frame->hard_frame_pointer_offset += (diff & 15);
7344 : }
7345 : else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7346 : {
7347 : /* Ideally we'd determine what portion of the local stack frame
7348 : (within the constraint of the lowest 240) is most heavily used.
7349 : But without that complication, simply bias the frame pointer
7350 : by 128 bytes so as to maximize the amount of the local stack
7351 : frame that is addressable with 8-bit offsets. */
7352 : frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7353 : }
7354 : else
7355 : frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7356 : }
7357 8118237 : }
7358 :
7359 : /* This is semi-inlined memory_address_length, but simplified
7360 : since we know that we're always dealing with reg+offset, and
7361 : to avoid having to create and discard all that rtl. */
7362 :
7363 : static inline int
7364 1019563 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7365 : {
7366 1019563 : int len = 4;
7367 :
7368 0 : if (offset == 0)
7369 : {
7370 : /* EBP and R13 cannot be encoded without an offset. */
7371 0 : len = (regno == BP_REG || regno == R13_REG);
7372 : }
7373 1011457 : else if (IN_RANGE (offset, -128, 127))
7374 635693 : len = 1;
7375 :
7376 : /* ESP and R12 must be encoded with a SIB byte. */
7377 0 : if (regno == SP_REG || regno == R12_REG)
7378 0 : len++;
7379 :
7380 1019563 : return len;
7381 : }
7382 :
7383 : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7384 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7385 :
7386 : static bool
7387 3505103 : sp_valid_at (HOST_WIDE_INT cfa_offset)
7388 : {
7389 3505103 : const struct machine_frame_state &fs = cfun->machine->fs;
7390 3505103 : if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7391 : {
7392 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7393 46396 : gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7394 : return false;
7395 : }
7396 3458707 : return fs.sp_valid;
7397 : }
7398 :
7399 : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7400 : the frame save area. The register is saved at CFA - CFA_OFFSET. */
7401 :
7402 : static inline bool
7403 1389367 : fp_valid_at (HOST_WIDE_INT cfa_offset)
7404 : {
7405 1389367 : const struct machine_frame_state &fs = cfun->machine->fs;
7406 1389367 : if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7407 : {
7408 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7409 28328 : gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7410 : return false;
7411 : }
7412 1361039 : return fs.fp_valid;
7413 : }
7414 :
7415 : /* Choose a base register based upon alignment requested, speed and/or
7416 : size. */
7417 :
7418 : static void
7419 1389367 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7420 : HOST_WIDE_INT &base_offset,
7421 : unsigned int align_reqested, unsigned int *align)
7422 : {
7423 1389367 : const struct machine_function *m = cfun->machine;
7424 1389367 : unsigned int hfp_align;
7425 1389367 : unsigned int drap_align;
7426 1389367 : unsigned int sp_align;
7427 1389367 : bool hfp_ok = fp_valid_at (cfa_offset);
7428 1389367 : bool drap_ok = m->fs.drap_valid;
7429 1389367 : bool sp_ok = sp_valid_at (cfa_offset);
7430 :
7431 1389367 : hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7432 :
7433 : /* Filter out any registers that don't meet the requested alignment
7434 : criteria. */
7435 1389367 : if (align_reqested)
7436 : {
7437 974972 : if (m->fs.realigned)
7438 28160 : hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7439 : /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
7440 : notes (which we would need to use a realigned stack pointer),
7441 : so disable on SEH targets. */
7442 946812 : else if (m->fs.sp_realigned)
7443 28328 : sp_align = crtl->stack_alignment_needed;
7444 :
7445 974972 : hfp_ok = hfp_ok && hfp_align >= align_reqested;
7446 974972 : drap_ok = drap_ok && drap_align >= align_reqested;
7447 974972 : sp_ok = sp_ok && sp_align >= align_reqested;
7448 : }
7449 :
7450 1389367 : if (m->use_fast_prologue_epilogue)
7451 : {
7452 : /* Choose the base register most likely to allow the most scheduling
7453 : opportunities. Generally FP is valid throughout the function,
7454 : while DRAP must be reloaded within the epilogue. But choose either
7455 : over the SP due to increased encoding size. */
7456 :
7457 665191 : if (hfp_ok)
7458 : {
7459 117557 : base_reg = hard_frame_pointer_rtx;
7460 117557 : base_offset = m->fs.fp_offset - cfa_offset;
7461 : }
7462 547634 : else if (drap_ok)
7463 : {
7464 0 : base_reg = crtl->drap_reg;
7465 0 : base_offset = 0 - cfa_offset;
7466 : }
7467 547634 : else if (sp_ok)
7468 : {
7469 547634 : base_reg = stack_pointer_rtx;
7470 547634 : base_offset = m->fs.sp_offset - cfa_offset;
7471 : }
7472 : }
7473 : else
7474 : {
7475 724176 : HOST_WIDE_INT toffset;
7476 724176 : int len = 16, tlen;
7477 :
7478 : /* Choose the base register with the smallest address encoding.
7479 : With a tie, choose FP > DRAP > SP. */
7480 724176 : if (sp_ok)
7481 : {
7482 706851 : base_reg = stack_pointer_rtx;
7483 706851 : base_offset = m->fs.sp_offset - cfa_offset;
7484 1405596 : len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7485 : }
7486 724176 : if (drap_ok)
7487 : {
7488 0 : toffset = 0 - cfa_offset;
7489 0 : tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7490 0 : if (tlen <= len)
7491 : {
7492 0 : base_reg = crtl->drap_reg;
7493 0 : base_offset = toffset;
7494 0 : len = tlen;
7495 : }
7496 : }
7497 724176 : if (hfp_ok)
7498 : {
7499 312712 : toffset = m->fs.fp_offset - cfa_offset;
7500 312712 : tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7501 312712 : if (tlen <= len)
7502 : {
7503 222080 : base_reg = hard_frame_pointer_rtx;
7504 222080 : base_offset = toffset;
7505 : }
7506 : }
7507 : }
7508 :
7509 : /* Set the align return value. */
7510 1389367 : if (align)
7511 : {
7512 974972 : if (base_reg == stack_pointer_rtx)
7513 693241 : *align = sp_align;
7514 281731 : else if (base_reg == crtl->drap_reg)
7515 0 : *align = drap_align;
7516 281731 : else if (base_reg == hard_frame_pointer_rtx)
7517 281731 : *align = hfp_align;
7518 : }
7519 1389367 : }
7520 :
7521 : /* Return an RTX that points to CFA_OFFSET within the stack frame and
7522 : the alignment of address. If ALIGN is non-null, it should point to
7523 : an alignment value (in bits) that is preferred or zero and will
7524 : recieve the alignment of the base register that was selected,
7525 : irrespective of rather or not CFA_OFFSET is a multiple of that
7526 : alignment value. If it is possible for the base register offset to be
7527 : non-immediate then SCRATCH_REGNO should specify a scratch register to
7528 : use.
7529 :
7530 : The valid base registers are taken from CFUN->MACHINE->FS. */
7531 :
7532 : static rtx
7533 1389367 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7534 : unsigned int scratch_regno = INVALID_REGNUM)
7535 : {
7536 1389367 : rtx base_reg = NULL;
7537 1389367 : HOST_WIDE_INT base_offset = 0;
7538 :
7539 : /* If a specific alignment is requested, try to get a base register
7540 : with that alignment first. */
7541 1389367 : if (align && *align)
7542 974972 : choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7543 :
7544 1389367 : if (!base_reg)
7545 414395 : choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7546 :
7547 1389367 : gcc_assert (base_reg != NULL);
7548 :
7549 1389367 : rtx base_offset_rtx = GEN_INT (base_offset);
7550 :
7551 1440897 : if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7552 : {
7553 1 : gcc_assert (scratch_regno != INVALID_REGNUM);
7554 :
7555 1 : rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7556 1 : emit_move_insn (scratch_reg, base_offset_rtx);
7557 :
7558 1 : return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7559 : }
7560 :
7561 1440896 : return plus_constant (Pmode, base_reg, base_offset);
7562 : }
7563 :
7564 : /* Emit code to save registers in the prologue. */
7565 :
static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  /* APX PPX push variants are not usable when the function performs an
     eh_return.  */
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  /* Without PUSH2/POP2 (or outside a normal function), emit one PUSH
     per call-saved general register, from highest regno downward.  */
  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* APX PUSH2: save registers two at a time.  Pairs are only formed
	 while the stack pointer is 16-byte aligned; a single push is
	 used first to reach that alignment when needed.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    if (aligned)
	      {
		/* Collect registers until a pair is complete.  */
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 gen_rtx_REG (word_mode,
							      regno_list[0]),
						 gen_rtx_REG (word_mode,
							      regno_list[1]),
						 use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* Describe the PUSH2 to the unwinder explicitly as
		       two word-sized stores plus the stack pointer
		       adjustment via a REG_FRAME_RELATED_EXPR note.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    /* Start collecting the next pair.  */
		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* A single push restores 16-byte alignment for the
		   subsequent PUSH2s.  */
		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					    use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An unpaired register left over is pushed on its own.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0]),
				      use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
7659 :
7660 : /* Emit a single register save at CFA - CFA_OFFSET. */
7661 :
static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Pick the best base register for addressing CFA - CFA_OFFSET.  */
  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Strip the address down to its base register so we can decide which
     unwind note, if any, is needed below.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* Saving through a realigned stack pointer also needs an explicit
     CFA expression note.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
7734 :
7735 : /* Emit code to save registers using MOV insns.
7736 : First register is stored at CFA - CFA_OFFSET. */
7737 : static void
7738 45826 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7739 : {
7740 45826 : unsigned int regno;
7741 :
7742 4261818 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7743 4215992 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7744 : {
7745 : /* Skip registers, already processed by shrink wrap separate. */
7746 192698 : if (!cfun->machine->reg_is_wrapped_separately[regno])
7747 85015 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7748 207148 : cfa_offset -= UNITS_PER_WORD;
7749 : }
7750 45826 : }
7751 :
7752 : /* Emit code to save SSE registers using MOV insns.
7753 : First register is stored at CFA - CFA_OFFSET. */
7754 : static void
7755 33353 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7756 : {
7757 33353 : unsigned int regno;
7758 :
7759 3101829 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7760 3068476 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7761 : {
7762 333557 : ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7763 333557 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
7764 : }
7765 33353 : }
7766 :
7767 : static GTY(()) rtx queued_cfa_restores;
7768 :
7769 : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7770 : manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7771 : Don't add the note if the previously saved value will be left untouched
7772 : within stack red-zone till return, as unwinders can find the same value
7773 : in the register and on the stack. */
7774 :
7775 : static void
7776 2274698 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7777 : {
7778 2274698 : if (!crtl->shrink_wrapped
7779 2255484 : && cfa_offset <= cfun->machine->fs.red_zone_offset)
7780 : return;
7781 :
7782 770598 : if (insn)
7783 : {
7784 359760 : add_reg_note (insn, REG_CFA_RESTORE, reg);
7785 359760 : RTX_FRAME_RELATED_P (insn) = 1;
7786 : }
7787 : else
7788 410838 : queued_cfa_restores
7789 410838 : = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7790 : }
7791 :
7792 : /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7793 :
7794 : static void
7795 2537226 : ix86_add_queued_cfa_restore_notes (rtx insn)
7796 : {
7797 2537226 : rtx last;
7798 2537226 : if (!queued_cfa_restores)
7799 : return;
7800 410838 : for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7801 : ;
7802 53154 : XEXP (last, 1) = REG_NOTES (insn);
7803 53154 : REG_NOTES (insn) = queued_cfa_restores;
7804 53154 : queued_cfa_restores = NULL_RTX;
7805 53154 : RTX_FRAME_RELATED_P (insn) = 1;
7806 : }
7807 :
7808 : /* Expand prologue or epilogue stack adjustment.
7809 : The pattern exist to put a dependency on all ebp-based memory accesses.
7810 : STYLE should be negative if instructions should be marked as frame related,
7811 : zero if %r11 register is live and cannot be freely used and positive
7812 : otherwise. */
7813 :
static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* An offset too large for an immediate must first be loaded into a
     temporary register.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  /* STYLE == 0: r11 is live, so borrow the hard frame pointer,
	     which therefore must not be a source or destination here.  */
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /* Shrink wrap separate may insert prologue between TEST and JMP.  In order
     not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, dest, src, addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, dest, src, addend));

  /* Non-negative STYLE: attach any pending CFA-restore notes here.  */
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      /* This adjustment moves the CFA register itself; record the new
	 CFA register/offset and note it for the unwinder.  */
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  /* The visible insn adds a register, not the constant; give
	     the unwinder the constant-form expression instead.  */
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Track the new stack-pointer state in the frame-state machine.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
7909 :
7910 : /* Find an available register to be used as dynamic realign argument
7911 : pointer regsiter. Such a register will be written in prologue and
7912 : used in begin of body, so it must not be
7913 : 1. parameter passing register.
7914 : 2. GOT pointer.
7915 : We reuse static-chain register if it is available. Otherwise, we
7916 : use DI for i386 and R13 for x86-64. We chose R13 since it has
7917 : shorter encoding.
7918 :
7919 : Return: the regno of chosen register. */
7920 :
7921 : static unsigned int
7922 7280 : find_drap_reg (void)
7923 : {
7924 7280 : tree decl = cfun->decl;
7925 :
7926 : /* Always use callee-saved register if there are no caller-saved
7927 : registers. */
7928 7280 : if (TARGET_64BIT)
7929 : {
7930 : /* Use R13 for nested function or function need static chain.
7931 : Since function with tail call may use any caller-saved
7932 : registers in epilogue, DRAP must not use caller-saved
7933 : register in such case. */
7934 6995 : if (DECL_STATIC_CHAIN (decl)
7935 6953 : || (cfun->machine->call_saved_registers
7936 6953 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7937 13948 : || crtl->tail_call_emit)
7938 190 : return R13_REG;
7939 :
7940 : return R10_REG;
7941 : }
7942 : else
7943 : {
7944 : /* Use DI for nested function or function need static chain.
7945 : Since function with tail call may use any caller-saved
7946 : registers in epilogue, DRAP must not use caller-saved
7947 : register in such case. */
7948 285 : if (DECL_STATIC_CHAIN (decl)
7949 285 : || (cfun->machine->call_saved_registers
7950 285 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7951 285 : || crtl->tail_call_emit
7952 550 : || crtl->calls_eh_return)
7953 : return DI_REG;
7954 :
7955 : /* Reuse static chain register if it isn't used for parameter
7956 : passing. */
7957 265 : if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7958 : {
7959 265 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7960 265 : if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7961 : return CX_REG;
7962 : }
7963 0 : return DI_REG;
7964 : }
7965 : }
7966 :
7967 : /* Return minimum incoming stack alignment. */
7968 :
7969 : static unsigned int
7970 1606624 : ix86_minimum_incoming_stack_boundary (bool sibcall)
7971 : {
7972 1606624 : unsigned int incoming_stack_boundary;
7973 :
7974 : /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7975 1606624 : if (cfun->machine->func_type != TYPE_NORMAL)
7976 120 : incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7977 : /* Prefer the one specified at command line. */
7978 1606504 : else if (ix86_user_incoming_stack_boundary)
7979 : incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7980 : /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
7981 : if -mstackrealign is used, it isn't used for sibcall check and
7982 : estimated stack alignment is 128bit. */
7983 1606482 : else if (!sibcall
7984 1472003 : && ix86_force_align_arg_pointer
7985 4574 : && crtl->stack_alignment_estimated == 128)
7986 596 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
7987 : else
7988 1605886 : incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7989 :
7990 : /* Incoming stack alignment can be changed on individual functions
7991 : via force_align_arg_pointer attribute. We use the smallest
7992 : incoming stack boundary. */
7993 1606624 : if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7994 3212642 : && lookup_attribute ("force_align_arg_pointer",
7995 1606018 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7996 5708 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
7997 :
7998 : /* The incoming stack frame has to be aligned at least at
7999 : parm_stack_boundary. */
8000 1606624 : if (incoming_stack_boundary < crtl->parm_stack_boundary)
8001 : incoming_stack_boundary = crtl->parm_stack_boundary;
8002 :
8003 : /* Stack at entrance of main is aligned by runtime. We use the
8004 : smallest incoming stack boundary. */
8005 1606624 : if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8006 140508 : && DECL_NAME (current_function_decl)
8007 140508 : && MAIN_NAME_P (DECL_NAME (current_function_decl))
8008 1609106 : && DECL_FILE_SCOPE_P (current_function_decl))
8009 2482 : incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8010 :
8011 1606624 : return incoming_stack_boundary;
8012 : }
8013 :
8014 : /* Update incoming stack boundary and estimated stack alignment. */
8015 :
8016 : static void
8017 1472140 : ix86_update_stack_boundary (void)
8018 : {
8019 1472140 : ix86_incoming_stack_boundary
8020 1472140 : = ix86_minimum_incoming_stack_boundary (false);
8021 :
8022 : /* x86_64 vararg needs 16byte stack alignment for register save area. */
8023 1472140 : if (TARGET_64BIT
8024 1345859 : && cfun->stdarg
8025 21359 : && crtl->stack_alignment_estimated < 128)
8026 10170 : crtl->stack_alignment_estimated = 128;
8027 :
8028 : /* __tls_get_addr needs to be called with 16-byte aligned stack. */
8029 1472140 : if (ix86_tls_descriptor_calls_expanded_in_cfun
8030 1072 : && crtl->preferred_stack_boundary < 128)
8031 745 : crtl->preferred_stack_boundary = 128;
8032 :
8033 : /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
8034 : are 32 bits, but if force_align_arg_pointer is specified, it should
8035 : prefer 128 bits for a backward-compatibility reason, which is also
8036 : what the doc suggests. */
8037 1472140 : if (lookup_attribute ("force_align_arg_pointer",
8038 1472140 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
8039 1472140 : && crtl->preferred_stack_boundary < 128)
8040 4 : crtl->preferred_stack_boundary = 128;
8041 1472140 : }
8042 :
8043 : /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8044 : needed or an rtx for DRAP otherwise. */
8045 :
static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Copy the hard DRAP register into a pseudo (vDRAP) right after
	 the function entry and return the pseudo.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  /* Without optimization the copy survives; tell the unwinder
	     where vDRAP lives.  */
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
8084 :
8085 : /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8086 :
static rtx
ix86_internal_arg_pointer (void)
{
  /* Incoming arguments are always addressed through the virtual
     incoming-args pointer; no special hard register is needed.  */
  return virtual_incoming_args_rtx;
}
8092 :
/* A scratch register handed out by get_scratch_register_on_entry.  */
struct scratch_reg {
  rtx reg;	/* The scratch hard register.  */
  bool saved;	/* True if REG had to be pushed to free it up.  */
};
8097 :
8098 : /* Return a short-lived scratch register for use on function entry.
8099 : In 32-bit mode, it is valid only after the registers are saved
8100 : in the prologue. This register must be released by means of
8101 : release_scratch_register_on_entry once it is dead. */
8102 :
static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* In 32-bit mode, pick a caller-saved register that is not used
	 for argument passing, the static chain, or the DRAP.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      /* Fall back to callee-saved registers that are being saved
	 anyway.  */
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  /* Nothing free: spill EAX (or EDX if EAX is the DRAP) around
	     the scratch register's lifetime.  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
8165 :
8166 : /* Release a scratch register obtained from the preceding function.
8167 :
8168 : If RELEASE_VIA_POP is true, we just pop the register off the stack
8169 : to release it. This is what non-Linux systems use with -fstack-check.
8170 :
8171 : Otherwise we use OFFSET to locate the saved register and the
8172 : allocated stack space becomes part of the local frame and is
8173 : deallocated by the epilogue. */
8174 :
static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  /* Pop the saved value straight back into the scratch register.  */
	  rtx x, insn = emit_insn (gen_pop (sr->reg));

	  /* The RTX FRAME_RELATED_P mechanism doesn't know about pop, so
	     describe the stack-pointer adjustment explicitly for the
	     unwinder via a REG_FRAME_RELATED_EXPR note.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  /* Keep the prologue frame-state bookkeeping in sync with the pop.  */
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the saved value from its stack slot at SP + OFFSET.
	     The slot itself stays part of the local frame and is
	     deallocated by the epilogue.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
8201 :
8202 : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8203 :
8204 : If INT_REGISTERS_SAVED is true, then integer registers have already been
8205 : pushed on the stack.
8206 :
8207 : If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
8208 : beyond SIZE bytes.
8209 :
8210 : This assumes no knowledge of the current probing state, i.e. it is never
8211 : allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8212 : a suitable probe. */
8213 :
static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* The push/pop pair nets to zero, but the unwinder must see
	     each adjustment, so attach REG_CFA_ADJUST_CFA notes to both.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  (RESIDUAL here is zero or negative:
	 it is the remaining downward adjustment after the loop above
	 over-counted by one interval.)  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* While the loop runs, the CFA is expressed via the scratch
	     register; record that for the unwinder.  */
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* After the loop the CFA is based on the stack pointer again.  */
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), -1,
			       m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8419 :
8420 : /* Adjust the stack pointer up to REG while probing it. */
8421 :
const char *
output_adjust_stack_and_probe (rtx reg)
{
  /* Counter used to make each emitted loop label unique.  */
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  (An OR of zero both reads and writes the word at the
     stack pointer without changing it.)  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
8455 :
8456 : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8457 : inclusive. These are offsets from the current stack pointer.
8458 :
8459 : INT_REGISTERS_SAVED is true if integer registers have already been
8460 : pushed on the stack. */
8461 :
static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      /* Pop the scratch register off the stack (it was saved via push).  */
      release_scratch_register_on_entry (&sr, size, true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8552 :
8553 : /* Probe a range of stack addresses from REG to END, inclusive. These are
8554 : offsets from the current stack pointer. */
8555 :
const char *
output_probe_stack_range (rtx reg, rtx end)
{
  /* Counter used to make each emitted loop label unique.  */
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR: OR zero into the word at SP + TEST_ADDR, which
     touches the page without modifying its contents.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
8591 :
8592 : /* Data passed to ix86_update_stack_alignment. */
struct stack_access_data
{
  /* The stack access register being examined.  */
  const_rtx reg;
  /* Pointer to the running maximum stack slot alignment, in bits
     (compared against MEM_ALIGN).  */
  unsigned int *stack_alignment;
};
8600 :
8601 : /* Update the maximum stack slot alignment from memory alignment in PAT. */
8602 :
static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
  /* This insn may reference stack slot.  Update the maximum stack slot
     alignment if the memory is referenced by the stack access register.  */
  stack_access_data *p = (stack_access_data *) data;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, pat, ALL)
    {
      auto op = *iter;
      if (MEM_P (op))
	{
	  if (reg_mentioned_p (p->reg, XEXP (op, 0)))
	    {
	      unsigned int alignment = MEM_ALIGN (op);

	      if (alignment > *p->stack_alignment)
		*p->stack_alignment = alignment;
	      /* One matching MEM is enough; stop walking this PAT.  */
	      break;
	    }
	  else
	    /* Don't look inside a MEM whose address doesn't involve the
	       stack access register.  */
	    iter.skip_subrtxes ();
	}
    }
}
8629 :
8630 : /* Helper function for ix86_find_all_reg_uses. */
8631 :
static void
ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
			  rtx set, unsigned int regno,
			  auto_bitmap &worklist)
{
  rtx dest = SET_DEST (set);

  if (!REG_P (dest))
    return;

  /* Reject non-Pmode modes.  */
  if (GET_MODE (dest) != Pmode)
    return;

  unsigned int dst_regno = REGNO (dest);

  /* Already recorded; no need to process this destination again.  */
  if (TEST_HARD_REG_BIT (regset, dst_regno))
    return;

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* Skip the operands of a MEM, so a use of REGNO that appears only
	 inside a memory address is ignored.  */
      if (MEM_P (op))
	iter.skip_subrtxes ();

      if (REG_P (op) && REGNO (op) == regno)
	{
	  /* Add this register to register set.  */
	  add_to_hard_reg_set (&regset, Pmode, dst_regno);
	  bitmap_set_bit (worklist, dst_regno);
	  break;
	}
    }
}
8670 :
8671 : /* Find all registers defined with register REGNO. */
8672 :
static void
ix86_find_all_reg_uses (HARD_REG_SET &regset,
			unsigned int regno, auto_bitmap &worklist)
{
  /* Walk every DF use of REGNO and record, via ix86_find_all_reg_uses_1,
     the registers whose values are derived from it.  */
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref != NULL;
       ref = DF_REF_NEXT_REG (ref))
    {
      if (DF_REF_IS_ARTIFICIAL (ref))
	continue;

      rtx_insn *insn = DF_REF_INSN (ref);

      if (!NONJUMP_INSN_P (insn))
	continue;

      unsigned int ref_regno = DF_REF_REGNO (ref);

      /* The common case: the insn is a single SET.  */
      rtx set = single_set (insn);
      if (set)
	{
	  ix86_find_all_reg_uses_1 (regset, set,
				    ref_regno, worklist);
	  continue;
	}

      /* Otherwise, look at each constituent SET of a PARALLEL.  */
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
	continue;

      for (int i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx exp = XVECEXP (pat, 0, i);

	  if (GET_CODE (exp) == SET)
	    ix86_find_all_reg_uses_1 (regset, exp,
				      ref_regno, worklist);
	}
    }
}
8713 :
8714 : /* Return true if the hard register REGNO used for a stack access is
8715 : defined in a basic block that dominates the block where it is used. */
8716 :
static bool
ix86_access_stack_p (unsigned int regno, basic_block bb,
		     HARD_REG_SET &set_up_by_prologue,
		     HARD_REG_SET &prologue_used)
{
  /* Get all BBs which set REGNO and dominate the current BB from all
     DEFs of REGNO.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    /* Ignore artificial defs and clobbers; only real sets count.  */
    if (!DF_REF_IS_ARTIFICIAL (def)
	&& !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
	&& !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
      {
	basic_block set_bb = DF_REF_BB (def);
	if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
	  {
	    rtx_insn *insn = DF_REF_INSN (def);
	    /* Return true if INSN requires a stack frame.  */
	    if (requires_stack_frame_p (insn, prologue_used,
					set_up_by_prologue))
	      return true;
	  }
      }

  return false;
}
8744 :
8745 : /* Set stack_frame_required to false if stack frame isn't required.
8746 : Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8747 : slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8748 :
static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  /* Scan the whole function for any insn that needs a stack frame.  */
  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;
	    break;
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (&stack_slot_access, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  /* Remember the seed set; these registers access the stack directly,
     the rest must pass the dominance check below.  */
  HARD_REG_SET hard_stack_slot_access = stack_slot_access;

  calculate_dominance_info (CDI_DOMINATORS);

  unsigned int regno;

  /* Compute the transitive closure of registers derived from the stack
     or frame pointer, to a fixed point.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  data.stack_alignment = &stack_alignment;

  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    for (df_ref ref = DF_REG_USE_CHAIN (regno);
	 ref != NULL;
	 ref = DF_REF_NEXT_REG (ref))
      {
	if (DF_REF_IS_ARTIFICIAL (ref))
	  continue;

	rtx_insn *insn = DF_REF_INSN (ref);

	if (!NONJUMP_INSN_P (insn))
	  continue;

	if (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
	    || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
				    set_up_by_prologue, prologue_used))
	  {
	    /* Update stack alignment if REGNO is used for stack
	       access.  */
	    data.reg = DF_REF_REG (ref);
	    note_stores (insn, ix86_update_stack_alignment, &data);
	    continue;
	  }
      }

  free_dominance_info (CDI_DOMINATORS);
}
8850 :
8851 : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8852 : will guide prologue/epilogue to be generated in correct form. */
8853 :
static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
	         functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  /* Rebuild dataflow information now that the frame pointer is
	     no longer fixed.  */
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
9023 :
9024 : /* Delete SET_GOT right after entry block if it is allocated to reg. */
9025 :
9026 : static void
9027 0 : ix86_elim_entry_set_got (rtx reg)
9028 : {
9029 0 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9030 0 : rtx_insn *c_insn = BB_HEAD (bb);
9031 0 : if (!NONDEBUG_INSN_P (c_insn))
9032 0 : c_insn = next_nonnote_nondebug_insn (c_insn);
9033 0 : if (c_insn && NONJUMP_INSN_P (c_insn))
9034 : {
9035 0 : rtx pat = PATTERN (c_insn);
9036 0 : if (GET_CODE (pat) == PARALLEL)
9037 : {
9038 0 : rtx set = XVECEXP (pat, 0, 0);
9039 0 : if (GET_CODE (set) == SET
9040 0 : && GET_CODE (SET_SRC (set)) == UNSPEC
9041 0 : && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
9042 0 : && REGNO (SET_DEST (set)) == REGNO (reg))
9043 0 : delete_insn (c_insn);
9044 : }
9045 : }
9046 0 : }
9047 :
9048 : static rtx
9049 193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
9050 : {
9051 193166 : rtx addr, mem;
9052 :
9053 193166 : if (offset)
9054 184480 : addr = plus_constant (Pmode, frame_reg, offset);
9055 193166 : mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
9056 193166 : return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
9057 : }
9058 :
/* Return a SET that loads REG from the frame slot at FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}
9064 :
/* Return a SET that stores REG into the frame slot at FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
9070 :
/* Emit the call to the out-of-line stub that saves the MS-to-SysV
   clobbered registers described by FRAME.  RAX is loaded with the
   stub's base pointer, and the USE of the stub symbol plus all the
   register stores are emitted as a single frame-related PARALLEL.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     so the address is valid whether we've actually realigned the stack
     or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol; a frame pointer selects the HFP variant.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      /* SSE registers are saved in V4SFmode, everything else in
	 word_mode; offsets are negative relative to RAX.  */
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
9115 :
9116 : /* Generate and return an insn body to AND X with Y. */
9117 :
9118 : static rtx_insn *
9119 31704 : gen_and2_insn (rtx x, rtx y)
9120 : {
9121 31704 : enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
9122 :
9123 31704 : gcc_assert (insn_operand_matches (icode, 0, x));
9124 31704 : gcc_assert (insn_operand_matches (icode, 1, x));
9125 31704 : gcc_assert (insn_operand_matches (icode, 2, y));
9126 :
9127 31704 : return GEN_FCN (icode) (x, x, y);
9128 : }
9129 :
/* Expand the prologue into a bunch of separate insns.

   This drives all prologue emission: frame-pointer setup, DRAP-based
   stack realignment, register saves (pushes, moves, or the ms2sysv
   out-of-line stub), stack allocation with optional probing, and the
   CFA bookkeeping in cfun->machine->fs that the epilogue and unwind
   info rely on.  Statement order here is load-bearing: m->fs must be
   updated in lockstep with each emitted insn.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;
  bool save_stub_call_needed;
  rtx static_chain = NULL_RTX;

  ix86_last_zero_store_uid = 0;
  /* A naked function gets no prologue at all.  */
  if (ix86_function_naked (current_function_decl))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  ix86_finalize_stack_frame_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;

  const struct ix86_frame &frame = cfun->machine->frame;

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
         prologue variant. If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("%<ms_hook_prologue%> attribute is not compatible "
	       "with %<-mfentry%> for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      static_chain = ix86_static_chain (cfun->decl, false);
      insn = emit_insn (gen_push (static_chain));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     of DRAP is needed and stack realignment is really needed after reload */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Can't use DRAP in interrupt function.  */
      if (cfun->machine->func_type != TYPE_NORMAL)
	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
	       "in interrupt service routine.  This may be worked "
	       "around by avoiding functions with aggregate return.");

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
				       GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

      if (static_chain)
	{
	  /* Replicate static chain on the stack so that static chain
	     can be reached via (argp - 2) slot.  This is needed for
	     nested function with stack realignment.  */
	  insn = emit_insn (gen_push (static_chain));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);
  save_stub_call_needed = (m->call_ms2sysv);
  gcc_assert (sse_registers_saved || !save_stub_call_needed);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb didn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  HOST_WIDE_INT allocate_offset;
	  if (crtl->shrink_wrapped_separate)
	    {
	      allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;

	      /* Adjust the total offset at the beginning of the function.  */
	      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (allocate_offset), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	      m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
	    }

	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (frame.red_zone_size != 0)
    cfun->machine->red_zone_used = true;

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* Record last valid frame pointer offset.  */
      m->fs.sp_realigned_fp_last = frame.reg_save_offset;

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      allocate = frame.reg_save_offset - m->fs.sp_offset
		 + frame.stack_realign_allocate;
      if (allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1, false);

      /* Align the stack.  */
      emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
      m->fs.sp_realigned_offset = m->fs.sp_offset
					      - frame.stack_realign_allocate;
      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
	 Beyond this point, stack access should be done via choose_baseaddr or
	 by using sp_valid_at and fp_valid_at to determine the correct base
	 register.  Henceforth, any CFA offset should be thought of as logical
	 and not physical.  */
      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
      m->fs.sp_realigned = true;

      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
	 is needed to describe where a register is saved using a realigned
	 stack pointer, so we need to invalidate the stack pointer for that
	 target.  */
      if (TARGET_SEH)
	m->fs.sp_valid = false;

      /* If SP offset is non-immediate after allocation of the stack frame,
	 then emit SSE saves or stub call prior to allocating the rest of the
	 stack frame.  This is less efficient for the out-of-line stub because
	 we can't combine allocations across the call barrier, but it's better
	 than using a scratch register.  */
      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
						   - m->fs.sp_realigned_offset),
					  Pmode))
	{
	  if (!sse_registers_saved)
	    {
	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
	      sse_registers_saved = true;
	    }
	  else if (save_stub_call_needed)
	    {
	      ix86_emit_outlined_ms2sysv_save (frame);
	      save_stub_call_needed = false;
	    }
	}
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size
	= frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* If stack clash protection is requested, then probe the stack, unless it
     is already probed on the target.  */
  if (allocate >= 0
      && flag_stack_clash_protection
      && !ix86_target_stack_probe ())
    {
      ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
      allocate = 0;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      const HOST_WIDE_INT probe_interval = get_probe_interval ();

      if (STACK_CHECK_MOVING_SP)
	{
	  if (crtl->is_leaf
	      && !cfun->calls_alloca
	      && allocate <= probe_interval)
	    ;

	  else
	    {
	      ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
	      allocate = 0;
	    }
	}

      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
	    size = 0x80000000 - get_stack_check_protect () - 1;

	  if (TARGET_STACK_PROBE)
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval)
		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (0,
					     size + get_stack_check_protect (),
					     int_registers_saved);
	    }
	  else
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval
		      && size > get_stack_check_protect ())
		    ix86_emit_probe_stack_range (get_stack_check_protect (),
						 (size
						  - get_stack_check_protect ()),
						 int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
					     int_registers_saved);
	    }
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      /* Large allocation on a probing target: the probe worker consumes
	 EAX (and possibly clobbers R10 for the static chain), so spill
	 any live values first and reload them afterwards.  */
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  /* Note that SEH directives need to continue tracking the stack
	     pointer even after the frame pointer has been set up.  */
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code works for
	 realigned stack.  But this means that we need a blockage to prevent
	 stores based on the frame pointer from being scheduled before.  */
      if (r10_live && eax_live)
        {
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
      else if (eax_live || r10_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  /* Emit any register saves deferred until after stack allocation.  */
  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  else if (save_stub_call_needed)
    ix86_emit_outlined_ms2sysv_save (frame);

  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
     in PROLOGUE.  */
  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    {
      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
      insn = emit_insn (gen_set_got (pic));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
      emit_insn (gen_prologue_use (pic));
      /* Delete an already-emitted SET_GOT if it exists and is allocated
	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
    }

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
         isn't necessary, here we will emit prologue to setup DRAP
         without stack realign adjustment */
      t = choose_baseaddr (0, NULL);
      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
9717 :
/* Emit code to restore REG using a POP or POPP insn (PPX_P selects the
   APX POPP encoding), updating the tracked frame state and attaching
   the CFA notes the unwinder needs.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The pop moves SP up by one word; record the CFA adjustment.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9775 :
/* Emit code to restore REG1 and REG2 using a single POP2 insn (POP2P
   when PPX_P), updating the tracked frame state and CFA notes.  The
   stack pointer must be 16-byte aligned before the pop.  */

static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 pops two word-sized slots at once.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Both registers are restored by this one insn.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The pop moves SP up by two words; record the CFA adjustment.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9848 :
9849 : /* Emit code to restore saved registers using POP insns. */
9850 :
9851 : static void
9852 1348230 : ix86_emit_restore_regs_using_pop (bool ppx_p)
9853 : {
9854 1348230 : unsigned int regno;
9855 :
9856 125385390 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9857 124037160 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9858 1222873 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9859 1348230 : }
9860 :
9861 : /* Emit code to restore saved registers using POP2 insns. */
9862 :
9863 : static void
9864 558 : ix86_emit_restore_regs_using_pop2 (void)
9865 : {
9866 558 : int regno;
9867 558 : int regno_list[2];
9868 558 : regno_list[0] = regno_list[1] = -1;
9869 558 : int loaded_regnum = 0;
9870 558 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9871 :
9872 51894 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9873 51336 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9874 : {
9875 127 : if (aligned)
9876 : {
9877 120 : regno_list[loaded_regnum++] = regno;
9878 120 : if (loaded_regnum == 2)
9879 : {
9880 19 : gcc_assert (regno_list[0] != -1
9881 : && regno_list[1] != -1
9882 : && regno_list[0] != regno_list[1]);
9883 :
9884 19 : ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
9885 : regno_list[0]),
9886 : gen_rtx_REG (word_mode,
9887 : regno_list[1]),
9888 19 : TARGET_APX_PPX);
9889 19 : loaded_regnum = 0;
9890 19 : regno_list[0] = regno_list[1] = -1;
9891 : }
9892 : }
9893 : else
9894 : {
9895 14 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
9896 7 : TARGET_APX_PPX);
9897 7 : aligned = true;
9898 : }
9899 : }
9900 :
9901 558 : if (loaded_regnum == 1)
9902 82 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
9903 82 : TARGET_APX_PPX);
9904 558 : }
9905 :
9906 : /* Emit code and notes for the LEAVE instruction. If insn is non-null,
9907 : omits the emit and only attaches the notes. */
9908 :
9909 : static void
9910 241854 : ix86_emit_leave (rtx_insn *insn)
9911 : {
9912 241854 : struct machine_function *m = cfun->machine;
9913 :
9914 241854 : if (!insn)
9915 240883 : insn = emit_insn (gen_leave (word_mode));
9916 :
9917 241854 : ix86_add_queued_cfa_restore_notes (insn);
9918 :
9919 241854 : gcc_assert (m->fs.fp_valid);
9920 241854 : m->fs.sp_valid = true;
9921 241854 : m->fs.sp_realigned = false;
9922 241854 : m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9923 241854 : m->fs.fp_valid = false;
9924 :
9925 241854 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9926 : {
9927 238717 : m->fs.cfa_reg = stack_pointer_rtx;
9928 238717 : m->fs.cfa_offset = m->fs.sp_offset;
9929 :
9930 238717 : add_reg_note (insn, REG_CFA_DEF_CFA,
9931 238717 : plus_constant (Pmode, stack_pointer_rtx,
9932 238717 : m->fs.sp_offset));
9933 238717 : RTX_FRAME_RELATED_P (insn) = 1;
9934 : }
9935 241854 : ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9936 : m->fs.fp_offset);
9937 241854 : }
9938 :
9939 : /* Emit code to restore saved registers using MOV insns.
9940 : First register is restored from CFA - CFA_OFFSET. */
9941 : static void
9942 97162 : ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9943 : bool maybe_eh_return)
9944 : {
9945 97162 : struct machine_function *m = cfun->machine;
9946 97162 : unsigned int regno;
9947 :
9948 9036066 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9949 8938904 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9950 : {
9951 :
9952 : /* Skip registers, already processed by shrink wrap separate. */
9953 268266 : if (!cfun->machine->reg_is_wrapped_separately[regno])
9954 : {
9955 140606 : rtx reg = gen_rtx_REG (word_mode, regno);
9956 140606 : rtx mem;
9957 140606 : rtx_insn *insn;
9958 :
9959 140606 : mem = choose_baseaddr (cfa_offset, NULL);
9960 140606 : mem = gen_frame_mem (word_mode, mem);
9961 140606 : insn = emit_move_insn (reg, mem);
9962 :
9963 140606 : if (m->fs.cfa_reg == crtl->drap_reg
9964 140606 : && regno == REGNO (crtl->drap_reg))
9965 : {
9966 : /* Previously we'd represented the CFA as an expression
9967 : like *(%ebp - 8). We've just popped that value from
9968 : the stack, which means we need to reset the CFA to
9969 : the drap register. This will remain until we restore
9970 : the stack pointer. */
9971 3137 : add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9972 3137 : RTX_FRAME_RELATED_P (insn) = 1;
9973 :
9974 : /* DRAP register is valid for addressing. */
9975 3137 : m->fs.drap_valid = true;
9976 : }
9977 : else
9978 137469 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9979 : }
9980 288472 : cfa_offset -= UNITS_PER_WORD;
9981 : }
9982 97162 : }
9983 :
9984 : /* Emit code to restore saved registers using MOV insns.
9985 : First register is restored from CFA - CFA_OFFSET. */
9986 : static void
9987 33929 : ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9988 : bool maybe_eh_return)
9989 : {
9990 33929 : unsigned int regno;
9991 :
9992 3155397 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9993 3121468 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9994 : {
9995 339317 : rtx reg = gen_rtx_REG (V4SFmode, regno);
9996 339317 : rtx mem;
9997 339317 : unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9998 :
9999 339317 : mem = choose_baseaddr (cfa_offset, &align);
10000 339317 : mem = gen_rtx_MEM (V4SFmode, mem);
10001 :
10002 : /* The location aligment depends upon the base register. */
10003 339317 : align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
10004 339317 : gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
10005 339317 : set_mem_align (mem, align);
10006 339317 : emit_insn (gen_rtx_SET (reg, mem));
10007 :
10008 339317 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
10009 :
10010 339317 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
10011 : }
10012 33929 : }
10013 :
10014 : static void
10015 7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
10016 : bool use_call, int style)
10017 : {
10018 7621 : struct machine_function *m = cfun->machine;
10019 7621 : const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
10020 7621 : + m->call_ms2sysv_extra_regs;
10021 7621 : rtvec v;
10022 7621 : unsigned int elems_needed, align, i, vi = 0;
10023 7621 : rtx_insn *insn;
10024 7621 : rtx sym, tmp;
10025 7621 : rtx rsi = gen_rtx_REG (word_mode, SI_REG);
10026 7621 : rtx r10 = NULL_RTX;
10027 7621 : const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
10028 7621 : HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
10029 7621 : HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
10030 7621 : rtx rsi_frame_load = NULL_RTX;
10031 7621 : HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
10032 7621 : enum xlogue_stub stub;
10033 :
10034 7621 : gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
10035 :
10036 : /* If using a realigned stack, we should never start with padding. */
10037 7621 : gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
10038 :
10039 : /* Setup RSI as the stub's base pointer. */
10040 7621 : align = GET_MODE_ALIGNMENT (V4SFmode);
10041 7621 : tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
10042 7621 : gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
10043 :
10044 7621 : emit_insn (gen_rtx_SET (rsi, tmp));
10045 :
10046 : /* Get a symbol for the stub. */
10047 7621 : if (frame_pointer_needed)
10048 5955 : stub = use_call ? XLOGUE_STUB_RESTORE_HFP
10049 : : XLOGUE_STUB_RESTORE_HFP_TAIL;
10050 : else
10051 1666 : stub = use_call ? XLOGUE_STUB_RESTORE
10052 : : XLOGUE_STUB_RESTORE_TAIL;
10053 7621 : sym = xlogue.get_stub_rtx (stub);
10054 :
10055 7621 : elems_needed = ncregs;
10056 7621 : if (use_call)
10057 6498 : elems_needed += 1;
10058 : else
10059 1275 : elems_needed += frame_pointer_needed ? 5 : 3;
10060 7621 : v = rtvec_alloc (elems_needed);
10061 :
10062 : /* We call the epilogue stub when we need to pop incoming args or we are
10063 : doing a sibling call as the tail. Otherwise, we will emit a jmp to the
10064 : epilogue stub and it is the tail-call. */
10065 7621 : if (use_call)
10066 6498 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10067 : else
10068 : {
10069 1123 : RTVEC_ELT (v, vi++) = ret_rtx;
10070 1123 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10071 1123 : if (frame_pointer_needed)
10072 : {
10073 971 : rtx rbp = gen_rtx_REG (DImode, BP_REG);
10074 971 : gcc_assert (m->fs.fp_valid);
10075 971 : gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
10076 :
10077 971 : tmp = plus_constant (DImode, rbp, 8);
10078 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
10079 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
10080 971 : tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10081 971 : RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
10082 : }
10083 : else
10084 : {
10085 : /* If no hard frame pointer, we set R10 to the SP restore value. */
10086 152 : gcc_assert (!m->fs.fp_valid);
10087 152 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10088 152 : gcc_assert (m->fs.sp_valid);
10089 :
10090 152 : r10 = gen_rtx_REG (DImode, R10_REG);
10091 152 : tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
10092 152 : emit_insn (gen_rtx_SET (r10, tmp));
10093 :
10094 152 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
10095 : }
10096 : }
10097 :
10098 : /* Generate frame load insns and restore notes. */
10099 107954 : for (i = 0; i < ncregs; ++i)
10100 : {
10101 100333 : const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
10102 100333 : machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
10103 100333 : rtx reg, frame_load;
10104 :
10105 100333 : reg = gen_rtx_REG (mode, r.regno);
10106 100333 : frame_load = gen_frame_load (reg, rsi, r.offset);
10107 :
10108 : /* Save RSI frame load insn & note to add last. */
10109 100333 : if (r.regno == SI_REG)
10110 : {
10111 7621 : gcc_assert (!rsi_frame_load);
10112 7621 : rsi_frame_load = frame_load;
10113 7621 : rsi_restore_offset = r.offset;
10114 : }
10115 : else
10116 : {
10117 92712 : RTVEC_ELT (v, vi++) = frame_load;
10118 92712 : ix86_add_cfa_restore_note (NULL, reg, r.offset);
10119 : }
10120 : }
10121 :
10122 : /* Add RSI frame load & restore note at the end. */
10123 7621 : gcc_assert (rsi_frame_load);
10124 7621 : gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
10125 7621 : RTVEC_ELT (v, vi++) = rsi_frame_load;
10126 7621 : ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
10127 : rsi_restore_offset);
10128 :
10129 : /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
10130 7621 : if (!use_call && !frame_pointer_needed)
10131 : {
10132 152 : gcc_assert (m->fs.sp_valid);
10133 152 : gcc_assert (!m->fs.sp_realigned);
10134 :
10135 : /* At this point, R10 should point to frame.stack_realign_offset. */
10136 152 : if (m->fs.cfa_reg == stack_pointer_rtx)
10137 152 : m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
10138 152 : m->fs.sp_offset = frame.stack_realign_offset;
10139 : }
10140 :
10141 7621 : gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
10142 7621 : tmp = gen_rtx_PARALLEL (VOIDmode, v);
10143 7621 : if (use_call)
10144 6498 : insn = emit_insn (tmp);
10145 : else
10146 : {
10147 1123 : insn = emit_jump_insn (tmp);
10148 1123 : JUMP_LABEL (insn) = ret_rtx;
10149 :
10150 1123 : if (frame_pointer_needed)
10151 971 : ix86_emit_leave (insn);
10152 : else
10153 : {
10154 : /* Need CFA adjust note. */
10155 152 : tmp = gen_rtx_SET (stack_pointer_rtx, r10);
10156 152 : add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
10157 : }
10158 : }
10159 :
10160 7621 : RTX_FRAME_RELATED_P (insn) = true;
10161 7621 : ix86_add_queued_cfa_restore_notes (insn);
10162 :
10163 : /* If we're not doing a tail-call, we need to adjust the stack. */
10164 7621 : if (use_call && m->fs.sp_valid)
10165 : {
10166 3706 : HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
10167 3706 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10168 : GEN_INT (dealloc), style,
10169 3706 : m->fs.cfa_reg == stack_pointer_rtx);
10170 : }
10171 7621 : }
10172 :
10173 : /* Restore function stack, frame, and registers. */
10174 :
10175 : void
10176 1642516 : ix86_expand_epilogue (int style)
10177 : {
10178 1642516 : struct machine_function *m = cfun->machine;
10179 1642516 : struct machine_frame_state frame_state_save = m->fs;
10180 1642516 : bool restore_regs_via_mov;
10181 1642516 : bool using_drap;
10182 1642516 : bool restore_stub_is_tail = false;
10183 :
10184 1642516 : if (ix86_function_naked (current_function_decl))
10185 : {
10186 : /* The program should not reach this point. */
10187 74 : emit_insn (gen_ud2 ());
10188 125507 : return;
10189 : }
10190 :
10191 1642442 : ix86_finalize_stack_frame_flags ();
10192 1642442 : const struct ix86_frame &frame = cfun->machine->frame;
10193 :
10194 1642442 : m->fs.sp_realigned = stack_realign_fp;
10195 31844 : m->fs.sp_valid = stack_realign_fp
10196 1617757 : || !frame_pointer_needed
10197 2092171 : || crtl->sp_is_unchanging;
10198 1642442 : gcc_assert (!m->fs.sp_valid
10199 : || m->fs.sp_offset == frame.stack_pointer_offset);
10200 :
10201 : /* The FP must be valid if the frame pointer is present. */
10202 1642442 : gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10203 1642442 : gcc_assert (!m->fs.fp_valid
10204 : || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10205 :
10206 : /* We must have *some* valid pointer to the stack frame. */
10207 1642442 : gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10208 :
10209 : /* The DRAP is never valid at this point. */
10210 1642442 : gcc_assert (!m->fs.drap_valid);
10211 :
10212 : /* See the comment about red zone and frame
10213 : pointer usage in ix86_expand_prologue. */
10214 1642442 : if (frame_pointer_needed && frame.red_zone_size)
10215 126656 : emit_insn (gen_memory_blockage ());
10216 :
10217 1642442 : using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10218 7159 : gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10219 :
10220 : /* Determine the CFA offset of the end of the red-zone. */
10221 1642442 : m->fs.red_zone_offset = 0;
10222 1642442 : if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10223 : {
10224 : /* The red-zone begins below return address and error code in
10225 : exception handler. */
10226 1465368 : m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
10227 :
10228 : /* When the register save area is in the aligned portion of
10229 : the stack, determine the maximum runtime displacement that
10230 : matches up with the aligned frame. */
10231 1465368 : if (stack_realign_drap)
10232 8588 : m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10233 4294 : + UNITS_PER_WORD);
10234 : }
10235 :
10236 1642442 : HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
10237 :
10238 : /* Special care must be taken for the normal return case of a function
10239 : using eh_return: the eax and edx registers are marked as saved, but
10240 : not restored along this path. Adjust the save location to match. */
10241 1642442 : if (crtl->calls_eh_return && style != 2)
10242 37 : reg_save_offset -= 2 * UNITS_PER_WORD;
10243 :
10244 : /* EH_RETURN requires the use of moves to function properly. */
10245 1642442 : if (crtl->calls_eh_return)
10246 : restore_regs_via_mov = true;
10247 : /* SEH requires the use of pops to identify the epilogue. */
10248 1642384 : else if (TARGET_SEH)
10249 : restore_regs_via_mov = false;
10250 : /* If we already save reg with pushp, don't use move at epilogue. */
10251 1642384 : else if (m->fs.apx_ppx_used)
10252 : restore_regs_via_mov = false;
10253 : /* If we're only restoring one register and sp cannot be used then
10254 : using a move instruction to restore the register since it's
10255 : less work than reloading sp and popping the register. */
10256 1642297 : else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
10257 : restore_regs_via_mov = true;
10258 1581497 : else if (crtl->shrink_wrapped_separate
10259 1527658 : || (TARGET_EPILOGUE_USING_MOVE
10260 56735 : && cfun->machine->use_fast_prologue_epilogue
10261 56679 : && (frame.nregs > 1
10262 56666 : || m->fs.sp_offset != reg_save_offset)))
10263 : restore_regs_via_mov = true;
10264 1527423 : else if (frame_pointer_needed
10265 411007 : && !frame.nregs
10266 316609 : && m->fs.sp_offset != reg_save_offset)
10267 : restore_regs_via_mov = true;
10268 1378061 : else if (frame_pointer_needed
10269 261645 : && TARGET_USE_LEAVE
10270 261570 : && cfun->machine->use_fast_prologue_epilogue
10271 204984 : && frame.nregs == 1)
10272 : restore_regs_via_mov = true;
10273 : else
10274 1642442 : restore_regs_via_mov = false;
10275 :
10276 1642442 : if (crtl->shrink_wrapped_separate)
10277 53870 : gcc_assert (restore_regs_via_mov);
10278 :
10279 1588572 : if (restore_regs_via_mov || frame.nsseregs)
10280 : {
10281 : /* Ensure that the entire register save area is addressable via
10282 : the stack pointer, if we will restore SSE regs via sp. */
10283 327229 : if (TARGET_64BIT
10284 314691 : && m->fs.sp_offset > 0x7fffffff
10285 23 : && sp_valid_at (frame.stack_realign_offset + 1)
10286 327251 : && (frame.nsseregs + frame.nregs) != 0)
10287 : {
10288 6 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10289 6 : GEN_INT (m->fs.sp_offset
10290 : - frame.sse_reg_save_offset),
10291 : style,
10292 6 : m->fs.cfa_reg == stack_pointer_rtx);
10293 : }
10294 : }
10295 :
10296 : /* If there are any SSE registers to restore, then we have to do it
10297 : via moves, since there's obviously no pop for SSE regs. */
10298 1642442 : if (frame.nsseregs)
10299 33929 : ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10300 : style == 2);
10301 :
10302 1642442 : if (m->call_ms2sysv)
10303 : {
10304 7621 : int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
10305 :
10306 : /* We cannot use a tail-call for the stub if:
10307 : 1. We have to pop incoming args,
10308 : 2. We have additional int regs to restore, or
10309 : 3. A sibling call will be the tail-call, or
10310 : 4. We are emitting an eh_return_internal epilogue.
10311 :
10312 : TODO: Item 4 has not yet tested!
10313 :
10314 : If any of the above are true, we will call the stub rather than
10315 : jump to it. */
10316 7621 : restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
10317 7621 : ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
10318 : }
10319 :
10320 : /* If using out-of-line stub that is a tail-call, then...*/
10321 1642442 : if (m->call_ms2sysv && restore_stub_is_tail)
10322 : {
10323 : /* TODO: parinoid tests. (remove eventually) */
10324 1123 : gcc_assert (m->fs.sp_valid);
10325 1123 : gcc_assert (!m->fs.sp_realigned);
10326 1123 : gcc_assert (!m->fs.fp_valid);
10327 1123 : gcc_assert (!m->fs.realigned);
10328 1123 : gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
10329 1123 : gcc_assert (!crtl->drap_reg);
10330 1123 : gcc_assert (!frame.nregs);
10331 1123 : gcc_assert (!crtl->shrink_wrapped_separate);
10332 : }
10333 1641319 : else if (restore_regs_via_mov)
10334 : {
10335 292531 : rtx t;
10336 :
10337 292531 : if (frame.nregs)
10338 97162 : ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
10339 :
10340 : /* eh_return epilogues need %ecx added to the stack pointer. */
10341 292531 : if (style == 2)
10342 : {
10343 37 : rtx sa = EH_RETURN_STACKADJ_RTX;
10344 29 : rtx_insn *insn;
10345 :
10346 29 : gcc_assert (!crtl->shrink_wrapped_separate);
10347 :
10348 : /* Stack realignment doesn't work with eh_return. */
10349 29 : if (crtl->stack_realign_needed)
10350 0 : sorry ("Stack realignment not supported with "
10351 : "%<__builtin_eh_return%>");
10352 :
10353 : /* regparm nested functions don't work with eh_return. */
10354 29 : if (ix86_static_chain_on_stack)
10355 0 : sorry ("regparm nested function not supported with "
10356 : "%<__builtin_eh_return%>");
10357 :
10358 29 : if (frame_pointer_needed)
10359 : {
10360 35 : t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10361 43 : t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
10362 27 : emit_insn (gen_rtx_SET (sa, t));
10363 :
10364 : /* NB: eh_return epilogues must restore the frame pointer
10365 : in word_mode since the upper 32 bits of RBP register
10366 : can have any values. */
10367 27 : t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10368 27 : rtx frame_reg = gen_rtx_REG (word_mode,
10369 : HARD_FRAME_POINTER_REGNUM);
10370 27 : insn = emit_move_insn (frame_reg, t);
10371 :
10372 : /* Note that we use SA as a temporary CFA, as the return
10373 : address is at the proper place relative to it. We
10374 : pretend this happens at the FP restore insn because
10375 : prior to this insn the FP would be stored at the wrong
10376 : offset relative to SA, and after this insn we have no
10377 : other reasonable register to use for the CFA. We don't
10378 : bother resetting the CFA to the SP for the duration of
10379 : the return insn, unless the control flow instrumentation
10380 : is done. In this case the SP is used later and we have
10381 : to reset CFA to SP. */
10382 27 : add_reg_note (insn, REG_CFA_DEF_CFA,
10383 35 : plus_constant (Pmode, sa, UNITS_PER_WORD));
10384 27 : ix86_add_queued_cfa_restore_notes (insn);
10385 27 : add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10386 27 : RTX_FRAME_RELATED_P (insn) = 1;
10387 :
10388 27 : m->fs.cfa_reg = sa;
10389 27 : m->fs.cfa_offset = UNITS_PER_WORD;
10390 27 : m->fs.fp_valid = false;
10391 :
10392 27 : pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10393 : const0_rtx, style,
10394 27 : flag_cf_protection);
10395 : }
10396 : else
10397 : {
10398 2 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10399 2 : t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10400 2 : insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10401 2 : ix86_add_queued_cfa_restore_notes (insn);
10402 :
10403 2 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10404 2 : if (m->fs.cfa_offset != UNITS_PER_WORD)
10405 : {
10406 2 : m->fs.cfa_offset = UNITS_PER_WORD;
10407 2 : add_reg_note (insn, REG_CFA_DEF_CFA,
10408 2 : plus_constant (Pmode, stack_pointer_rtx,
10409 2 : UNITS_PER_WORD));
10410 2 : RTX_FRAME_RELATED_P (insn) = 1;
10411 : }
10412 : }
10413 29 : m->fs.sp_offset = UNITS_PER_WORD;
10414 29 : m->fs.sp_valid = true;
10415 29 : m->fs.sp_realigned = false;
10416 : }
10417 : }
10418 : else
10419 : {
10420 : /* SEH requires that the function end with (1) a stack adjustment
10421 : if necessary, (2) a sequence of pops, and (3) a return or
10422 : jump instruction. Prevent insns from the function body from
10423 : being scheduled into this sequence. */
10424 1348788 : if (TARGET_SEH)
10425 : {
10426 : /* Prevent a catch region from being adjacent to the standard
10427 : epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10428 : nor several other flags that would be interesting to test are
10429 : set up yet. */
10430 : if (flag_non_call_exceptions)
10431 : emit_insn (gen_nops (const1_rtx));
10432 : else
10433 : emit_insn (gen_blockage ());
10434 : }
10435 :
10436 : /* First step is to deallocate the stack frame so that we can
10437 : pop the registers. If the stack pointer was realigned, it needs
10438 : to be restored now. Also do it on SEH target for very large
10439 : frame as the emitted instructions aren't allowed by the ABI
10440 : in epilogues. */
10441 1348788 : if (!m->fs.sp_valid || m->fs.sp_realigned
10442 : || (TARGET_SEH
10443 : && (m->fs.sp_offset - reg_save_offset
10444 : >= SEH_MAX_FRAME_SIZE)))
10445 : {
10446 29762 : pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10447 29762 : GEN_INT (m->fs.fp_offset
10448 : - reg_save_offset),
10449 : style, false);
10450 : }
10451 1319026 : else if (m->fs.sp_offset != reg_save_offset)
10452 : {
10453 611767 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10454 : GEN_INT (m->fs.sp_offset
10455 : - reg_save_offset),
10456 : style,
10457 611767 : m->fs.cfa_reg == stack_pointer_rtx);
10458 : }
10459 :
10460 1348788 : if (TARGET_APX_PUSH2POP2
10461 561 : && ix86_can_use_push2pop2 ()
10462 1349347 : && m->func_type == TYPE_NORMAL)
10463 558 : ix86_emit_restore_regs_using_pop2 ();
10464 : else
10465 1348230 : ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10466 : }
10467 :
10468 : /* If we used a stack pointer and haven't already got rid of it,
10469 : then do so now. */
10470 1642442 : if (m->fs.fp_valid)
10471 : {
10472 : /* If the stack pointer is valid and pointing at the frame
10473 : pointer store address, then we only need a pop. */
10474 473416 : if (sp_valid_at (frame.hfp_save_offset)
10475 473416 : && m->fs.sp_offset == frame.hfp_save_offset)
10476 232521 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10477 : /* Leave results in shorter dependency chains on CPUs that are
10478 : able to grok it fast. */
10479 240895 : else if (TARGET_USE_LEAVE
10480 12 : || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10481 240907 : || !cfun->machine->use_fast_prologue_epilogue)
10482 240883 : ix86_emit_leave (NULL);
10483 : else
10484 : {
10485 12 : pro_epilogue_adjust_stack (stack_pointer_rtx,
10486 : hard_frame_pointer_rtx,
10487 12 : const0_rtx, style, !using_drap);
10488 12 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10489 : }
10490 : }
10491 :
10492 1642442 : if (using_drap)
10493 : {
10494 7159 : int param_ptr_offset = UNITS_PER_WORD;
10495 7159 : rtx_insn *insn;
10496 :
10497 7159 : gcc_assert (stack_realign_drap);
10498 :
10499 7159 : if (ix86_static_chain_on_stack)
10500 0 : param_ptr_offset += UNITS_PER_WORD;
10501 7159 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10502 230 : param_ptr_offset += UNITS_PER_WORD;
10503 :
10504 7464 : insn = emit_insn (gen_rtx_SET
10505 : (stack_pointer_rtx,
10506 : plus_constant (Pmode, crtl->drap_reg,
10507 : -param_ptr_offset)));
10508 7159 : m->fs.cfa_reg = stack_pointer_rtx;
10509 7159 : m->fs.cfa_offset = param_ptr_offset;
10510 7159 : m->fs.sp_offset = param_ptr_offset;
10511 7159 : m->fs.realigned = false;
10512 :
10513 7464 : add_reg_note (insn, REG_CFA_DEF_CFA,
10514 7159 : plus_constant (Pmode, stack_pointer_rtx,
10515 7159 : param_ptr_offset));
10516 7159 : RTX_FRAME_RELATED_P (insn) = 1;
10517 :
10518 7159 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10519 230 : ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10520 : }
10521 :
10522 : /* At this point the stack pointer must be valid, and we must have
10523 : restored all of the registers. We may not have deallocated the
10524 : entire stack frame. We've delayed this until now because it may
10525 : be possible to merge the local stack deallocation with the
10526 : deallocation forced by ix86_static_chain_on_stack. */
10527 1642442 : gcc_assert (m->fs.sp_valid);
10528 1642442 : gcc_assert (!m->fs.sp_realigned);
10529 1642442 : gcc_assert (!m->fs.fp_valid);
10530 1642442 : gcc_assert (!m->fs.realigned);
10531 1777800 : if (m->fs.sp_offset != UNITS_PER_WORD)
10532 : {
10533 51593 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10534 : GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10535 : style, true);
10536 : }
10537 : else
10538 1590849 : ix86_add_queued_cfa_restore_notes (get_last_insn ());
10539 :
10540 : /* Sibcall epilogues don't want a return instruction. */
10541 1642442 : if (style == 0)
10542 : {
10543 125359 : m->fs = frame_state_save;
10544 125359 : return;
10545 : }
10546 :
10547 1517083 : if (cfun->machine->func_type != TYPE_NORMAL)
10548 120 : emit_jump_insn (gen_interrupt_return ());
10549 1516963 : else if (crtl->args.pops_args && crtl->args.size)
10550 : {
10551 25896 : rtx popc = GEN_INT (crtl->args.pops_args);
10552 :
10553 : /* i386 can only pop 64K bytes. If asked to pop more, pop return
10554 : address, do explicit add, and jump indirectly to the caller. */
10555 :
10556 25896 : if (crtl->args.pops_args >= 65536)
10557 : {
10558 0 : rtx ecx = gen_rtx_REG (SImode, CX_REG);
10559 0 : rtx_insn *insn;
10560 :
10561 : /* There is no "pascal" calling convention in any 64bit ABI. */
10562 0 : gcc_assert (!TARGET_64BIT);
10563 :
10564 0 : insn = emit_insn (gen_pop (ecx));
10565 0 : m->fs.cfa_offset -= UNITS_PER_WORD;
10566 0 : m->fs.sp_offset -= UNITS_PER_WORD;
10567 :
10568 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10569 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
10570 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10571 0 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10572 0 : RTX_FRAME_RELATED_P (insn) = 1;
10573 :
10574 0 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10575 : popc, -1, true);
10576 0 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10577 : }
10578 : else
10579 25896 : emit_jump_insn (gen_simple_return_pop_internal (popc));
10580 : }
10581 1491067 : else if (!m->call_ms2sysv || !restore_stub_is_tail)
10582 : {
10583 : /* In case of return from EH a simple return cannot be used
10584 : as a return address will be compared with a shadow stack
10585 : return address. Use indirect jump instead. */
10586 1489944 : if (style == 2 && flag_cf_protection)
10587 : {
10588 : /* Register used in indirect jump must be in word_mode. But
10589 : Pmode may not be the same as word_mode for x32. */
10590 17 : rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10591 17 : rtx_insn *insn;
10592 :
10593 17 : insn = emit_insn (gen_pop (ecx));
10594 17 : m->fs.cfa_offset -= UNITS_PER_WORD;
10595 17 : m->fs.sp_offset -= UNITS_PER_WORD;
10596 :
10597 33 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10598 17 : x = gen_rtx_SET (stack_pointer_rtx, x);
10599 17 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10600 17 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10601 17 : RTX_FRAME_RELATED_P (insn) = 1;
10602 :
10603 17 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10604 17 : }
10605 : else
10606 1489927 : emit_jump_insn (gen_simple_return_internal ());
10607 : }
10608 :
10609 : /* Restore the state back to the state from the prologue,
10610 : so that it's correct for the next epilogue. */
10611 1517083 : m->fs = frame_state_save;
10612 : }
10613 :
/* Implement TARGET_ASM_FUNCTION_EPILOGUE: reset state the function body
   may have modified, and handle Mach-O's restriction on trailing labels.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* If the hard PIC register is in use, restore its fixed register
     number so it is correct for the next function emitted.  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
         First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	 then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  */
	    fputs ("\tnop\n", file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs ("\tud2\n", file);
	    }
	}
      else if (deleted_debug_label)
	/* Nothing but deleted debug labels: neutralize their numbers so
	   -g and -g0 emit identical code.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
10685 :
10686 : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10687 :
10688 : void
10689 59 : ix86_print_patchable_function_entry (FILE *file,
10690 : unsigned HOST_WIDE_INT patch_area_size,
10691 : bool record_p)
10692 : {
10693 59 : if (cfun->machine->function_label_emitted)
10694 : {
10695 : /* NB: When ix86_print_patchable_function_entry is called after
10696 : function table has been emitted, we have inserted or queued
10697 : a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10698 : place. There is nothing to do here. */
10699 : return;
10700 : }
10701 :
10702 8 : default_print_patchable_function_entry (file, patch_area_size,
10703 : record_p);
10704 : }
10705 :
10706 : /* Output patchable area. NB: default_print_patchable_function_entry
10707 : isn't available in i386.md. */
10708 :
10709 : void
10710 51 : ix86_output_patchable_area (unsigned int patch_area_size,
10711 : bool record_p)
10712 : {
10713 51 : default_print_patchable_function_entry (asm_out_file,
10714 : patch_area_size,
10715 : record_p);
10716 51 : }
10717 :
10718 : /* Return a scratch register to use in the split stack prologue. The
10719 : split stack prologue is used for -fsplit-stack. It is the first
10720 : instructions in the function, even before the regular prologue.
10721 : The scratch register can be any caller-saved register which is not
10722 : used for parameters or for the static chain. */
10723 :
10724 : static unsigned int
10725 24609 : split_stack_prologue_scratch_regno (void)
10726 : {
10727 24609 : if (TARGET_64BIT)
10728 : return R11_REG;
10729 : else
10730 : {
10731 6949 : bool is_fastcall, is_thiscall;
10732 6949 : int regparm;
10733 :
10734 6949 : is_fastcall = (lookup_attribute ("fastcall",
10735 6949 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10736 : != NULL);
10737 6949 : is_thiscall = (lookup_attribute ("thiscall",
10738 6949 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10739 : != NULL);
10740 6949 : regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10741 :
10742 6949 : if (is_fastcall)
10743 : {
10744 0 : if (DECL_STATIC_CHAIN (cfun->decl))
10745 : {
10746 0 : sorry ("%<-fsplit-stack%> does not support fastcall with "
10747 : "nested function");
10748 0 : return INVALID_REGNUM;
10749 : }
10750 : return AX_REG;
10751 : }
10752 6949 : else if (is_thiscall)
10753 : {
10754 0 : if (!DECL_STATIC_CHAIN (cfun->decl))
10755 : return DX_REG;
10756 0 : return AX_REG;
10757 : }
10758 6949 : else if (regparm < 3)
10759 : {
10760 6949 : if (!DECL_STATIC_CHAIN (cfun->decl))
10761 : return CX_REG;
10762 : else
10763 : {
10764 459 : if (regparm >= 2)
10765 : {
10766 0 : sorry ("%<-fsplit-stack%> does not support 2 register "
10767 : "parameters for a nested function");
10768 0 : return INVALID_REGNUM;
10769 : }
10770 : return DX_REG;
10771 : }
10772 : }
10773 : else
10774 : {
10775 : /* FIXME: We could make this work by pushing a register
10776 : around the addition and comparison. */
10777 0 : sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10778 0 : return INVALID_REGNUM;
10779 : }
10780 : }
10781 : }
10782 :
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  Created lazily on first use and preserved across
   garbage collection by GTY.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large model.  */

static GTY(()) rtx split_stack_fn_large;
10791 :
10792 : /* Return location of the stack guard value in the TLS block. */
10793 :
10794 : rtx
10795 259942 : ix86_split_stack_guard (void)
10796 : {
10797 259942 : int offset;
10798 259942 : addr_space_t as = DEFAULT_TLS_SEG_REG;
10799 259942 : rtx r;
10800 :
10801 259942 : gcc_assert (flag_split_stack);
10802 :
10803 : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10804 259942 : offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10805 : #else
10806 : gcc_unreachable ();
10807 : #endif
10808 :
10809 259942 : r = GEN_INT (offset);
10810 357899 : r = gen_const_mem (Pmode, r);
10811 259942 : set_mem_addr_space (r, as);
10812 :
10813 259942 : return r;
10814 : }
10815 :
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  Emits a comparison of
   the stack pointer against the TLS stack guard and, on failure, a
   call to __morestack (or __morestack_large_model) to grow the stack.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  /* Total bytes this function's prologue will allocate.  */
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  /* A scratch register is needed either to compute sp - allocate or to
     hold an indirect call target; bail out if none is available (a
     sorry was already emitted).  */
  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* Offset doesn't fit in a sign-extended 32-bit immediate:
	     materialize it first, then add the stack pointer.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (DImode, R10_REG);
      reg11 = gen_rtx_REG (DImode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (flag_force_indirect_call
	  || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }

	  fn = split_stack_fn_large;

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      /* Compute the GOT address of __morestack_large_model via
		 a local RIP-relative label, then load the entry.  */
	      rtx_code_label *label;
	      rtx x;

	      gcc_assert (Pmode == DImode);

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      fn = copy_to_suggested_reg (x, reg11, Pmode);
	    }
	  else if (ix86_cmodel == CM_LARGE)
	    fn = copy_to_suggested_reg (fn, reg11, Pmode);

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  /* Double shift avoids UB from shifting by >= the type width
	     when HOST_WIDE_INT is 32 bits wide.  */
	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
	{
	  /* 32-bit PIC indirect call: load __morestack's GOT entry.  */
	  rtx x;

	  gcc_assert (Pmode == SImode);

	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  emit_insn (gen_set_got (scratch_reg));
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
			      UNSPEC_GOT);
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
	  x = gen_const_mem (Pmode, x);
	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
	}

      /* 32-bit: pass both parameters on the stack.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.
      */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      /* Skip the non-__morestack path's setup of the scratch register
	 (emitted after `label' below).  */
      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
11118 :
11119 : /* We may have to tell the dataflow pass that the split stack prologue
11120 : is initializing a scratch register. */
11121 :
11122 : static void
11123 15780298 : ix86_live_on_entry (bitmap regs)
11124 : {
11125 15780298 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11126 : {
11127 124 : gcc_assert (flag_split_stack);
11128 124 : bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11129 : }
11130 15780298 : }
11131 :
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, scale, displacement and
   segment).  Return false if the structure of the address is
   grossly off.  */

bool
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return false;
	}
      else if (GET_CODE (addr) == AND)
	{
	  rtx mask = XEXP (addr, 1);
	  rtx shift_val;

	  if (const_32bit_mask (mask, DImode)
	      /* For ASHIFT inside AND, combine will not generate
		 canonical zero-extend. Merge mask for AND and shift_count
		 to check if it is canonical zero-extend.  */
	      || (CONST_INT_P (mask)
		  && GET_CODE (XEXP (addr, 0)) == ASHIFT
		  && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
		  && ((UINTVAL (mask)
		       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
		      == HOST_WIDE_INT_UC (0xffffffff))))
	    {
	      addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
	      if (addr == NULL_RTX)
		return false;

	      if (CONST_INT_P (addr))
		return false;
	    }
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return false;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
	base = addr;
      else
	return false;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (possibly nested) PLUS tree into at most four
	 addends, then classify each one as base, index, scaled index,
	 displacement or TLS segment marker.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return false;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return false;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return false;
	      scale = INTVAL (tmp);
	      /* Only shifts by 0..3 map to encodable scales 1/2/4/8.  */
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return false;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return false;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
	          && TARGET_TLS_DIRECT_SEG_REFS
	          && seg == ADDR_SPACE_GENERIC)
	        seg = DEFAULT_TLS_SEG_REG;
	      else
		return false;
	      break;

	    case SUBREG:
	      if (!REG_P (SUBREG_REG (op)))
		return false;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return false;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return false;
	      disp = op;
	      break;

	    default:
	      return false;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return false;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return false;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* The index must be a (possibly SUBREG-wrapped) register.  */
  if (index)
    {
      if (REG_P (index))
	;
      else if (SUBREG_P (index)
	       && REG_P (SUBREG_REG (index)))
	;
      else
	return false;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return false;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
	  || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
	  || REGNO (base_reg) == BP_REG
	  || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return true;
}
11381 :
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires to two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  /* Look through SUBREGs so the register checks below see the
     underlying register.  */
  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.base)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.index)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since it's predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_CPU_P (K6)
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
11446 :
11447 : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
11448 :
11449 : bool
11450 1181140 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
11451 : unsigned int align,
11452 : enum by_pieces_operation op,
11453 : bool speed_p)
11454 : {
11455 : /* Return true when we are currently expanding memcpy/memset epilogue
11456 : with move_by_pieces or store_by_pieces. */
11457 1181140 : if (cfun->machine->by_pieces_in_use)
11458 : return true;
11459 :
11460 1179037 : return default_use_by_pieces_infrastructure_p (size, align, op,
11461 1179037 : speed_p);
11462 : }
11463 :
11464 : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11465 : this is used for to form addresses to local data when -fPIC is in
11466 : use. */
11467 :
11468 : static bool
11469 0 : darwin_local_data_pic (rtx disp)
11470 : {
11471 0 : return (GET_CODE (disp) == UNSPEC
11472 0 : && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11473 : }
11474 :
11475 : /* True if the function symbol operand X should be loaded from GOT.
11476 : If CALL_P is true, X is a call operand.
11477 :
11478 : NB: -mno-direct-extern-access doesn't force load from GOT for
11479 : call.
11480 :
11481 : NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11482 : statements, since a PIC register could not be available at the
11483 : call site. */
11484 :
11485 : bool
11486 1842821850 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
11487 : {
11488 96335015 : return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11489 : && !TARGET_PECOFF && !TARGET_MACHO
11490 1839967055 : && (!flag_pic || this_is_asm_operands)
11491 1819740314 : && ix86_cmodel != CM_LARGE
11492 1819734285 : && ix86_cmodel != CM_LARGE_PIC
11493 1819734284 : && SYMBOL_REF_P (x)
11494 1819734282 : && ((!call_p
11495 1814333208 : && (!ix86_direct_extern_access
11496 1814330938 : || (SYMBOL_REF_DECL (x)
11497 1632308842 : && lookup_attribute ("nodirect_extern_access",
11498 1632308842 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11499 1819731558 : || (SYMBOL_REF_FUNCTION_P (x)
11500 684972132 : && (!flag_plt
11501 684967721 : || (SYMBOL_REF_DECL (x)
11502 684967721 : && lookup_attribute ("noplt",
11503 684967721 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11504 1842829384 : && !SYMBOL_REF_LOCAL_P (x));
11505 : }
11506 :
11507 : /* Determine if a given RTX is a valid constant. We already know this
11508 : satisfies CONSTANT_P. */
11509 :
static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip a constant offset; only the symbolic part determines
	 legitimacy.  A non-CONST_INT addend is never legitimate.  */
      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    /* TP-relative offsets are only constants for local-exec TLS.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    /* DTP-relative offsets require local-dynamic TLS.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (x, 0, 0);
	    return SYMBOL_REF_P (x);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (LABEL_REF_P (x))
	return true;
      if (!SYMBOL_REF_P (x))
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* Reject immediates whose encoding would embed an ENDBR byte
	 sequence in the instruction stream.  */
      if (ix86_endbr_immediate_operand (x, VOIDmode))
	return false;

      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  /* A wide integer constant is only legitimate if it is a
	     standard SSE constant or fits within the widest vector
	     move available on the target ISA.  */
	  if (!standard_sse_constant_p (x, mode)
	      && GET_MODE_SIZE (TARGET_AVX512F
				? XImode
				: (TARGET_AVX
				   ? OImode
				   : (TARGET_SSE2
				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
	    return false;
	  /* FALLTHRU */
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
	return false;
      break;

    case CONST_DOUBLE:
      /* BFmode floating constants are never legitimate; they are
	 forced to memory.  */
      if (mode == E_BFmode)
	return false;

      /* FALLTHRU */
    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
11623 :
11624 : /* Determine if it's legal to put X into the constant pool. This
11625 : is not possible for the address of thread-local symbols, which
11626 : is checked above. */
11627 :
11628 : static bool
11629 61965965 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11630 : {
11631 : /* We can put any immediate constant in memory. */
11632 61965965 : switch (GET_CODE (x))
11633 : {
11634 : CASE_CONST_ANY:
11635 : return false;
11636 :
11637 1786299 : default:
11638 1786299 : break;
11639 : }
11640 :
11641 1786299 : return !ix86_legitimate_constant_p (mode, x);
11642 : }
11643 :
11644 : /* Return a unique alias set for the GOT. */
11645 :
11646 : alias_set_type
11647 188094 : ix86_GOT_alias_set (void)
11648 : {
11649 188094 : static alias_set_type set = -1;
11650 188094 : if (set == -1)
11651 2929 : set = new_alias_set ();
11652 188094 : return set;
11653 : }
11654 :
11655 : /* Nonzero if the constant value X is a legitimate general operand
11656 : when generating PIC code. It is given that flag_pic is on and
11657 : that X satisfies CONSTANT_P. */
11658 :
bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      /* Strip a constant integer offset; the symbolic part decides.  */
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    /* TP-relative offsets require local-exec TLS.  */
	    x = XVECEXP (inner, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (inner, 0, 0);
	    return SYMBOL_REF_P (x);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* A CONST that is not an UNSPEC is checked as a displacement.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* Non-symbolic constants are always fine under PIC.  */
      return true;
    }
}
11702 :
11703 : /* Determine if a given CONST RTX is a valid memory displacement
11704 : in PIC mode. */
11705 :
bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1))
	    break;
	  /* symbol@tpoff/dtpoff + offset is fine as long as the offset
	     fits in the signed 32-bit relocation field.  */
	  if (GET_CODE (op0) == UNSPEC
	      && (XINT (op0, 1) == UNSPEC_DTPOFF
		  || XINT (op0, 1) == UNSPEC_NTPOFF)
	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
	    return true;
	  /* Large offsets are rejected to keep symbol+offset within
	     a conservative +/-16MB window.  */
	  if (INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (LABEL_REF_P (op0))
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (!SYMBOL_REF_P (op0))
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol needs always to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
#if TARGET_PECOFF
	      if (is_imported_p (op0))
		return true;
#endif

	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Non-external-weak function symbols need to be resolved only
		 for the large model.  Non-external symbols don't need to be
		 resolved for large and medium models.  For the small model,
		 we don't need to resolve anything here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0)
		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
		  || !SYMBOL_REF_EXTERNAL_P (op0)
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	    }
	  /* ELF: allow the direct reference for local symbols, and for
	     non-weak external data under -fpie when the linker supports
	     copy relocations (unless nodirect_extern_access forbids it).  */
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && (SYMBOL_REF_LOCAL_P (op0)
		       || ((ix86_direct_extern_access
			    && !(SYMBOL_REF_DECL (op0)
				 && lookup_attribute ("nodirect_extern_access",
						      DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
			   && HAVE_LD_PIE_COPYRELOC
			   && flag_pie
			   && !SYMBOL_REF_WEAK (op0)
			   && !SYMBOL_REF_FUNCTION_P (op0)))
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  /* Anything else must be wrapped in a CONST to be a candidate.  */
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	  && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
	return false;
      return true;
    }

  /* 32-bit path: optionally strip a constant offset, then require a
     recognized PIC/TLS unspec.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	      || LABEL_REF_P (XVECEXP (disp, 0, 0)));
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While ABI specify also 32bit relocation but we don't produce it in
	 small PIC model at all.  */
      if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	   || LABEL_REF_P (XVECEXP (disp, 0, 0)))
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    case UNSPEC_SECREL32:
      disp = XVECEXP (disp, 0, 0);
      return SYMBOL_REF_P (disp);
    }

  return false;
}
11874 :
11875 : /* Determine if op is suitable RTX for an address register.
11876 : Return naked register if a register or a register subreg is
11877 : found, otherwise return NULL_RTX. */
11878 :
11879 : static rtx
11880 1368794230 : ix86_validate_address_register (rtx op)
11881 : {
11882 1368794230 : machine_mode mode = GET_MODE (op);
11883 :
11884 : /* Only SImode or DImode registers can form the address. */
11885 1368794230 : if (mode != SImode && mode != DImode)
11886 : return NULL_RTX;
11887 :
11888 1368787395 : if (REG_P (op))
11889 : return op;
11890 727528 : else if (SUBREG_P (op))
11891 : {
11892 727528 : rtx reg = SUBREG_REG (op);
11893 :
11894 727528 : if (!REG_P (reg))
11895 : return NULL_RTX;
11896 :
11897 727528 : mode = GET_MODE (reg);
11898 :
11899 : /* Don't allow SUBREGs that span more than a word. It can
11900 : lead to spill failures when the register is one word out
11901 : of a two word structure. */
11902 1500642 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11903 : return NULL_RTX;
11904 :
11905 : /* Allow only SUBREGs of non-eliminable hard registers. */
11906 250514 : if (register_no_elim_operand (reg, mode))
11907 : return reg;
11908 : }
11909 :
11910 : /* Op is not a register. */
11911 : return NULL_RTX;
11912 : }
11913 :
11914 : /* Determine which memory address register set insn can use. */
11915 :
static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm gets GPR32 or GPR16 depending on
     -mapx-inline-asm-use-gpr32.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative, since both are
     global state clobbered by extract_insn_cached below.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  /* MIN picks the most restrictive class across alternatives.  */
	  which_alternative = i;
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the global recognition state before returning.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
11968 :
11969 : /* Return memory address register class insn can use. */
11970 :
11971 : enum reg_class
11972 214517208 : ix86_insn_base_reg_class (rtx_insn* insn)
11973 : {
11974 214517208 : switch (ix86_memory_address_reg_class (insn))
11975 : {
11976 : case ADDR_GPR8:
11977 : return LEGACY_GENERAL_REGS;
11978 : case ADDR_GPR16:
11979 : return GENERAL_GPR16;
11980 : case ADDR_GPR32:
11981 : break;
11982 0 : default:
11983 0 : gcc_unreachable ();
11984 : }
11985 :
11986 : return BASE_REG_CLASS;
11987 : }
11988 :
11989 : bool
11990 1287243 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11991 : {
11992 1287243 : switch (ix86_memory_address_reg_class (insn))
11993 : {
11994 0 : case ADDR_GPR8:
11995 0 : return LEGACY_INT_REGNO_P (regno);
11996 0 : case ADDR_GPR16:
11997 0 : return GENERAL_GPR16_REGNO_P (regno);
11998 1287243 : case ADDR_GPR32:
11999 1287243 : break;
12000 0 : default:
12001 0 : gcc_unreachable ();
12002 : }
12003 :
12004 1287243 : return GENERAL_REGNO_P (regno);
12005 : }
12006 :
12007 : enum reg_class
12008 39412034 : ix86_insn_index_reg_class (rtx_insn* insn)
12009 : {
12010 39412034 : switch (ix86_memory_address_reg_class (insn))
12011 : {
12012 : case ADDR_GPR8:
12013 : return LEGACY_INDEX_REGS;
12014 : case ADDR_GPR16:
12015 : return INDEX_GPR16;
12016 : case ADDR_GPR32:
12017 : break;
12018 0 : default:
12019 0 : gcc_unreachable ();
12020 : }
12021 :
12022 : return INDEX_REG_CLASS;
12023 : }
12024 :
12025 : /* Recognizes RTL expressions that are valid memory addresses for an
12026 : instruction. The MODE argument is the machine mode for the MEM
12027 : expression that wants to use this address.
12028 :
12029 : It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12030 : convert common non-canonical forms to canonical form so that they will
12031 : be recognized. */
12032 :
static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
			   code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
	  || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
	  || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      /* Reject displacements that would embed an ENDBR byte pattern.  */
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While ABI specify also 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  /* TLS displacements are validated further below.  */
	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_SECREL32:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (!LABEL_REF_P (disp)
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (!SYMBOL_REF_P (disp)
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
12234 :
12235 : /* Determine if a given RTX is a valid constant address. */
12236 :
12237 : bool
12238 2788630664 : constant_address_p (rtx x)
12239 : {
12240 2868866897 : return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12241 : }
12242 :
12243 :
12244 : /* Return a legitimate reference for ORIG (an address) using the
12245 : register REG. If REG is 0, a new pseudo is generated.
12246 :
12247 : There are two types of references that must be handled:
12248 :
12249 : 1. Global data references must load the address from the GOT, via
12250 : the PIC reg. An insn is emitted to do this load, and the reg is
12251 : returned.
12252 :
12253 : 2. Static data references, constant pool addresses, and code labels
12254 : compute the address as an offset from the GOT, whose base is in
12255 : the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12256 : differentiate them from global data objects. The returned
12257 : address is the PIC reg + an unspec constant.
12258 :
12259 : TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12260 : reg also appears in the address. */
12261 :
rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif
    }

  /* Already-legitimate 64-bit displacements need no rewriting.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  /* 64-bit small/medium PIC: load through a @GOTPCREL slot.  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  /* Offsets outside +/-16MB are forced into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and recombine.  */
	      rtx base = legitimize_pic_address (op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (SYMBOL_REF_P (base)
			  || LABEL_REF_P (base)))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      /* Re-associate so the constant part stays the
			 outermost addend.  */
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
12476 :
12477 : /* Load the thread pointer. If TO_REG is true, force it into a register. */
12478 :
12479 : static rtx
12480 24438 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
12481 : {
12482 24438 : rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12483 :
12484 24438 : if (GET_MODE (tp) != tp_mode)
12485 : {
12486 11 : gcc_assert (GET_MODE (tp) == SImode);
12487 11 : gcc_assert (tp_mode == DImode);
12488 :
12489 11 : tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12490 : }
12491 :
12492 24438 : if (to_reg)
12493 8122 : tp = copy_to_mode_reg (tp_mode, tp);
12494 :
12495 24438 : return tp;
12496 : }
12497 :
12498 : /* Construct the SYMBOL_REF for the _tls_index symbol. */
12499 :
12500 : static GTY(()) rtx ix86_tls_index_symbol;
12501 :
12502 : static rtx
12503 0 : ix86_tls_index (void)
12504 : {
12505 0 : if (!ix86_tls_index_symbol)
12506 0 : ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12507 :
12508 0 : if (flag_pic)
12509 0 : return gen_rtx_CONST (Pmode,
12510 : gen_rtx_UNSPEC (Pmode,
12511 : gen_rtvec (1, ix86_tls_index_symbol),
12512 : UNSPEC_PCREL));
12513 : else
12514 0 : return ix86_tls_index_symbol;
12515 : }
12516 :
12517 : /* Construct the SYMBOL_REF for the tls_get_addr function. */
12518 :
12519 : static GTY(()) rtx ix86_tls_symbol;
12520 :
12521 : rtx
12522 6713 : ix86_tls_get_addr (void)
12523 : {
12524 6713 : if (cfun->machine->call_saved_registers
12525 6713 : == TYPE_NO_CALLER_SAVED_REGISTERS)
12526 : {
12527 : /* __tls_get_addr doesn't preserve vector registers. When a
12528 : function with no_caller_saved_registers attribute calls
12529 : __tls_get_addr, YMM and ZMM registers will be clobbered.
12530 : Issue an error and suggest -mtls-dialect=gnu2 in this case. */
12531 3 : if (cfun->machine->func_type == TYPE_NORMAL)
12532 1 : error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
12533 : " with the %<no_caller_saved_registers%> attribute"));
12534 : else
12535 3 : error (cfun->machine->func_type == TYPE_EXCEPTION
12536 : ? G_("%<-mtls-dialect=gnu2%> must be used with an"
12537 : " exception service routine")
12538 : : G_("%<-mtls-dialect=gnu2%> must be used with an"
12539 : " interrupt service routine"));
12540 : /* Don't issue the same error twice. */
12541 3 : cfun->machine->func_type = TYPE_NORMAL;
12542 3 : cfun->machine->call_saved_registers
12543 3 : = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
12544 : }
12545 :
12546 6713 : if (!ix86_tls_symbol)
12547 : {
12548 204 : const char *sym
12549 241 : = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12550 241 : ? "___tls_get_addr" : "__tls_get_addr");
12551 :
12552 278 : ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12553 : }
12554 :
12555 6713 : if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12556 : {
12557 2 : rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12558 : UNSPEC_PLTOFF);
12559 2 : return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12560 : gen_rtx_CONST (Pmode, unspec));
12561 : }
12562 :
12563 6711 : return ix86_tls_symbol;
12564 : }
12565 :
12566 : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12567 :
12568 : static GTY(()) rtx ix86_tls_module_base_symbol;
12569 :
12570 : rtx
12571 87 : ix86_tls_module_base (void)
12572 : {
12573 87 : if (!ix86_tls_module_base_symbol)
12574 : {
12575 10 : ix86_tls_module_base_symbol
12576 10 : = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12577 :
12578 10 : SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12579 10 : |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12580 : }
12581 :
12582 87 : return ix86_tls_module_base_symbol;
12583 : }
12584 :
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol (possibly wrapped) to legitimize and MODEL selects
   one of the four ELF TLS access models (global-dynamic, local-dynamic,
   initial-exec, local-exec).  Returns an RTX computing the address; may
   emit instructions (calls to __tls_get_addr, GOT setup, etc.) as a side
   effect.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
	ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
			       gen_rtx_MULT (Pmode, ind,
					     GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
			   gen_rtx_UNSPEC (SImode,
					   gen_rtvec (1, x),
					   UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* 32-bit needs the PIC register (GOT pointer) for the
	 __tls_get_addr call sequence.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* TLS descriptor (GNU2) sequence: the descriptor call yields
	     an offset from the thread pointer.  */
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  /* Record that DEST equals the symbol address so RTL optimizers
	     can CSE it.  */
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  /* Classic GNU TLS: call __tls_get_addr; the result is in %rax
	     on 64-bit.  */
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
	      insns = end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      /* The call computes a constant address; mark it const so
		 emit_libcall_block can attach an equivalence.  */
	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 local-dynamic: compute the module base via a descriptor
	     call on _TLS_MODULE_BASE_.  */
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  /* Classic local-dynamic: one __tls_get_addr call obtains the
	     module base shared by all LD accesses in the function.  */
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
	      insns = end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      /* Add the symbol's DTP-relative offset to the module base.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 descriptors yield TP-relative values; add the thread
	     pointer (with a ptr_mode detour on x32).  */
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the TP offset of the symbol from the GOT.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  /* GNU TLS offsets are negative from TP: address = TP + off.  */
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  /* Sun-style TLS uses positive offsets: address = TP - off.  */
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant; no GOT access needed.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
12847 :
12848 : /* Return true if the TLS address requires insn using integer registers.
12849 : It's used to prevent KMOV/VMOV in TLS code sequences which require integer
12850 : MOV instructions, refer to PR103275. */
12851 : bool
12852 15211546 : ix86_gpr_tls_address_pattern_p (rtx mem)
12853 : {
12854 15211546 : gcc_assert (MEM_P (mem));
12855 :
12856 15211546 : rtx addr = XEXP (mem, 0);
12857 15211546 : subrtx_var_iterator::array_type array;
12858 52967298 : FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12859 : {
12860 37763284 : rtx op = *iter;
12861 37763284 : if (GET_CODE (op) == UNSPEC)
12862 200818 : switch (XINT (op, 1))
12863 : {
12864 : case UNSPEC_GOTNTPOFF:
12865 7532 : return true;
12866 0 : case UNSPEC_TPOFF:
12867 0 : if (!TARGET_64BIT)
12868 : return true;
12869 : break;
12870 : default:
12871 : break;
12872 : }
12873 : }
12874 :
12875 15204014 : return false;
12876 15211546 : }
12877 :
12878 : /* Return true if OP refers to a TLS address. */
12879 : bool
12880 233034471 : ix86_tls_address_pattern_p (rtx op)
12881 : {
12882 233034471 : subrtx_var_iterator::array_type array;
12883 1387202509 : FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12884 : {
12885 1154186046 : rtx op = *iter;
12886 1154186046 : if (MEM_P (op))
12887 : {
12888 105146516 : rtx *x = &XEXP (op, 0);
12889 166415963 : while (GET_CODE (*x) == PLUS)
12890 : {
12891 : int i;
12892 183826372 : for (i = 0; i < 2; i++)
12893 : {
12894 122556925 : rtx u = XEXP (*x, i);
12895 122556925 : if (GET_CODE (u) == ZERO_EXTEND)
12896 132610 : u = XEXP (u, 0);
12897 122556925 : if (GET_CODE (u) == UNSPEC
12898 18040 : && XINT (u, 1) == UNSPEC_TP)
12899 18008 : return true;
12900 : }
12901 61269447 : x = &XEXP (*x, 0);
12902 : }
12903 :
12904 105128508 : iter.skip_subrtxes ();
12905 : }
12906 : }
12907 :
12908 233016463 : return false;
12909 233034471 : }
12910 :
/* Rewrite *LOC so that it refers to a default TLS address space.

   Find a MEM inside *LOC whose address contains an UNSPEC_TP term,
   drop that term from the address, and mark the MEM as living in the
   TLS segment address space instead.  Modifies *LOC in place; handles
   at most one such MEM (returns after rewriting it).  */
static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      /* Intentionally shadows the parameter: inner LOC points at the
	 current sub-rtx.  */
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  /* Walk the left spine of PLUS expressions looking for the
	     thread-pointer term on either side.  */
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      /* NB: Since address override only applies to the
			 (reg32) part in fs:(reg32), return if address
			 override is used.  */
		      if (Pmode != word_mode
			  && REG_P (XEXP (*x, 1 - i)))
			return;

		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Splice out the UNSPEC_TP term, keeping the
			 other operand.  */
		      *x = XEXP (*x, 1 - i);

		      /* Install the reduced address and tag the MEM
			 with the TLS segment address space.  */
		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  /* This MEM's address contained no UNSPEC_TP; don't descend
	     into it.  */
	  iter.skip_subrtxes ();
	}
    }
}
12957 :
12958 : /* Rewrite instruction pattern involvning TLS address
12959 : so that it refers to a default TLS address space. */
12960 : rtx
12961 18008 : ix86_rewrite_tls_address (rtx pattern)
12962 : {
12963 18008 : pattern = copy_insn (pattern);
12964 18008 : ix86_rewrite_tls_address_1 (&pattern);
12965 18008 : return pattern;
12966 : }
12967 :
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.cc for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbols get their dedicated legitimization path.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  /* Likewise for (const (plus (tls_symbol) (const_int))): legitimize
     the symbol and re-add the constant offset.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
        return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Exactly one of the two candidate constants must be a
	     CONST_INT; the other becomes the OTHER addend.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      /* If the canonicalizations above produced a valid address,
	 we're done.  */
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* Otherwise start forcing problematic sub-expressions into
	 registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* Last resort: force one operand of the PLUS into a fresh
	 register, converting the value to Pmode if force_operand
	 produced something else.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
13174 :
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.

   Recurses through CONST/PLUS/MINUS/UNSPEC wrappers, emitting the
   appropriate relocation suffix (@GOT, @GOTOFF, @tpoff, ...) for each
   UNSPEC kind.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." = current location; only meaningful in PIC output.  */
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* 'P' print code requests a PLT-indirect call reference.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      /* Bracket the difference so the assembler parses it as one term.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand, then the relocation suffix.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
	case UNSPEC_SECREL32:
	  fputs ("@secrel32", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
13334 :
13335 : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13336 : We need to emit DTP-relative relocations. */
13337 :
13338 : static void ATTRIBUTE_UNUSED
13339 667 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13340 : {
13341 667 : fputs (ASM_LONG, file);
13342 667 : output_addr_const (file, x);
13343 : #if TARGET_WIN32_TLS
13344 : fputs ("@secrel32", file);
13345 : #else
13346 667 : fputs ("@dtpoff", file);
13347 : #endif
13348 667 : switch (size)
13349 : {
13350 : case 4:
13351 : break;
13352 548 : case 8:
13353 548 : fputs (", 0", file);
13354 548 : break;
13355 0 : default:
13356 0 : gcc_unreachable ();
13357 : }
13358 667 : }
13359 :
13360 : /* Return true if X is a representation of the PIC register. This copes
13361 : with calls from ix86_find_base_term, where the register might have
13362 : been replaced by a cselib value. */
13363 :
13364 : static bool
13365 26862218 : ix86_pic_register_p (rtx x)
13366 : {
13367 26862218 : if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13368 748607 : return (pic_offset_table_rtx
13369 748607 : && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13370 26113611 : else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13371 : return true;
13372 26110666 : else if (!REG_P (x))
13373 : return false;
13374 25505935 : else if (pic_offset_table_rtx)
13375 : {
13376 25486051 : if (REGNO (x) == REGNO (pic_offset_table_rtx))
13377 : return true;
13378 403271 : if (HARD_REGISTER_P (x)
13379 382942 : && !HARD_REGISTER_P (pic_offset_table_rtx)
13380 786213 : && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13381 : return true;
13382 : return false;
13383 : }
13384 : else
13385 19884 : return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13386 : }
13387 :
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses: turn an
   fs:/gs:-segment address containing an UNSPEC_NTPOFF back into a plain
   reference to the TLS symbol (plus any index/base/offset terms).
   Returns ORIG_X unchanged if the pattern doesn't match.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* The fs:/gs: form only exists when direct segment references are on.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* Require a decomposable address in the TLS segment with a CONST
     displacement.  */
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Look through an added constant offset: (const (plus (unspec) N)).  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  /* Recover the TLS symbol wrapped by the NTPOFF unspec.  */
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Re-add the constant offset we looked through above, if any.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild scaled-index and base terms of the original address.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* If the input was a MEM, return a MEM over the rebuilt address.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
13430 :
13431 : /* In the name of slightly smaller debug output, and to cater to
13432 : general assembler lossage, recognize PIC+GOTOFF and turn it back
13433 : into a direct symbol reference.
13434 :
13435 : On Darwin, this is necessary to avoid a crash, because Darwin
13436 : has a different PIC label for each routine but the DWARF debugging
13437 : information is not associated with any particular routine, so it's
13438 : necessary to remove references to the PIC label from RTL stored by
13439 : the DWARF output code.
13440 :
13441 : This helper is used in the normal ix86_delegitimize_address
13442 : entrypoint (e.g. used in the target delegitimization hook) and
13443 : in ix86_find_base_term. As compile time memory optimization, we
13444 : avoid allocating rtxes that will not change anything on the outcome
13445 : of the callers (find_base_value and find_base_term). */
13446 :
static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  /* For a MEM, delegitimize the address it contains.  */
  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec [sym] UNSPEC_PCREL) offset)) ->
	 (plus offset sym), i.e. strip the PCREL wrapper while keeping
	 the constant displacement.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* (const (unspec [sym] UNSPEC_GOTPCREL/UNSPEC_PCREL)) -> sym.
	 GOTPCREL is only stripped when the original was a MEM (a GOT
	 load); bare PCREL is stripped unconditionally.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  /* If the symbol's mode differs from the MEM's, narrow it;
	     give up (return orig_x) when no lowpart exists.  */
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  /* Everything below expects the (plus something (const ...)) shape
     produced for GOT/GOTOFF/PLTOFF references.  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither operand is the PIC register: treat the whole
	     inner PLUS as a non-PIC addend.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Descend into the CONST; peel off an optional trailing CONST_INT.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Accept only the UNSPEC/context combinations that really denote a
     symbol reference: GOT loads must be MEMs without extra addend,
     GOTOFF/PLTOFF must not be MEMs.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_VAROFF)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Narrow the result back to the original MEM's mode if needed.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
13598 :
13599 : /* The normal instantiation of the above template. */
13600 :
13601 : static rtx
13602 325135905 : ix86_delegitimize_address (rtx x)
13603 : {
13604 325135905 : return ix86_delegitimize_address_1 (x, false);
13605 : }
13606 :
13607 : /* If X is a machine specific address (i.e. a symbol or label being
13608 : referenced as a displacement from the GOT implemented using an
13609 : UNSPEC), then return the base term. Otherwise return X. */
13610 :
13611 : rtx
13612 6656103796 : ix86_find_base_term (rtx x)
13613 : {
13614 6656103796 : rtx term;
13615 :
13616 6656103796 : if (TARGET_64BIT)
13617 : {
13618 3457160380 : if (GET_CODE (x) != CONST)
13619 : return x;
13620 44302556 : term = XEXP (x, 0);
13621 44302556 : if (GET_CODE (term) == PLUS
13622 44287938 : && CONST_INT_P (XEXP (term, 1)))
13623 44287938 : term = XEXP (term, 0);
13624 44302556 : if (GET_CODE (term) != UNSPEC
13625 40405 : || (XINT (term, 1) != UNSPEC_GOTPCREL
13626 40405 : && XINT (term, 1) != UNSPEC_PCREL))
13627 : return x;
13628 :
13629 0 : return XVECEXP (term, 0, 0);
13630 : }
13631 :
13632 3198943416 : return ix86_delegitimize_address_1 (x, true);
13633 : }
13634 :
13635 : /* Return true if X shouldn't be emitted into the debug info.
13636 : Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13637 : symbol easily into the .debug_info section, so we need not to
13638 : delegitimize, but instead assemble as @gotoff.
13639 : Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13640 : assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
13641 :
13642 : static bool
13643 1847445 : ix86_const_not_ok_for_debug_p (rtx x)
13644 : {
13645 1847445 : if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13646 : return true;
13647 :
13648 1847425 : if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13649 0 : return true;
13650 :
13651 : return false;
13652 : }
13653 :
/* Write to FILE the one- to three-letter condition suffix (e.g. "e",
   "ne", "nbe") for comparison CODE evaluated in CC mode MODE.  If
   REVERSE, emit the suffix for the reversed condition.  FP selects the
   alternate spellings used by fcmov/SSE-style consumers where some
   assemblers need them (e.g. "nbe" instead of "a").  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* FP compares are first mapped onto the integer-flag conditions.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the single-flag CC modes, EQ tests that one flag directly.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      /* NE in the single-flag CC modes is the negated flag test.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* Overflow is known clear: LT reduces to a sign-flag test.  */
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* Overflow is known clear: GE reduces to sign-flag clear.  */
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
13803 :
13804 : /* Print the name of register X to FILE based on its machine mode and number.
13805 : If CODE is 'w', pretend the mode is HImode.
13806 : If CODE is 'b', pretend the mode is QImode.
13807 : If CODE is 'k', pretend the mode is SImode.
13808 : If CODE is 'q', pretend the mode is DImode.
13809 : If CODE is 'x', pretend the mode is V4SFmode.
13810 : If CODE is 't', pretend the mode is V8SFmode.
13811 : If CODE is 'g', pretend the mode is V16SFmode.
13812 : If CODE is 'h', pretend the reg is the 'high' byte register.
13813 : If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13814 : If CODE is 'd', duplicate the operand for AVX instruction.
13815 : If CODE is 'V', print naked full integer register name without %.
13816 : */
13817 :
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  /* Effective operand size in bytes; 0 means "high byte" register.  */
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes registers with '%', except for the naked
     'V' form.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  /* The PC prints as %rip; only valid on 64-bit targets.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  /* 'y' prints the x87 stack top as "st(0)" instead of "st".  */
  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Map the size-override codes (see the comment above) to a byte
     size; otherwise use the operand's own mode size.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* Pseudo frame/arg pointers and FPSR have no assembler names.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' (naked full integer register) forces word size.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers take an 'r'/'e' prefix (%rax/%eax).  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* High-byte registers (%ah etc.).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* 256/512-bit SSE registers: emit 'y'/'z' and skip the leading
	 'x' of the "xmmN" name.  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  /* 'd' with AVX repeats the register as a second source operand.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
13968 :
13969 : /* Meaning of CODE:
13970 : L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13971 : C -- print opcode suffix for set/cmov insn.
13972 : c -- like C, but print reversed condition
13973 : F,f -- likewise, but for floating-point.
13974 : O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13975 : otherwise nothing
13976 : R -- print embedded rounding and sae.
13977 : r -- print only sae.
13978 : z -- print the opcode suffix for the size of the current operand.
13979 : Z -- likewise, with special suffixes for x87 instructions.
13980 : * -- print a star (in certain assembler syntax)
13981 : A -- print an absolute memory reference.
13982 : E -- print address with DImode register names if TARGET_64BIT.
13983 : w -- print the operand as if it's a "word" (HImode) even if it isn't.
13984 : s -- print a shift double count, followed by the assemblers argument
13985 : delimiter.
13986 : b -- print the QImode name of the register for the indicated operand.
13987 : %b0 would print %al if operands[0] is reg 0.
13988 : w -- likewise, print the HImode name of the register.
13989 : k -- likewise, print the SImode name of the register.
13990 : q -- likewise, print the DImode name of the register.
13991 : x -- likewise, print the V4SFmode name of the register.
13992 : t -- likewise, print the V8SFmode name of the register.
13993 : g -- likewise, print the V16SFmode name of the register.
13994 : h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13995 : y -- print "st(0)" instead of "st" as a register.
13996 : d -- print duplicated register operand for AVX instruction.
13997 : D -- print condition for SSE cmp instruction.
13998 : P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13999 : address from GOT.
14000 : p -- print raw symbol name.
14001 : X -- don't print any sort of PIC '@' suffix for a symbol.
14002 : & -- print some in-use local-dynamic symbol name.
14003 : H -- print a memory address offset by 8; used for sse high-parts
14004 : Y -- print condition for XOP pcom* instruction.
14005 : V -- print naked full integer register name without %.
14006 : v -- print segment override prefix
14007 : + -- print a branch hint as 'cs' or 'ds' prefix
14008 : ; -- print a semicolon (after prefixes due to bug in older gas).
14009 : ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14010 : ^ -- print addr32 prefix if Pmode != word_mode
14011 : M -- print addr32 prefix for TARGET_X32 with VSIB address.
14012 : ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
14013 : N -- print maskz if it's constant 0 operand.
14014 : G -- print embedded flag for ccmp/ctest.
14015 : */
14016 :
14017 : void
14018 176676608 : ix86_print_operand (FILE *file, rtx x, int code)
14019 : {
14020 176872358 : if (code)
14021 : {
14022 62112632 : switch (code)
14023 : {
14024 195746 : case 'A':
14025 195746 : switch (ASSEMBLER_DIALECT)
14026 : {
14027 195746 : case ASM_ATT:
14028 195746 : putc ('*', file);
14029 195746 : break;
14030 :
14031 0 : case ASM_INTEL:
14032 : /* Intel syntax. For absolute addresses, registers should not
14033 : be surrounded by braces. */
14034 0 : if (!REG_P (x))
14035 : {
14036 0 : putc ('[', file);
14037 0 : ix86_print_operand (file, x, 0);
14038 0 : putc (']', file);
14039 0 : return;
14040 : }
14041 : break;
14042 :
14043 0 : default:
14044 0 : gcc_unreachable ();
14045 : }
14046 :
14047 195746 : ix86_print_operand (file, x, 0);
14048 195746 : return;
14049 :
14050 3557828 : case 'E':
14051 : /* Wrap address in an UNSPEC to declare special handling. */
14052 3557828 : if (TARGET_64BIT)
14053 3070353 : x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14054 :
14055 3557828 : output_address (VOIDmode, x);
14056 3557828 : return;
14057 :
14058 0 : case 'L':
14059 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14060 0 : putc ('l', file);
14061 0 : return;
14062 :
14063 0 : case 'W':
14064 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14065 0 : putc ('w', file);
14066 0 : return;
14067 :
14068 0 : case 'B':
14069 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14070 0 : putc ('b', file);
14071 0 : return;
14072 :
14073 0 : case 'Q':
14074 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14075 0 : putc ('l', file);
14076 0 : return;
14077 :
14078 0 : case 'S':
14079 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14080 0 : putc ('s', file);
14081 0 : return;
14082 :
14083 0 : case 'T':
14084 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14085 0 : putc ('t', file);
14086 0 : return;
14087 :
14088 : case 'O':
14089 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14090 : if (ASSEMBLER_DIALECT != ASM_ATT)
14091 : return;
14092 :
14093 : switch (GET_MODE_SIZE (GET_MODE (x)))
14094 : {
14095 : case 2:
14096 : putc ('w', file);
14097 : break;
14098 :
14099 : case 4:
14100 : putc ('l', file);
14101 : break;
14102 :
14103 : case 8:
14104 : putc ('q', file);
14105 : break;
14106 :
14107 : default:
14108 : output_operand_lossage ("invalid operand size for operand "
14109 : "code 'O'");
14110 : return;
14111 : }
14112 :
14113 : putc ('.', file);
14114 : #endif
14115 : return;
14116 :
14117 37846 : case 'z':
14118 37846 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14119 : {
14120 : /* Opcodes don't get size suffixes if using Intel opcodes. */
14121 37844 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14122 : return;
14123 :
14124 75688 : switch (GET_MODE_SIZE (GET_MODE (x)))
14125 : {
14126 6 : case 1:
14127 6 : putc ('b', file);
14128 6 : return;
14129 :
14130 6 : case 2:
14131 6 : putc ('w', file);
14132 6 : return;
14133 :
14134 37325 : case 4:
14135 37325 : putc ('l', file);
14136 37325 : return;
14137 :
14138 507 : case 8:
14139 507 : putc ('q', file);
14140 507 : return;
14141 :
14142 0 : default:
14143 0 : output_operand_lossage ("invalid operand size for operand "
14144 : "code 'z'");
14145 0 : return;
14146 : }
14147 : }
14148 :
14149 2 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14150 : {
14151 1 : if (this_is_asm_operands)
14152 1 : warning_for_asm (this_is_asm_operands,
14153 : "non-integer operand used with operand code %<z%>");
14154 : else
14155 0 : warning (0, "non-integer operand used with operand code %<z%>");
14156 : }
14157 : /* FALLTHRU */
14158 :
14159 382315 : case 'Z':
14160 : /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14161 382315 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14162 : return;
14163 :
14164 382315 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14165 : {
14166 29242 : switch (GET_MODE_SIZE (GET_MODE (x)))
14167 : {
14168 3501 : case 2:
14169 : #ifdef HAVE_AS_IX86_FILDS
14170 3501 : putc ('s', file);
14171 : #endif
14172 3501 : return;
14173 :
14174 3941 : case 4:
14175 3941 : putc ('l', file);
14176 3941 : return;
14177 :
14178 7179 : case 8:
14179 : #ifdef HAVE_AS_IX86_FILDQ
14180 7179 : putc ('q', file);
14181 : #else
14182 : fputs ("ll", file);
14183 : #endif
14184 7179 : return;
14185 :
14186 : default:
14187 : break;
14188 : }
14189 : }
14190 367694 : else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14191 : {
14192 : /* 387 opcodes don't get size suffixes
14193 : if the operands are registers. */
14194 367692 : if (STACK_REG_P (x))
14195 : return;
14196 :
14197 690630 : switch (GET_MODE_SIZE (GET_MODE (x)))
14198 : {
14199 23079 : case 4:
14200 23079 : putc ('s', file);
14201 23079 : return;
14202 :
14203 32724 : case 8:
14204 32724 : putc ('l', file);
14205 32724 : return;
14206 :
14207 289510 : case 12:
14208 289510 : case 16:
14209 289510 : putc ('t', file);
14210 289510 : return;
14211 :
14212 : default:
14213 : break;
14214 : }
14215 : }
14216 : else
14217 : {
14218 2 : output_operand_lossage ("invalid operand type used with "
14219 : "operand code '%c'", code);
14220 2 : return;
14221 : }
14222 :
14223 2 : output_operand_lossage ("invalid operand size for operand code '%c'",
14224 : code);
14225 2 : return;
14226 :
14227 : case 'd':
14228 : case 'b':
14229 : case 'w':
14230 : case 'k':
14231 : case 'q':
14232 : case 'h':
14233 : case 't':
14234 : case 'g':
14235 : case 'y':
14236 : case 'x':
14237 : case 'X':
14238 : case 'P':
14239 : case 'p':
14240 : case 'V':
14241 : break;
14242 :
14243 0 : case 's':
14244 0 : if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14245 : {
14246 0 : ix86_print_operand (file, x, 0);
14247 0 : fputs (", ", file);
14248 : }
14249 0 : return;
14250 :
14251 494 : case 'Y':
14252 494 : switch (GET_CODE (x))
14253 : {
14254 182 : case NE:
14255 182 : fputs ("neq", file);
14256 182 : break;
14257 32 : case EQ:
14258 32 : fputs ("eq", file);
14259 32 : break;
14260 64 : case GE:
14261 64 : case GEU:
14262 64 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14263 64 : break;
14264 40 : case GT:
14265 40 : case GTU:
14266 40 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14267 40 : break;
14268 64 : case LE:
14269 64 : case LEU:
14270 64 : fputs ("le", file);
14271 64 : break;
14272 112 : case LT:
14273 112 : case LTU:
14274 112 : fputs ("lt", file);
14275 112 : break;
14276 0 : case UNORDERED:
14277 0 : fputs ("unord", file);
14278 0 : break;
14279 0 : case ORDERED:
14280 0 : fputs ("ord", file);
14281 0 : break;
14282 0 : case UNEQ:
14283 0 : fputs ("ueq", file);
14284 0 : break;
14285 0 : case UNGE:
14286 0 : fputs ("nlt", file);
14287 0 : break;
14288 0 : case UNGT:
14289 0 : fputs ("nle", file);
14290 0 : break;
14291 0 : case UNLE:
14292 0 : fputs ("ule", file);
14293 0 : break;
14294 0 : case UNLT:
14295 0 : fputs ("ult", file);
14296 0 : break;
14297 0 : case LTGT:
14298 0 : fputs ("une", file);
14299 0 : break;
14300 0 : default:
14301 0 : output_operand_lossage ("operand is not a condition code, "
14302 : "invalid operand code 'Y'");
14303 0 : return;
14304 : }
14305 494 : return;
14306 :
14307 8817 : case 'D':
14308 : /* Little bit of braindamage here. The SSE compare instructions
14309 : does use completely different names for the comparisons that the
14310 : fp conditional moves. */
14311 8817 : switch (GET_CODE (x))
14312 : {
14313 3 : case UNEQ:
14314 3 : if (TARGET_AVX)
14315 : {
14316 3 : fputs ("eq_us", file);
14317 3 : break;
14318 : }
14319 : /* FALLTHRU */
14320 4339 : case EQ:
14321 4339 : fputs ("eq", file);
14322 4339 : break;
14323 0 : case UNLT:
14324 0 : if (TARGET_AVX)
14325 : {
14326 0 : fputs ("nge", file);
14327 0 : break;
14328 : }
14329 : /* FALLTHRU */
14330 1545 : case LT:
14331 1545 : fputs ("lt", file);
14332 1545 : break;
14333 0 : case UNLE:
14334 0 : if (TARGET_AVX)
14335 : {
14336 0 : fputs ("ngt", file);
14337 0 : break;
14338 : }
14339 : /* FALLTHRU */
14340 798 : case LE:
14341 798 : fputs ("le", file);
14342 798 : break;
14343 90 : case UNORDERED:
14344 90 : fputs ("unord", file);
14345 90 : break;
14346 24 : case LTGT:
14347 24 : if (TARGET_AVX)
14348 : {
14349 24 : fputs ("neq_oq", file);
14350 24 : break;
14351 : }
14352 : /* FALLTHRU */
14353 758 : case NE:
14354 758 : fputs ("neq", file);
14355 758 : break;
14356 0 : case GE:
14357 0 : if (TARGET_AVX)
14358 : {
14359 0 : fputs ("ge", file);
14360 0 : break;
14361 : }
14362 : /* FALLTHRU */
14363 410 : case UNGE:
14364 410 : fputs ("nlt", file);
14365 410 : break;
14366 0 : case GT:
14367 0 : if (TARGET_AVX)
14368 : {
14369 0 : fputs ("gt", file);
14370 0 : break;
14371 : }
14372 : /* FALLTHRU */
14373 767 : case UNGT:
14374 767 : fputs ("nle", file);
14375 767 : break;
14376 83 : case ORDERED:
14377 83 : fputs ("ord", file);
14378 83 : break;
14379 0 : default:
14380 0 : output_operand_lossage ("operand is not a condition code, "
14381 : "invalid operand code 'D'");
14382 0 : return;
14383 : }
14384 8817 : return;
14385 :
14386 7161200 : case 'F':
14387 7161200 : case 'f':
14388 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14389 : if (ASSEMBLER_DIALECT == ASM_ATT)
14390 : putc ('.', file);
14391 : gcc_fallthrough ();
14392 : #endif
14393 :
14394 7161200 : case 'C':
14395 7161200 : case 'c':
14396 7161200 : if (!COMPARISON_P (x))
14397 : {
14398 0 : output_operand_lossage ("operand is not a condition code, "
14399 : "invalid operand code '%c'", code);
14400 0 : return;
14401 : }
14402 7161200 : put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14403 7161200 : code == 'c' || code == 'f',
14404 7161200 : code == 'F' || code == 'f',
14405 : file);
14406 7161200 : return;
14407 :
14408 21 : case 'G':
14409 21 : {
14410 21 : int dfv = INTVAL (x);
14411 21 : const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14412 21 : fputs (dfv_suffix, file);
14413 : }
14414 21 : return;
14415 :
14416 1301 : case 'H':
14417 1301 : if (!offsettable_memref_p (x))
14418 : {
14419 1 : output_operand_lossage ("operand is not an offsettable memory "
14420 : "reference, invalid operand code 'H'");
14421 1 : return;
14422 : }
14423 : /* It doesn't actually matter what mode we use here, as we're
14424 : only going to use this for printing. */
14425 1300 : x = adjust_address_nv (x, DImode, 8);
14426 : /* Output 'qword ptr' for intel assembler dialect. */
14427 1300 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14428 0 : code = 'q';
14429 : break;
14430 :
14431 75748 : case 'K':
14432 75748 : if (!CONST_INT_P (x))
14433 : {
14434 1 : output_operand_lossage ("operand is not an integer, invalid "
14435 : "operand code 'K'");
14436 1 : return;
14437 : }
14438 :
14439 75747 : if (INTVAL (x) & IX86_HLE_ACQUIRE)
14440 : #ifdef HAVE_AS_IX86_HLE
14441 22 : fputs ("xacquire ", file);
14442 : #else
14443 : fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14444 : #endif
14445 75725 : else if (INTVAL (x) & IX86_HLE_RELEASE)
14446 : #ifdef HAVE_AS_IX86_HLE
14447 24 : fputs ("xrelease ", file);
14448 : #else
14449 : fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14450 : #endif
14451 : /* We do not want to print value of the operand. */
14452 75747 : return;
14453 :
14454 42996 : case 'N':
14455 42996 : if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14456 15481 : fputs ("{z}", file);
14457 42996 : return;
14458 :
14459 3999 : case 'r':
14460 3999 : if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14461 : {
14462 2 : output_operand_lossage ("operand is not a specific integer, "
14463 : "invalid operand code 'r'");
14464 2 : return;
14465 : }
14466 :
14467 3997 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14468 1 : fputs (", ", file);
14469 :
14470 3997 : fputs ("{sae}", file);
14471 :
14472 3997 : if (ASSEMBLER_DIALECT == ASM_ATT)
14473 3996 : fputs (", ", file);
14474 :
14475 3997 : return;
14476 :
14477 5975 : case 'R':
14478 5975 : if (!CONST_INT_P (x))
14479 : {
14480 1 : output_operand_lossage ("operand is not an integer, invalid "
14481 : "operand code 'R'");
14482 1 : return;
14483 : }
14484 :
14485 5974 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14486 2 : fputs (", ", file);
14487 :
14488 5974 : switch (INTVAL (x))
14489 : {
14490 5163 : case ROUND_NEAREST_INT | ROUND_SAE:
14491 5163 : fputs ("{rn-sae}", file);
14492 5163 : break;
14493 637 : case ROUND_NEG_INF | ROUND_SAE:
14494 637 : fputs ("{rd-sae}", file);
14495 637 : break;
14496 52 : case ROUND_POS_INF | ROUND_SAE:
14497 52 : fputs ("{ru-sae}", file);
14498 52 : break;
14499 121 : case ROUND_ZERO | ROUND_SAE:
14500 121 : fputs ("{rz-sae}", file);
14501 121 : break;
14502 1 : default:
14503 1 : output_operand_lossage ("operand is not a specific integer, "
14504 : "invalid operand code 'R'");
14505 : }
14506 :
14507 5974 : if (ASSEMBLER_DIALECT == ASM_ATT)
14508 5972 : fputs (", ", file);
14509 :
14510 5974 : return;
14511 :
14512 9168 : case 'v':
14513 9168 : if (MEM_P (x))
14514 : {
14515 9289 : switch (MEM_ADDR_SPACE (x))
14516 : {
14517 : case ADDR_SPACE_GENERIC:
14518 : break;
14519 0 : case ADDR_SPACE_SEG_FS:
14520 0 : fputs ("fs ", file);
14521 0 : break;
14522 0 : case ADDR_SPACE_SEG_GS:
14523 0 : fputs ("gs ", file);
14524 0 : break;
14525 0 : default:
14526 0 : gcc_unreachable ();
14527 : }
14528 : }
14529 : else
14530 0 : output_operand_lossage ("operand is not a memory reference, "
14531 : "invalid operand code 'v'");
14532 9168 : return;
14533 :
14534 0 : case '*':
14535 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14536 0 : putc ('*', file);
14537 0 : return;
14538 :
14539 202 : case '&':
14540 202 : {
14541 202 : const char *name = get_some_local_dynamic_name ();
14542 202 : if (name == NULL)
14543 1 : output_operand_lossage ("'%%&' used without any "
14544 : "local dynamic TLS references");
14545 : else
14546 201 : assemble_name (file, name);
14547 202 : return;
14548 : }
14549 :
14550 6523245 : case '+':
14551 6523245 : {
14552 6523245 : rtx x;
14553 :
14554 6523245 : if (!optimize
14555 5116158 : || optimize_function_for_size_p (cfun)
14556 11448919 : || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14557 4925674 : && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14558 6523245 : return;
14559 :
14560 0 : x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14561 0 : if (x)
14562 : {
14563 0 : int pred_val = profile_probability::from_reg_br_prob_note
14564 0 : (XINT (x, 0)).to_reg_br_prob_base ();
14565 :
14566 0 : bool taken = pred_val > REG_BR_PROB_BASE / 2;
14567 : /* We use 3e (DS) prefix for taken branches and
14568 : 2e (CS) prefix for not taken branches. */
14569 0 : if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14570 0 : fputs ("ds ; ", file);
14571 0 : else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14572 0 : fputs ("cs ; ", file);
14573 : }
14574 0 : return;
14575 : }
14576 :
14577 : case ';':
14578 : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14579 : putc (';', file);
14580 : #endif
14581 : return;
14582 :
14583 3722 : case '~':
14584 3722 : putc (TARGET_AVX2 ? 'i' : 'f', file);
14585 3722 : return;
14586 :
14587 1675 : case 'M':
14588 1675 : if (TARGET_X32)
14589 : {
14590 : /* NB: 32-bit indices in VSIB address are sign-extended
14591 : to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
14592 : sign-extended to 0xfffffffff7fa3010 which is invalid
14593 : address. Add addr32 prefix if there is no base
14594 : register nor symbol. */
14595 40 : bool ok;
14596 40 : struct ix86_address parts;
14597 40 : ok = ix86_decompose_address (x, &parts);
14598 40 : gcc_assert (ok && parts.index == NULL_RTX);
14599 40 : if (parts.base == NULL_RTX
14600 40 : && (parts.disp == NULL_RTX
14601 34 : || !symbolic_operand (parts.disp,
14602 34 : GET_MODE (parts.disp))))
14603 34 : fputs ("addr32 ", file);
14604 : }
14605 1675 : return;
14606 :
14607 20184 : case '^':
14608 23346 : if (Pmode != word_mode)
14609 0 : fputs ("addr32 ", file);
14610 20184 : return;
14611 :
14612 14857954 : case '!':
14613 14857954 : if (ix86_notrack_prefixed_insn_p (current_output_insn))
14614 5508 : fputs ("notrack ", file);
14615 14857954 : return;
14616 :
14617 1 : default:
14618 1 : output_operand_lossage ("invalid operand code '%c'", code);
14619 : }
14620 : }
14621 :
14622 143593101 : if (REG_P (x))
14623 85579586 : print_reg (x, code, file);
14624 :
14625 58013515 : else if (MEM_P (x))
14626 : {
14627 33298688 : rtx addr = XEXP (x, 0);
14628 :
14629 : /* No `byte ptr' prefix for call instructions ... */
14630 33298688 : if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14631 : {
14632 266 : machine_mode mode = GET_MODE (x);
14633 266 : const char *size;
14634 :
14635 : /* Check for explicit size override codes. */
14636 266 : if (code == 'b')
14637 : size = "BYTE";
14638 : else if (code == 'w')
14639 : size = "WORD";
14640 : else if (code == 'k')
14641 : size = "DWORD";
14642 : else if (code == 'q')
14643 : size = "QWORD";
14644 : else if (code == 'x')
14645 : size = "XMMWORD";
14646 : else if (code == 't')
14647 : size = "YMMWORD";
14648 : else if (code == 'g')
14649 : size = "ZMMWORD";
14650 191 : else if (mode == BLKmode)
14651 : /* ... or BLKmode operands, when not overridden. */
14652 : size = NULL;
14653 : else
14654 378 : switch (GET_MODE_SIZE (mode))
14655 : {
14656 : case 1: size = "BYTE"; break;
14657 : case 2: size = "WORD"; break;
14658 : case 4: size = "DWORD"; break;
14659 : case 8: size = "QWORD"; break;
14660 : case 12: size = "TBYTE"; break;
14661 4 : case 16:
14662 4 : if (mode == XFmode)
14663 : size = "TBYTE";
14664 : else
14665 : size = "XMMWORD";
14666 : break;
14667 : case 32: size = "YMMWORD"; break;
14668 : case 64: size = "ZMMWORD"; break;
14669 0 : default:
14670 0 : gcc_unreachable ();
14671 : }
14672 : if (size)
14673 : {
14674 264 : fputs (size, file);
14675 264 : fputs (" PTR ", file);
14676 : }
14677 : }
14678 :
14679 33298688 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14680 0 : output_operand_lossage ("invalid constraints for operand");
14681 : else
14682 33298688 : ix86_print_operand_address_as
14683 33993506 : (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14684 : }
14685 :
14686 24714827 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14687 : {
14688 762 : long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14689 762 : REAL_MODE_FORMAT (HFmode));
14690 762 : if (ASSEMBLER_DIALECT == ASM_ATT)
14691 762 : putc ('$', file);
14692 762 : fprintf (file, "0x%04x", (unsigned int) l);
14693 762 : }
14694 :
14695 24714065 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14696 : {
14697 22052 : long l;
14698 :
14699 22052 : REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14700 :
14701 22052 : if (ASSEMBLER_DIALECT == ASM_ATT)
14702 22052 : putc ('$', file);
14703 : /* Sign extend 32bit SFmode immediate to 8 bytes. */
14704 22052 : if (code == 'q')
14705 327 : fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14706 : (unsigned long long) (int) l);
14707 : else
14708 21725 : fprintf (file, "0x%08x", (unsigned int) l);
14709 : }
14710 :
14711 24692013 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14712 : {
14713 3704 : long l[2];
14714 :
14715 3704 : REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14716 :
14717 3704 : if (ASSEMBLER_DIALECT == ASM_ATT)
14718 3704 : putc ('$', file);
14719 3704 : fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14720 3704 : }
14721 :
14722 : /* These float cases don't actually occur as immediate operands. */
14723 24688309 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14724 : {
14725 0 : char dstr[30];
14726 :
14727 0 : real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14728 0 : fputs (dstr, file);
14729 0 : }
14730 :
14731 : /* Print bcst_mem_operand. */
14732 24688309 : else if (GET_CODE (x) == VEC_DUPLICATE)
14733 : {
14734 313 : machine_mode vmode = GET_MODE (x);
14735 : /* Must be bcst_memory_operand. */
14736 313 : gcc_assert (bcst_mem_operand (x, vmode));
14737 :
14738 313 : rtx mem = XEXP (x,0);
14739 313 : ix86_print_operand (file, mem, 0);
14740 :
14741 313 : switch (vmode)
14742 : {
14743 28 : case E_V2DImode:
14744 28 : case E_V2DFmode:
14745 28 : fputs ("{1to2}", file);
14746 28 : break;
14747 74 : case E_V4SImode:
14748 74 : case E_V4SFmode:
14749 74 : case E_V4DImode:
14750 74 : case E_V4DFmode:
14751 74 : fputs ("{1to4}", file);
14752 74 : break;
14753 93 : case E_V8SImode:
14754 93 : case E_V8SFmode:
14755 93 : case E_V8DFmode:
14756 93 : case E_V8DImode:
14757 93 : case E_V8HFmode:
14758 93 : fputs ("{1to8}", file);
14759 93 : break;
14760 110 : case E_V16SFmode:
14761 110 : case E_V16SImode:
14762 110 : case E_V16HFmode:
14763 110 : fputs ("{1to16}", file);
14764 110 : break;
14765 8 : case E_V32HFmode:
14766 8 : fputs ("{1to32}", file);
14767 8 : break;
14768 0 : default:
14769 0 : gcc_unreachable ();
14770 : }
14771 : }
14772 :
14773 : else
14774 : {
14775 : /* We have patterns that allow zero sets of memory, for instance.
14776 : In 64-bit mode, we should probably support all 8-byte vectors,
14777 : since we can in fact encode that into an immediate. */
14778 24687996 : if (CONST_VECTOR_P (x))
14779 : {
14780 118 : if (x != CONST0_RTX (GET_MODE (x)))
14781 2 : output_operand_lossage ("invalid vector immediate");
14782 118 : x = const0_rtx;
14783 : }
14784 :
14785 24687996 : if (code == 'P')
14786 : {
14787 5891245 : if (ix86_force_load_from_GOT_p (x, true))
14788 : {
14789 : /* For inline assembly statement, load function address
14790 : from GOT with 'P' operand modifier to avoid PLT. */
14791 4 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14792 : (TARGET_64BIT
14793 : ? UNSPEC_GOTPCREL
14794 : : UNSPEC_GOT));
14795 4 : x = gen_rtx_CONST (Pmode, x);
14796 4 : x = gen_const_mem (Pmode, x);
14797 4 : ix86_print_operand (file, x, 'A');
14798 4 : return;
14799 : }
14800 : }
14801 18796751 : else if (code != 'p')
14802 : {
14803 18796642 : if (CONST_INT_P (x))
14804 : {
14805 15551435 : if (ASSEMBLER_DIALECT == ASM_ATT)
14806 15551210 : putc ('$', file);
14807 : }
14808 3245207 : else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
14809 9388 : || LABEL_REF_P (x))
14810 : {
14811 3245205 : if (ASSEMBLER_DIALECT == ASM_ATT)
14812 3245181 : putc ('$', file);
14813 : else
14814 24 : fputs ("OFFSET FLAT:", file);
14815 : }
14816 : }
14817 24687992 : if (CONST_INT_P (x))
14818 15551521 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14819 9136471 : else if (flag_pic || MACHOPIC_INDIRECT)
14820 524656 : output_pic_addr_const (file, x, code);
14821 : else
14822 8611815 : output_addr_const (file, x);
14823 : }
14824 : }
14825 :
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true iff CODE
   is one of the punctuation characters given special meaning by
   ix86_print_operand ('*', '+', '&', ';', '~', '^' and '!').  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;
    default:
      return false;
    }
}
14832 :
/* Print a memory operand whose address is ADDR to FILE.

   AS is the address space of the operand; for the non-generic spaces
   SEG_FS/SEG_GS a segment prefix ("%fs:"/"%gs:" in AT&T syntax) is
   emitted.  RAW suppresses both the segment prefix and the RIP-relative
   shortening below, printing the bare address.  ADDR may be wrapped in
   UNSPEC_VSIBADDR (VSIB gather/scatter address carrying its own index
   and scale) or UNSPEC_LEA_ADDR (64-bit LEA source, registers printed
   with the 'q' modifier).  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  /* Extra print_reg modifier forced onto base/index registers;
     0 means no override.  */
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      /* VSIB address: operand 0 is the scalar part, operands 1 and 2
	 carry the vector index register and the scale; the scalar part
	 must not itself have an index.  */
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      /* 64-bit LEA source: force the 'q' register-name modifier.  */
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* An explicit segment from the caller and one recovered from the
     address itself are mutually exclusive.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);

      switch (as)
	{
	case ADDR_SPACE_SEG_FS:
	  fputs ("fs:", file);
	  break;
	case ADDR_SPACE_SEG_GS:
	  fputs ("gs:", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.
     Only done for label refs and symbols without a TLS model; pretend
     the PC is the base so the (%rip) form is printed below.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (LABEL_REF_P (symbol)
	  || (SYMBOL_REF_P (symbol)
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: sym[base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
15064 :
15065 : static void
15066 3560104 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
15067 : {
15068 3560104 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
15069 1 : output_operand_lossage ("invalid constraints for operand");
15070 : else
15071 3560103 : ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
15072 3560104 : }
15073 :
15074 : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15075 :
15076 : static bool
15077 15329 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
15078 : {
15079 15329 : rtx op;
15080 :
15081 15329 : if (GET_CODE (x) != UNSPEC)
15082 : return false;
15083 :
15084 15329 : op = XVECEXP (x, 0, 0);
15085 15329 : switch (XINT (x, 1))
15086 : {
15087 1315 : case UNSPEC_GOTOFF:
15088 1315 : output_addr_const (file, op);
15089 1315 : fputs ("@gotoff", file);
15090 1315 : break;
15091 0 : case UNSPEC_GOTTPOFF:
15092 0 : output_addr_const (file, op);
15093 : /* FIXME: This might be @TPOFF in Sun ld. */
15094 0 : fputs ("@gottpoff", file);
15095 0 : break;
15096 0 : case UNSPEC_TPOFF:
15097 0 : output_addr_const (file, op);
15098 0 : fputs ("@tpoff", file);
15099 0 : break;
15100 10955 : case UNSPEC_NTPOFF:
15101 10955 : output_addr_const (file, op);
15102 10955 : if (TARGET_64BIT)
15103 10209 : fputs ("@tpoff", file);
15104 : else
15105 746 : fputs ("@ntpoff", file);
15106 : break;
15107 0 : case UNSPEC_DTPOFF:
15108 0 : output_addr_const (file, op);
15109 0 : fputs ("@dtpoff", file);
15110 0 : break;
15111 3058 : case UNSPEC_GOTNTPOFF:
15112 3058 : output_addr_const (file, op);
15113 3058 : if (TARGET_64BIT)
15114 3058 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15115 : "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15116 : else
15117 0 : fputs ("@gotntpoff", file);
15118 : break;
15119 1 : case UNSPEC_INDNTPOFF:
15120 1 : output_addr_const (file, op);
15121 1 : fputs ("@indntpoff", file);
15122 1 : break;
15123 0 : case UNSPEC_SECREL32:
15124 0 : output_addr_const (file, op);
15125 0 : fputs ("@secrel32", file);
15126 0 : break;
15127 : #if TARGET_MACHO
15128 : case UNSPEC_MACHOPIC_OFFSET:
15129 : output_addr_const (file, op);
15130 : putc ('-', file);
15131 : machopic_output_function_base_name (file);
15132 : break;
15133 : #endif
15134 :
15135 : default:
15136 : return false;
15137 : }
15138 :
15139 : return true;
15140 : }
15141 :
15142 :
15143 : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15144 : MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15145 : is the expression of the binary operation. The output may either be
15146 : emitted here, or returned to the caller, like all output_* functions.
15147 :
15148 : There is no guarantee that the operands are the same mode, as they
15149 : might be within FLOAT or FLOAT_EXTEND expressions. */
15150 :
15151 : #ifndef SYSV386_COMPAT
15152 : /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15153 : wants to fix the assemblers because that causes incompatibility
15154 : with gcc. No-one wants to fix gcc because that causes
15155 : incompatibility with assemblers... You can use the option of
15156 : -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15157 : #define SYSV386_COMPAT 1
15158 : #endif
15159 :
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  /* Shared static buffer holding the assembled template --
     the returned pointer is only valid until the next call.  */
  static char buf[40];
  const char *p;
  /* Any SSE register among the operands selects the SSE scalar forms
     instead of the x87 stack forms.  */
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Mnemonic prefix: "%v" for SSE (AVX-prefix expansion), "fi" when an
     integer operand selects the fiadd/fisub/... forms, plain "f"
     otherwise.  */
  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (buf, p);

  if (is_sse)
    {
      /* SSE scalar op: size suffix plus a 3-operand (AVX) or
	 2-operand template.  */
      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
      strcat (buf, p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}";
      else
	p = "\t{%2, %0|%0, %2}";

      strcat (buf, p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  /* x87 forms: pick the operand template depending on which operand
     is in memory, which register dies with this insn, and whether the
     destination is st(0).  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the destination register is
	 operand 1.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory operand 1 needs the reversed
	 (fsubr/fdivr) form.  */
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
15349 :
15350 : /* Return needed mode for entity in optimize_mode_switching pass. */
15351 :
15352 : static int
15353 1654 : ix86_dirflag_mode_needed (rtx_insn *insn)
15354 : {
15355 1654 : if (CALL_P (insn))
15356 : {
15357 339 : if (cfun->machine->func_type == TYPE_NORMAL)
15358 : return X86_DIRFLAG_ANY;
15359 : else
15360 : /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15361 339 : return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15362 : }
15363 :
15364 1315 : if (recog_memoized (insn) < 0)
15365 : return X86_DIRFLAG_ANY;
15366 :
15367 1313 : if (get_attr_type (insn) == TYPE_STR)
15368 : {
15369 : /* Emit cld instruction if stringops are used in the function. */
15370 1 : if (cfun->machine->func_type == TYPE_NORMAL)
15371 0 : return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15372 : else
15373 : return X86_DIRFLAG_RESET;
15374 : }
15375 :
15376 : return X86_DIRFLAG_ANY;
15377 : }
15378 :
15379 : /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15380 :
15381 : static bool
15382 2233821 : ix86_check_avx_upper_register (const_rtx exp)
15383 : {
15384 : /* construct_container may return a parallel with expr_list
15385 : which contains the real reg and mode */
15386 2233821 : subrtx_iterator::array_type array;
15387 8519350 : FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
15388 : {
15389 6448917 : const_rtx x = *iter;
15390 2596969 : if (SSE_REG_P (x)
15391 834001 : && !EXT_REX_SSE_REG_P (x)
15392 8104071 : && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
15393 163388 : return true;
15394 : }
15395 :
15396 2070433 : return false;
15397 2233821 : }
15398 :
15399 : /* Check if a 256bit or 512bit AVX register is referenced in stores. */
15400 :
15401 : static void
15402 52024 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15403 : {
15404 52024 : if (SSE_REG_P (dest)
15405 12854 : && !EXT_REX_SSE_REG_P (dest)
15406 77732 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15407 : {
15408 759 : bool *used = (bool *) data;
15409 759 : *used = true;
15410 : }
15411 52024 : }
15412 :
/* Return needed AVX upper-128 mode for INSN in the
   optimize_mode_switching pass: AVX_U128_DIRTY if INSN makes the upper
   halves of the YMM/ZMM registers live, AVX_U128_CLEAN if INSN requires
   them clean (a call where a vzeroupper is safe and profitable), and
   AVX_U128_ANY if INSN does not care.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments. */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register. */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* Should be safe to issue an vzeroupper before sibling_call_p.
	     Also there not mode_exit for sibling_call, so there could be
	     missing vzeroupper for that.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
	  && !EXT_REX_SSE_REG_P (dest)
	  && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  /* Destination is not a wide AVX register; only the source
	     can make the upper halves live.  */
	  if (ix86_check_avx_upper_register (src))
	    return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  if (ix86_check_avx_upper_register (PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
15500 :
15501 : /* Return mode that i387 must be switched into
15502 : prior to the execution of insn. */
15503 :
15504 : static int
15505 412467 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
15506 : {
15507 412467 : enum attr_i387_cw mode;
15508 :
15509 : /* The mode UNINITIALIZED is used to store control word after a
15510 : function call or ASM pattern. The mode ANY specify that function
15511 : has no requirements on the control word and make no changes in the
15512 : bits we are interested in. */
15513 :
15514 412467 : if (CALL_P (insn)
15515 412467 : || (NONJUMP_INSN_P (insn)
15516 337730 : && (asm_noperands (PATTERN (insn)) >= 0
15517 337677 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15518 14339 : return I387_CW_UNINITIALIZED;
15519 :
15520 398128 : if (recog_memoized (insn) < 0)
15521 : return I387_CW_ANY;
15522 :
15523 397190 : mode = get_attr_i387_cw (insn);
15524 :
15525 397190 : switch (entity)
15526 : {
15527 0 : case I387_ROUNDEVEN:
15528 0 : if (mode == I387_CW_ROUNDEVEN)
15529 : return mode;
15530 : break;
15531 :
15532 391478 : case I387_TRUNC:
15533 391478 : if (mode == I387_CW_TRUNC)
15534 : return mode;
15535 : break;
15536 :
15537 4378 : case I387_FLOOR:
15538 4378 : if (mode == I387_CW_FLOOR)
15539 : return mode;
15540 : break;
15541 :
15542 1334 : case I387_CEIL:
15543 1334 : if (mode == I387_CW_CEIL)
15544 : return mode;
15545 : break;
15546 :
15547 0 : default:
15548 0 : gcc_unreachable ();
15549 : }
15550 :
15551 : return I387_CW_ANY;
15552 : }
15553 :
15554 : /* Return mode that entity must be switched into
15555 : prior to the execution of insn. */
15556 :
15557 : static int
15558 2509477 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15559 : {
15560 2509477 : switch (entity)
15561 : {
15562 1654 : case X86_DIRFLAG:
15563 1654 : return ix86_dirflag_mode_needed (insn);
15564 2095356 : case AVX_U128:
15565 2095356 : return ix86_avx_u128_mode_needed (insn);
15566 412467 : case I387_ROUNDEVEN:
15567 412467 : case I387_TRUNC:
15568 412467 : case I387_FLOOR:
15569 412467 : case I387_CEIL:
15570 412467 : return ix86_i387_mode_needed (entity, insn);
15571 0 : default:
15572 0 : gcc_unreachable ();
15573 : }
15574 : return 0;
15575 : }
15576 :
15577 : /* Calculate mode of upper 128bit AVX registers after the insn. */
15578 :
15579 : static int
15580 2095356 : ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15581 : {
15582 2095356 : rtx pat = PATTERN (insn);
15583 :
15584 2095356 : if (vzeroupper_pattern (pat, VOIDmode)
15585 2095356 : || vzeroall_pattern (pat, VOIDmode))
15586 170 : return AVX_U128_CLEAN;
15587 :
15588 : /* We know that state is clean after CALL insn if there are no
15589 : 256bit or 512bit registers used in the function return register. */
15590 2095186 : if (CALL_P (insn))
15591 : {
15592 49659 : bool avx_upper_reg_found = false;
15593 49659 : note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15594 :
15595 49659 : if (avx_upper_reg_found)
15596 : return AVX_U128_DIRTY;
15597 :
15598 : /* If the function desn't clobber any sse registers or only clobber
15599 : 128-bit part, Then vzeroupper isn't issued before the function exit.
15600 : the status not CLEAN but ANY after the function. */
15601 49084 : const function_abi &abi = insn_callee_abi (insn);
15602 49084 : if (!(SIBLING_CALL_P (insn)
15603 47808 : || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15604 47808 : abi.mode_clobbers (V4DImode))))
15605 8737 : return AVX_U128_ANY;
15606 :
15607 40347 : return AVX_U128_CLEAN;
15608 : }
15609 :
15610 : /* Otherwise, return current mode. Remember that if insn
15611 : references AVX 256bit or 512bit registers, the mode was already
15612 : changed to DIRTY from MODE_NEEDED. */
15613 : return mode;
15614 : }
15615 :
15616 : /* Return the mode that an insn results in. */
15617 :
15618 : static int
15619 2508636 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15620 : {
15621 2508636 : switch (entity)
15622 : {
15623 : case X86_DIRFLAG:
15624 : return mode;
15625 2095356 : case AVX_U128:
15626 2095356 : return ix86_avx_u128_mode_after (mode, insn);
15627 : case I387_ROUNDEVEN:
15628 : case I387_TRUNC:
15629 : case I387_FLOOR:
15630 : case I387_CEIL:
15631 : return mode;
15632 0 : default:
15633 0 : gcc_unreachable ();
15634 : }
15635 : }
15636 :
15637 : static int
15638 120 : ix86_dirflag_mode_entry (void)
15639 : {
15640 : /* For TARGET_CLD or in the interrupt handler we can't assume
15641 : direction flag state at function entry. */
15642 120 : if (TARGET_CLD
15643 118 : || cfun->machine->func_type != TYPE_NORMAL)
15644 120 : return X86_DIRFLAG_ANY;
15645 :
15646 : return X86_DIRFLAG_RESET;
15647 : }
15648 :
15649 : static int
15650 122755 : ix86_avx_u128_mode_entry (void)
15651 : {
15652 122755 : tree arg;
15653 :
15654 : /* Entry mode is set to AVX_U128_DIRTY if there are
15655 : 256bit or 512bit modes used in function arguments. */
15656 309893 : for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15657 187138 : arg = TREE_CHAIN (arg))
15658 : {
15659 221087 : rtx incoming = DECL_INCOMING_RTL (arg);
15660 :
15661 221087 : if (incoming && ix86_check_avx_upper_register (incoming))
15662 : return AVX_U128_DIRTY;
15663 : }
15664 :
15665 : return AVX_U128_CLEAN;
15666 : }
15667 :
15668 : /* Return a mode that ENTITY is assumed to be
15669 : switched to at function entry. */
15670 :
15671 : static int
15672 75712 : ix86_mode_entry (int entity)
15673 : {
15674 75712 : switch (entity)
15675 : {
15676 120 : case X86_DIRFLAG:
15677 120 : return ix86_dirflag_mode_entry ();
15678 74443 : case AVX_U128:
15679 74443 : return ix86_avx_u128_mode_entry ();
15680 : case I387_ROUNDEVEN:
15681 : case I387_TRUNC:
15682 : case I387_FLOOR:
15683 : case I387_CEIL:
15684 : return I387_CW_ANY;
15685 0 : default:
15686 0 : gcc_unreachable ();
15687 : }
15688 : }
15689 :
15690 : static int
15691 73200 : ix86_avx_u128_mode_exit (void)
15692 : {
15693 73200 : rtx reg = crtl->return_rtx;
15694 :
15695 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15696 : or 512 bit modes used in the function return register. */
15697 73200 : if (reg && ix86_check_avx_upper_register (reg))
15698 : return AVX_U128_DIRTY;
15699 :
15700 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15701 : modes used in function arguments, otherwise return AVX_U128_CLEAN.
15702 : */
15703 48312 : return ix86_avx_u128_mode_entry ();
15704 : }
15705 :
15706 : /* Return a mode that ENTITY is assumed to be
15707 : switched to at function exit. */
15708 :
15709 : static int
15710 74324 : ix86_mode_exit (int entity)
15711 : {
15712 74324 : switch (entity)
15713 : {
15714 : case X86_DIRFLAG:
15715 : return X86_DIRFLAG_ANY;
15716 73200 : case AVX_U128:
15717 73200 : return ix86_avx_u128_mode_exit ();
15718 1090 : case I387_ROUNDEVEN:
15719 1090 : case I387_TRUNC:
15720 1090 : case I387_FLOOR:
15721 1090 : case I387_CEIL:
15722 1090 : return I387_CW_ANY;
15723 0 : default:
15724 0 : gcc_unreachable ();
15725 : }
15726 : }
15727 :
15728 : static int
15729 2177081 : ix86_mode_priority (int, int n)
15730 : {
15731 2177081 : return n;
15732 : }
15733 :
15734 : /* Output code to initialize control word copies used by trunc?f?i and
15735 : rounding patterns. CURRENT_MODE is set to current control word,
15736 : while NEW_MODE is set to new control word. */
15737 :
15738 : static void
15739 3296 : emit_i387_cw_initialization (int mode)
15740 : {
15741 3296 : rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15742 3296 : rtx new_mode;
15743 :
15744 3296 : enum ix86_stack_slot slot;
15745 :
15746 3296 : rtx reg = gen_reg_rtx (HImode);
15747 :
15748 3296 : emit_insn (gen_x86_fnstcw_1 (stored_mode));
15749 3296 : emit_move_insn (reg, copy_rtx (stored_mode));
15750 :
15751 3296 : switch (mode)
15752 : {
15753 0 : case I387_CW_ROUNDEVEN:
15754 : /* round to nearest */
15755 0 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15756 0 : slot = SLOT_CW_ROUNDEVEN;
15757 0 : break;
15758 :
15759 3076 : case I387_CW_TRUNC:
15760 : /* round toward zero (truncate) */
15761 3076 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15762 3076 : slot = SLOT_CW_TRUNC;
15763 3076 : break;
15764 :
15765 153 : case I387_CW_FLOOR:
15766 : /* round down toward -oo */
15767 153 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15768 153 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15769 153 : slot = SLOT_CW_FLOOR;
15770 153 : break;
15771 :
15772 67 : case I387_CW_CEIL:
15773 : /* round up toward +oo */
15774 67 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15775 67 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15776 67 : slot = SLOT_CW_CEIL;
15777 67 : break;
15778 :
15779 0 : default:
15780 0 : gcc_unreachable ();
15781 : }
15782 :
15783 3296 : gcc_assert (slot < MAX_386_STACK_LOCALS);
15784 :
15785 3296 : new_mode = assign_386_stack_local (HImode, slot);
15786 3296 : emit_move_insn (new_mode, reg);
15787 3296 : }
15788 :
/* Generate one or more insns to set ENTITY to MODE.  */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      /* Only the RESET state needs an insn (cld); X86_DIRFLAG_ANY
	 requires no code.  */
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      /* Only the transition to CLEAN is materialized (vzeroupper);
	 entering DIRTY happens as a side effect of other insns.  */
      if (mode == AVX_U128_CLEAN)
	ix86_expand_avx_vzeroupper ();
      break;
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      /* Emit the control-word setup for a concrete rounding mode;
	 ANY and UNINITIALIZED need no setup code.  */
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
15817 :
15818 : /* Output code for INSN to convert a float to a signed int. OPERANDS
15819 : are the insn operands. The output may be [HSD]Imode and the input
15820 : operand may be [SDX]Fmode. */
15821 :
15822 : const char *
15823 7425 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15824 : {
15825 7425 : bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15826 7425 : bool dimode_p = GET_MODE (operands[0]) == DImode;
15827 7425 : int round_mode = get_attr_i387_cw (insn);
15828 :
15829 7425 : static char buf[40];
15830 7425 : const char *p;
15831 :
15832 : /* Jump through a hoop or two for DImode, since the hardware has no
15833 : non-popping instruction. We used to do this a different way, but
15834 : that was somewhat fragile and broke with post-reload splitters. */
15835 7425 : if ((dimode_p || fisttp) && !stack_top_dies)
15836 25 : output_asm_insn ("fld\t%y1", operands);
15837 :
15838 7425 : gcc_assert (STACK_TOP_P (operands[1]));
15839 7425 : gcc_assert (MEM_P (operands[0]));
15840 7425 : gcc_assert (GET_MODE (operands[1]) != TFmode);
15841 :
15842 7425 : if (fisttp)
15843 : return "fisttp%Z0\t%0";
15844 :
15845 7424 : strcpy (buf, "fist");
15846 :
15847 7424 : if (round_mode != I387_CW_ANY)
15848 7376 : output_asm_insn ("fldcw\t%3", operands);
15849 :
15850 7424 : p = "p%Z0\t%0";
15851 7424 : strcat (buf, p + !(stack_top_dies || dimode_p));
15852 :
15853 7424 : output_asm_insn (buf, operands);
15854 :
15855 7424 : if (round_mode != I387_CW_ANY)
15856 7376 : output_asm_insn ("fldcw\t%2", operands);
15857 :
15858 : return "";
15859 : }
15860 :
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    /* The assembler understands the ffreep mnemonic directly.  */
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
  {
    /* The assembler lacks the ffreep mnemonic, so emit the raw
       encoding as data: "0xc%ddf" as a little-endian short yields the
       bytes 0xdf 0xc0+regno, i.e. ffreep %st(regno).  */
    static char retval[32];
    int regno = REGNO (operands[opno]);

    gcc_assert (STACK_REGNO_P (regno));

    regno -= FIRST_STACK_REG;

    snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
    return retval;
  }
#endif

  /* Without TARGET_USE_FFREEP, fall back to fstp, which also pops the
     register stack.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
15887 :
15888 :
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* For fcomi the compared values are operands[0]/[1]; otherwise
     operands[0] is the fnstsw destination and the values follow.  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      /* Skipping the first character of P drops the trailing-'p'
	 (popping) suffix when the stack top does not die here.  */
      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Comparison against literal zero: use ftst (ordered only).  */
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      /* Integer memory operand uses ficom; otherwise fcom/fucom.  */
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  gcc_assert (!unordered_p);
	  p = "ficom";
	}
      else
	p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      /* As above, drop the popping 'p' when the top does not die.  */
      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  /* Emit the compare, then return the fnstsw that moves the FPU
     status word into operand 0.  */
  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
}
15953 :
15954 : void
15955 139075 : ix86_output_addr_vec_elt (FILE *file, int value)
15956 : {
15957 139075 : const char *directive = ASM_LONG;
15958 :
15959 : #ifdef ASM_QUAD
15960 139075 : if (TARGET_LP64)
15961 127436 : directive = ASM_QUAD;
15962 : #else
15963 : gcc_assert (!TARGET_64BIT);
15964 : #endif
15965 :
15966 139075 : fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15967 139075 : }
15968 :
/* Output one PC-relative jump-table entry: the difference between
   local label VALUE and reference label REL (or a GOT-relative
   expression where label differences are not available).  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O: express the entry relative to the function base name
	 emitted by machopic_output_function_base_name.  */
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    /* Assembler lacks @GOTOFF in data; emit the equivalent expression
       in terms of the GOT symbol.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
15998 :
15999 : #define LEA_MAX_STALL (3)
16000 : #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16001 :
16002 : /* Increase given DISTANCE in half-cycles according to
16003 : dependencies between PREV and NEXT instructions.
16004 : Add 1 half-cycle if there is no dependency and
16005 : go to next cycle if there is some dependecy. */
16006 :
16007 : static unsigned int
16008 2129 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
16009 : {
16010 2129 : df_ref def, use;
16011 :
16012 2129 : if (!prev || !next)
16013 748 : return distance + (distance & 1) + 2;
16014 :
16015 1381 : if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16016 226 : return distance + 1;
16017 :
16018 1920 : FOR_EACH_INSN_USE (use, next)
16019 2448 : FOR_EACH_INSN_DEF (def, prev)
16020 1683 : if (!DF_REF_IS_ARTIFICIAL (def)
16021 1683 : && DF_REF_REGNO (use) == DF_REF_REGNO (def))
16022 735 : return distance + (distance & 1) + 2;
16023 :
16024 420 : return distance + 1;
16025 : }
16026 :
16027 : /* Function checks if instruction INSN defines register number
16028 : REGNO1 or REGNO2. */
16029 :
16030 : bool
16031 2073 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
16032 : rtx_insn *insn)
16033 : {
16034 2073 : df_ref def;
16035 :
16036 3739 : FOR_EACH_INSN_DEF (def, insn)
16037 2070 : if (DF_REF_REG_DEF_P (def)
16038 2070 : && !DF_REF_IS_ARTIFICIAL (def)
16039 2070 : && (regno1 == DF_REF_REGNO (def)
16040 1682 : || regno2 == DF_REF_REGNO (def)))
16041 : return true;
16042 :
16043 : return false;
16044 : }
16045 :
16046 : /* Function checks if instruction INSN uses register number
16047 : REGNO as a part of address expression. */
16048 :
16049 : static bool
16050 1182 : insn_uses_reg_mem (unsigned int regno, rtx insn)
16051 : {
16052 1182 : df_ref use;
16053 :
16054 2475 : FOR_EACH_INSN_USE (use, insn)
16055 1384 : if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
16056 : return true;
16057 :
16058 : return false;
16059 : }
16060 :
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  /* Walk backward until we hit INSN, the head of the block, or the
     search threshold.  */
  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to the
	 distance and can carry the definition of interest.  */
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      /* A LEA is an AGU definition, so it does not count as
		 the non-AGU define we are searching for.  */
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
16109 :
16110 : /* Search backward for non-agu definition of register number REGNO1
16111 : or register number REGNO2 in INSN's basic block until
16112 : 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16113 : 2. Reach neighbor BBs boundary, or
16114 : 3. Reach agu definition.
16115 : Returns the distance between the non-agu definition point and INSN.
16116 : If no definition point, returns -1. */
16117 :
16118 : static int
16119 429 : distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16120 : rtx_insn *insn)
16121 : {
16122 429 : basic_block bb = BLOCK_FOR_INSN (insn);
16123 429 : int distance = 0;
16124 429 : bool found = false;
16125 :
16126 429 : if (insn != BB_HEAD (bb))
16127 429 : distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16128 : distance, PREV_INSN (insn),
16129 : &found);
16130 :
16131 429 : if (!found && distance < LEA_SEARCH_THRESHOLD)
16132 : {
16133 167 : edge e;
16134 167 : edge_iterator ei;
16135 167 : bool simple_loop = false;
16136 :
16137 336 : FOR_EACH_EDGE (e, ei, bb->preds)
16138 206 : if (e->src == bb)
16139 : {
16140 : simple_loop = true;
16141 : break;
16142 : }
16143 :
16144 167 : if (simple_loop)
16145 37 : distance = distance_non_agu_define_in_bb (regno1, regno2,
16146 : insn, distance,
16147 37 : BB_END (bb), &found);
16148 : else
16149 : {
16150 130 : int shortest_dist = -1;
16151 130 : bool found_in_bb = false;
16152 :
16153 288 : FOR_EACH_EDGE (e, ei, bb->preds)
16154 : {
16155 158 : int bb_dist
16156 316 : = distance_non_agu_define_in_bb (regno1, regno2,
16157 : insn, distance,
16158 158 : BB_END (e->src),
16159 : &found_in_bb);
16160 158 : if (found_in_bb)
16161 : {
16162 24 : if (shortest_dist < 0)
16163 : shortest_dist = bb_dist;
16164 0 : else if (bb_dist > 0)
16165 0 : shortest_dist = MIN (bb_dist, shortest_dist);
16166 :
16167 24 : found = true;
16168 : }
16169 : }
16170 :
16171 130 : distance = shortest_dist;
16172 : }
16173 : }
16174 :
16175 429 : if (!found)
16176 : return -1;
16177 :
16178 200 : return distance >> 1;
16179 : }
16180 :
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  /* Walk forward until we reach INSN again, the end of the block, or
     the search threshold.  */
  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to the
	 distance and can use or redefine the register.  */
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
16245 :
16246 : /* Return the distance between INSN and the next insn that uses
16247 : register number REGNO0 in memory address. Return -1 if no such
16248 : a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16249 :
16250 : static int
16251 429 : distance_agu_use (unsigned int regno0, rtx_insn *insn)
16252 : {
16253 429 : basic_block bb = BLOCK_FOR_INSN (insn);
16254 429 : int distance = 0;
16255 429 : bool found = false;
16256 429 : bool redefined = false;
16257 :
16258 429 : if (insn != BB_END (bb))
16259 412 : distance = distance_agu_use_in_bb (regno0, insn, distance,
16260 : NEXT_INSN (insn),
16261 : &found, &redefined);
16262 :
16263 429 : if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16264 : {
16265 250 : edge e;
16266 250 : edge_iterator ei;
16267 250 : bool simple_loop = false;
16268 :
16269 535 : FOR_EACH_EDGE (e, ei, bb->succs)
16270 355 : if (e->dest == bb)
16271 : {
16272 : simple_loop = true;
16273 : break;
16274 : }
16275 :
16276 250 : if (simple_loop)
16277 70 : distance = distance_agu_use_in_bb (regno0, insn,
16278 : distance, BB_HEAD (bb),
16279 : &found, &redefined);
16280 : else
16281 : {
16282 180 : int shortest_dist = -1;
16283 180 : bool found_in_bb = false;
16284 180 : bool redefined_in_bb = false;
16285 :
16286 465 : FOR_EACH_EDGE (e, ei, bb->succs)
16287 : {
16288 285 : int bb_dist
16289 570 : = distance_agu_use_in_bb (regno0, insn,
16290 285 : distance, BB_HEAD (e->dest),
16291 : &found_in_bb, &redefined_in_bb);
16292 285 : if (found_in_bb)
16293 : {
16294 17 : if (shortest_dist < 0)
16295 : shortest_dist = bb_dist;
16296 2 : else if (bb_dist > 0)
16297 2 : shortest_dist = MIN (bb_dist, shortest_dist);
16298 :
16299 17 : found = true;
16300 : }
16301 : }
16302 :
16303 180 : distance = shortest_dist;
16304 : }
16305 : }
16306 :
16307 429 : if (!found || redefined)
16308 : return -1;
16309 :
16310 89 : return distance >> 1;
16311 : }
16312 :
16313 : /* Define this macro to tune LEA priority vs ADD, it take effect when
16314 : there is a dilemma of choosing LEA or ADD
16315 : Negative value: ADD is more preferred than LEA
16316 : Zero: Neutral
16317 : Positive value: LEA is more preferred than ADD. */
16318 : #define IX86_LEA_PRIORITY 0
16319 :
16320 : /* Return true if usage of lea INSN has performance advantage
16321 : over a sequence of instructions. Instructions sequence has
16322 : SPLIT_COST cycles higher latency than lea latency. */
16323 :
16324 : static bool
16325 1629 : ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
16326 : unsigned int regno2, int split_cost, bool has_scale)
16327 : {
16328 1629 : int dist_define, dist_use;
16329 :
16330 : /* For Atom processors newer than Bonnell, if using a 2-source or
16331 : 3-source LEA for non-destructive destination purposes, or due to
16332 : wanting ability to use SCALE, the use of LEA is justified. */
16333 1629 : if (!TARGET_CPU_P (BONNELL))
16334 : {
16335 1200 : if (has_scale)
16336 : return true;
16337 1181 : if (split_cost < 1)
16338 : return false;
16339 406 : if (regno0 == regno1 || regno0 == regno2)
16340 : return false;
16341 : return true;
16342 : }
16343 :
16344 : /* Remember recog_data content. */
16345 429 : struct recog_data_d recog_data_save = recog_data;
16346 :
16347 429 : dist_define = distance_non_agu_define (regno1, regno2, insn);
16348 429 : dist_use = distance_agu_use (regno0, insn);
16349 :
16350 : /* distance_non_agu_define can call get_attr_type which can call
16351 : recog_memoized, restore recog_data back to previous content. */
16352 429 : recog_data = recog_data_save;
16353 :
16354 429 : if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16355 : {
16356 : /* If there is no non AGU operand definition, no AGU
16357 : operand usage and split cost is 0 then both lea
16358 : and non lea variants have same priority. Currently
16359 : we prefer lea for 64 bit code and non lea on 32 bit
16360 : code. */
16361 232 : if (dist_use < 0 && split_cost == 0)
16362 98 : return TARGET_64BIT || IX86_LEA_PRIORITY;
16363 : else
16364 : return true;
16365 : }
16366 :
16367 : /* With longer definitions distance lea is more preferable.
16368 : Here we change it to take into account splitting cost and
16369 : lea priority. */
16370 197 : dist_define += split_cost + IX86_LEA_PRIORITY;
16371 :
16372 : /* If there is no use in memory addess then we just check
16373 : that split cost exceeds AGU stall. */
16374 197 : if (dist_use < 0)
16375 193 : return dist_define > LEA_MAX_STALL;
16376 :
16377 : /* If this insn has both backward non-agu dependence and forward
16378 : agu dependence, the one with short distance takes effect. */
16379 4 : return dist_define >= dist_use;
16380 : }
16381 :
16382 : /* Return true if we need to split op0 = op1 + op2 into a sequence of
16383 : move and add to avoid AGU stalls. */
16384 :
16385 : bool
16386 9128980 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16387 : {
16388 9128980 : unsigned int regno0, regno1, regno2;
16389 :
16390 : /* Check if we need to optimize. */
16391 9128980 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16392 9128165 : return false;
16393 :
16394 815 : regno0 = true_regnum (operands[0]);
16395 815 : regno1 = true_regnum (operands[1]);
16396 815 : regno2 = true_regnum (operands[2]);
16397 :
16398 : /* We need to split only adds with non destructive
16399 : destination operand. */
16400 815 : if (regno0 == regno1 || regno0 == regno2)
16401 : return false;
16402 : else
16403 245 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16404 : }
16405 :
16406 : /* Return true if we should emit lea instruction instead of mov
16407 : instruction. */
16408 :
16409 : bool
16410 29480432 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16411 : {
16412 29480432 : unsigned int regno0, regno1;
16413 :
16414 : /* Check if we need to optimize. */
16415 29480432 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16416 29478146 : return false;
16417 :
16418 : /* Use lea for reg to reg moves only. */
16419 2286 : if (!REG_P (operands[0]) || !REG_P (operands[1]))
16420 : return false;
16421 :
16422 464 : regno0 = true_regnum (operands[0]);
16423 464 : regno1 = true_regnum (operands[1]);
16424 :
16425 464 : return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16426 : }
16427 :
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
16530 :
16531 : /* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
16533 : ADD is faster than LEA. For the processors like BONNELL, if the
16534 : destination register of LEA holds an actual address which will be
16535 : used soon, LEA is better and otherwise ADD is better. */
16536 :
16537 : bool
16538 9189293 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16539 : {
16540 9189293 : unsigned int regno0 = true_regnum (operands[0]);
16541 9189293 : unsigned int regno1 = true_regnum (operands[1]);
16542 9189293 : unsigned int regno2 = true_regnum (operands[2]);
16543 :
16544 : /* If a = b + c, (a!=b && a!=c), must use lea form. */
16545 9189293 : if (regno0 != regno1 && regno0 != regno2)
16546 : return true;
16547 :
16548 7151644 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16549 7151065 : return false;
16550 :
16551 579 : return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16552 : }
16553 :
16554 : /* Return true if destination reg of SET_BODY is shift count of
16555 : USE_BODY. */
16556 :
static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Recurse into each element of the PARALLEL; any matching
	 element establishes the dependency.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Recurse into each element of the PARALLEL on the use side.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
16622 :
16623 : /* Return true if destination reg of SET_INSN is shift count of
16624 : USE_INSN. */
16625 :
16626 : bool
16627 25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16628 : {
16629 25 : return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16630 25 : PATTERN (use_insn));
16631 : }
16632 :
16633 : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16634 : are ok, keeping in mind the possible movddup alternative. */
16635 :
16636 : bool
16637 89963 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16638 : {
16639 89963 : if (MEM_P (operands[0]))
16640 2041 : return rtx_equal_p (operands[0], operands[1 + high]);
16641 87922 : if (MEM_P (operands[1]) && MEM_P (operands[2]))
16642 951 : return false;
16643 : return true;
16644 : }
16645 :
16646 : /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16647 : then replicate the value for all elements of the vector
16648 : register. */
16649 :
rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    /* Integer vector modes are only meaningful here when replicating
       VALUE into every lane, hence the assertion below.  */
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* Element 0 always receives VALUE.  */
      RTVEC_ELT (v, 0) = value;

      /* Remaining elements: VALUE again when replicating, otherwise
	 zero of the element mode.  */
      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
16706 :
16707 : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16708 : and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16709 : for an SSE register. If VECT is true, then replicate the mask for
16710 : all elements of the vector register. If INVERT is true, then create
16711 : a mask excluding the sign bit. */
16712 :
rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode matching the element width, used to
     materialize the sign-bit constant before punning it to the
     floating-point element mode.  */
  switch (mode)
    {
    /* 16-bit elements (HF/BF): build the constant in HImode.  */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    /* 32-bit elements: build the constant in SImode.  */
    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    /* 64-bit elements: build the constant in DImode.  */
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    /* Scalar 128-bit modes: no vector mode, return a scalar mask.  */
    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Set only the sign bit of one element; INVERT instead clears the
     sign bit and sets everything else.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
16784 :
16785 : /* Return HOST_WIDE_INT for const vector OP in MODE. */
16786 :
HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  /* Only vectors that fit in one word can be packed this way.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    /* Integer element vectors: concatenate the CONST_INT elements,
       element 0 in the least significant bits.  */
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    /* Single-element vectors: the sole element already is the value.  */
    case E_V1SImode:
    case E_V1DImode:
      op = CONST_VECTOR_ELT (op, 0);
      return INTVAL (op);
    /* Floating-point element vectors: convert each element to its
       target bit pattern first, then concatenate as above.  */
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
16837 :
16838 32 : int ix86_get_flags_cc (rtx_code code)
16839 : {
16840 32 : switch (code)
16841 : {
16842 : case NE: return X86_CCNE;
16843 : case EQ: return X86_CCE;
16844 : case GE: return X86_CCNL;
16845 : case GT: return X86_CCNLE;
16846 : case LE: return X86_CCLE;
16847 : case LT: return X86_CCL;
16848 : case GEU: return X86_CCNB;
16849 : case GTU: return X86_CCNBE;
16850 : case LEU: return X86_CCBE;
16851 : case LTU: return X86_CCB;
16852 : default: return -1;
16853 : }
16854 : }
16855 :
16856 : /* Return TRUE or FALSE depending on whether the first SET in INSN
16857 : has source and destination with matching CC modes, and that the
16858 : CC mode is at least as constrained as REQ_MODE. */
16859 :
bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  /* For a PARALLEL, the COMPARE is expected in the first element.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      /* CCNOmode satisfies a CCmode request only when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    /* The cascaded FALLTHRUs below reject any request that is
       strictly stronger than what SET_MODE provides.  */
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      /* These single-flag modes only match themselves.  */
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  /* Finally, source and destination must agree on the CC mode.  */
  return GET_MODE (SET_SRC (set)) == set_mode;
}
16913 :
/* Return the machine mode to use for a comparison CODE of OP0 and OP1,
   i.e. the least constrained CC mode that still captures all the flags
   the comparison needs.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All scalar FP comparisons use the FP flags mode.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      /* Similarly for the comparison of addcarry/subborrow pattern.  */
      else if (code == LTU
	       && GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == PLUS
	       && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
	       && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF or SF=0 */
    case LT:			/* SF<>OF or SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      /* CCmode should be used in all other cases.  */
      return CCmode;
    }
}
17007 :
17008 : /* Return TRUE or FALSE depending on whether the ptest instruction
17009 : INSN has source and destination with suitable matching CC modes. */
17010 :
17011 : bool
17012 94642 : ix86_match_ptest_ccmode (rtx insn)
17013 : {
17014 94642 : rtx set, src;
17015 94642 : machine_mode set_mode;
17016 :
17017 94642 : set = PATTERN (insn);
17018 94642 : gcc_assert (GET_CODE (set) == SET);
17019 94642 : src = SET_SRC (set);
17020 94642 : gcc_assert (GET_CODE (src) == UNSPEC
17021 : && XINT (src, 1) == UNSPEC_PTEST);
17022 :
17023 94642 : set_mode = GET_MODE (src);
17024 94642 : if (set_mode != CCZmode
17025 : && set_mode != CCCmode
17026 : && set_mode != CCmode)
17027 : return false;
17028 94642 : return GET_MODE (SET_DEST (set)) == set_mode;
17029 : }
17030 :
17031 : /* Return the fixed registers used for condition codes. */
17032 :
static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  /* The flags register is the only condition-code register on x86;
     there is no second CC register, hence INVALID_REGNUM.  */
  *p1 = FLAGS_REG;
  *p2 = INVALID_REGNUM;
  return true;
}
17040 :
17041 : /* If two condition code modes are compatible, return a condition code
17042 : mode which is compatible with both. Otherwise, return
17043 : VOIDmode. */
17044 :
static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  /* Non-CC modes are only compatible when identical (handled above).  */
  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* For the pairs below, return the stricter of the two modes.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZmode is satisfied by any of these richer modes.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	/* Any two distinct integer CC modes not matched above fall
	   back to the fully general CCmode.  */
	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
17108 :
17109 : /* Return strategy to use for floating-point. We assume that fcomi is always
   preferable where available, since that is also true when looking at size
17111 : (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17112 :
17113 : enum ix86_fpcmp_strategy
17114 5537122 : ix86_fp_comparison_strategy (enum rtx_code)
17115 : {
17116 : /* Do fcomi/sahf based test when profitable. */
17117 :
17118 5537122 : if (TARGET_CMOVE)
17119 : return IX86_FPCMP_COMI;
17120 :
17121 0 : if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
17122 0 : return IX86_FPCMP_SAHF;
17123 :
17124 : return IX86_FPCMP_ARITH;
17125 : }
17126 :
17127 : /* Convert comparison codes we use to represent FP comparison to integer
17128 : code that will result in proper branch. Return UNKNOWN if no such code
17129 : is available. */
17130 :
17131 : enum rtx_code
17132 582866 : ix86_fp_compare_code_to_integer (enum rtx_code code)
17133 : {
17134 582866 : switch (code)
17135 : {
17136 : case GT:
17137 : return GTU;
17138 18184 : case GE:
17139 18184 : return GEU;
17140 : case ORDERED:
17141 : case UNORDERED:
17142 : return code;
17143 118703 : case UNEQ:
17144 118703 : return EQ;
17145 19542 : case UNLT:
17146 19542 : return LTU;
17147 31120 : case UNLE:
17148 31120 : return LEU;
17149 113272 : case LTGT:
17150 113272 : return NE;
17151 675 : case EQ:
17152 675 : case NE:
17153 675 : if (TARGET_AVX10_2)
17154 : return code;
17155 : /* FALLTHRU. */
17156 215 : default:
17157 215 : return UNKNOWN;
17158 : }
17159 : }
17160 :
17161 : /* Zero extend possibly SImode EXP to Pmode register. */
17162 : rtx
17163 44169 : ix86_zero_extend_to_Pmode (rtx exp)
17164 : {
17165 56053 : return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
17166 : }
17167 :
17168 : /* Return true if the function is called via PLT. */
17169 :
17170 : bool
17171 986037 : ix86_call_use_plt_p (rtx call_op)
17172 : {
17173 986037 : if (SYMBOL_REF_LOCAL_P (call_op))
17174 : {
17175 196995 : if (SYMBOL_REF_DECL (call_op)
17176 196995 : && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
17177 : {
17178 : /* NB: All ifunc functions must be called via PLT. */
17179 113835 : cgraph_node *node
17180 113835 : = cgraph_node::get (SYMBOL_REF_DECL (call_op));
17181 113835 : if (node && node->ifunc_resolver)
17182 : return true;
17183 : }
17184 196975 : return false;
17185 : }
17186 : return true;
17187 : }
17188 :
17189 : /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
17190 : the PLT entry will be used as the function address for local IFUNC
17191 : functions. When the PIC register is needed for PLT call, indirect
17192 : call via the PLT entry will fail since the PIC register may not be
17193 : set up properly for indirect call. In this case, we should return
17194 : false. */
17195 :
17196 : static bool
17197 764553956 : ix86_ifunc_ref_local_ok (void)
17198 : {
17199 764553956 : return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
17200 : }
17201 :
17202 : /* Return true if the function being called was marked with attribute
17203 : "noplt" or using -fno-plt and we are compiling for non-PIC. We need
17204 : to handle the non-PIC case in the backend because there is no easy
17205 : interface for the front-end to force non-PLT calls to use the GOT.
17206 : This is currently used only with 64-bit or 32-bit GOT32X ELF targets
17207 : to call the function marked "noplt" indirectly. */
17208 :
17209 : bool
17210 5875465 : ix86_nopic_noplt_attribute_p (rtx call_op)
17211 : {
17212 5388012 : if (flag_pic || ix86_cmodel == CM_LARGE
17213 : || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17214 : || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17215 11263477 : || SYMBOL_REF_LOCAL_P (call_op))
17216 : return false;
17217 :
17218 3727390 : tree symbol_decl = SYMBOL_REF_DECL (call_op);
17219 :
17220 3727390 : if (!flag_plt
17221 3727390 : || (symbol_decl != NULL_TREE
17222 3727358 : && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
17223 34 : return true;
17224 :
17225 : return false;
17226 : }
17227 :
17228 : /* Helper to output the jmp/call. */
static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      /* Extended (REX/REX2) registers get a cs segment prefix when
	 -mindirect-branch-cs-prefix is in effect.  */
      if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
	  && ix86_indirect_branch_cs_prefix)
	fprintf (asm_out_file, "\tcs\n");
      fprintf (asm_out_file, "\tjmp\t");
      assemble_name (asm_out_file, thunk_name);
      putc ('\n', asm_out_file);
      /* Stop straight-line speculation past the indirect jmp.  */
      if ((ix86_harden_sls & harden_sls_indirect_jmp))
	fputs ("\tint3\n", asm_out_file);
    }
  else
    /* Inline thunk requested: emit the thunk body right here.  */
    output_indirect_thunk (regno);
}
17246 :
17247 : /* Output indirect branch via a call and return thunk. CALL_OP is a
17248 : register which contains the branch target. XASM is the assembly
17249 : template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17250 : A normal call is converted to:
17251 :
17252 : call __x86_indirect_thunk_reg
17253 :
17254 : and a tail call is converted to:
17255 :
17256 : jmp __x86_indirect_thunk_reg
17257 : */
17258 :
17259 : static void
17260 50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17261 : {
17262 50 : char thunk_name_buf[32];
17263 50 : char *thunk_name;
17264 50 : enum indirect_thunk_prefix need_prefix
17265 50 : = indirect_thunk_need_prefix (current_output_insn);
17266 50 : int regno = REGNO (call_op);
17267 :
17268 50 : if (cfun->machine->indirect_branch_type
17269 50 : != indirect_branch_thunk_inline)
17270 : {
17271 39 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17272 16 : SET_HARD_REG_BIT (indirect_thunks_used, regno);
17273 :
17274 39 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17275 39 : thunk_name = thunk_name_buf;
17276 : }
17277 : else
17278 : thunk_name = NULL;
17279 :
17280 50 : if (sibcall_p)
17281 27 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17282 : else
17283 : {
17284 23 : if (thunk_name != NULL)
17285 : {
17286 17 : if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
17287 1 : && ix86_indirect_branch_cs_prefix)
17288 1 : fprintf (asm_out_file, "\tcs\n");
17289 17 : fprintf (asm_out_file, "\tcall\t");
17290 17 : assemble_name (asm_out_file, thunk_name);
17291 17 : putc ('\n', asm_out_file);
17292 17 : return;
17293 : }
17294 :
17295 6 : char indirectlabel1[32];
17296 6 : char indirectlabel2[32];
17297 :
17298 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17299 : INDIRECT_LABEL,
17300 : indirectlabelno++);
17301 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17302 : INDIRECT_LABEL,
17303 : indirectlabelno++);
17304 :
17305 : /* Jump. */
17306 6 : fputs ("\tjmp\t", asm_out_file);
17307 6 : assemble_name_raw (asm_out_file, indirectlabel2);
17308 6 : fputc ('\n', asm_out_file);
17309 :
17310 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17311 :
17312 6 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17313 :
17314 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17315 :
17316 : /* Call. */
17317 6 : fputs ("\tcall\t", asm_out_file);
17318 6 : assemble_name_raw (asm_out_file, indirectlabel1);
17319 6 : fputc ('\n', asm_out_file);
17320 : }
17321 : }
17322 :
17323 : /* Output indirect branch via a call and return thunk. CALL_OP is
17324 : the branch target. XASM is the assembly template for CALL_OP.
17325 : Branch is a tail call if SIBCALL_P is true. A normal call is
17326 : converted to:
17327 :
17328 : jmp L2
17329 : L1:
17330 : push CALL_OP
17331 : jmp __x86_indirect_thunk
17332 : L2:
17333 : call L1
17334 :
17335 : and a tail call is converted to:
17336 :
17337 : push CALL_OP
17338 : jmp __x86_indirect_thunk
17339 : */
17340 :
static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  /* regno == -1 selects the generic (non per-register) thunk name.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* Build the "push" template with the proper operand-size suffix.  */
  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      /* Push the target, then jump to the thunk; the thunk returns to
	 the pushed address.  */
      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
17435 :
17436 : /* Output indirect branch via a call and return thunk. CALL_OP is
17437 : the branch target. XASM is the assembly template for CALL_OP.
17438 : Branch is a tail call if SIBCALL_P is true. */
17439 :
17440 : static void
17441 50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
17442 : bool sibcall_p)
17443 : {
17444 50 : if (REG_P (call_op))
17445 50 : ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17446 : else
17447 0 : ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17448 50 : }
17449 :
/* Output indirect jump.  CALL_OP is the jump target.  Returns the
   trailing template: "int3" when straight-line-speculation hardening
   of indirect jumps is enabled, otherwise the empty string.  */

const char *
ix86_output_indirect_jmp (rtx call_op)
{
  if (cfun->machine->indirect_branch_type != indirect_branch_keep)
    {
      /* We can't have red-zone since "call" in the indirect thunk
	 pushes the return address onto stack, destroying red-zone.  */
      if (ix86_red_zone_used)
	gcc_unreachable ();

      /* Emit the jump through a retpoline-style thunk.  */
      ix86_output_indirect_branch (call_op, "%0", true);
    }
  else
    /* Plain indirect jump; %! emits bnd/notrack prefixes if needed.  */
    output_asm_insn ("%!jmp\t%A0", &call_op);
  return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
}
17468 :
/* Output return instrumentation for current function if needed.
   Used by -minstrument-return= (either a call to __return__ or a
   5-byte NOP patch site), optionally recording the location in the
   __return_loc section for -mrecord-return.  Requires -mfentry and is
   suppressed by the no_instrument_function attribute.  */

static void
output_return_instrumentation (void)
{
  if (ix86_instrument_return != instrument_return_none
      && flag_fentry
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
    {
      /* Local label "1:" marks the instrumentation point so it can be
	 recorded below with a "1b" back-reference.  */
      if (ix86_flag_record_return)
	fprintf (asm_out_file, "1:\n");
      switch (ix86_instrument_return)
	{
	case instrument_return_call:
	  fprintf (asm_out_file, "\tcall\t__return__\n");
	  break;
	case instrument_return_nop5:
	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
	  break;
	case instrument_return_none:
	  break;
	}

      if (ix86_flag_record_return)
	{
	  /* Record the address of the patch site; pointer-sized entry.  */
	  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
	  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
	  fprintf (asm_out_file, "\t.previous\n");
	}
    }
}
17501 :
/* Output function return.  CALL_OP is the jump target.  Add a REP
   prefix to RET if LONG_P is true and function return is kept.
   Returns the trailing template ("int3" for SLS hardening of returns,
   otherwise "").  */

const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  /* -mfunction-return= replaces "ret" with a jump to (or an inline
     copy of) a return thunk.  */
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
			       true);
	  /* Remember to emit the thunk body later (thunk, not extern).  */
	  indirect_return_needed |= need_thunk;
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  /* "rep ret" avoids the AMD branch-predictor penalty for a ret that
     is a branch target (hence LONG_P).  */
  output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
}
17537 :
/* Output indirect function return.  RET_OP is the function return
   target, which must be the CX register (the only register the
   return-via-register pattern allows).  */

const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);
      unsigned int regno = REGNO (ret_op);
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, regno, need_prefix, true);

	  if (need_thunk)
	    {
	      /* Record that the CX-register return thunk body must be
		 emitted at end of compilation.  */
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
	    }
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (regno);
    }
  else
    {
      /* Plain indirect jump through the register, with an optional
	 int3 speculation barrier for SLS hardening.  */
      output_asm_insn ("%!jmp\t%A0", &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs ("\tint3\n", asm_out_file);
    }
  return "";
}
17579 :
/* Output the assembly for a call instruction.  INSN is the call insn;
   CALL_OP is the call target operand.  Handles sibling (tail) calls,
   -mindirect-branch= thunks, no-PLT calls through the GOT, SLS
   hardening, and the extra NOP sometimes required by Windows SEH
   unwinding.  Returns the trailing template ("int3", "nop" or "").  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* True if indirect branches must be routed through a thunk.  SEH
     can't describe the thunk sequence, so it forces plain branches.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  /* A direct call with the (no)plt attribute becomes an
	     indirect jump through the GOT entry.  */
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      /* Same GOT-indirection as in the sibcall case, but via "call".  */
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
17723 :
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  Slots are cached per (MODE, N) pair on the
   GC-managed ix86_stack_locals list, so repeated requests reuse the
   same stack location.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Reuse an existing slot if one was already allocated for this
     (mode, n) pair; copy_rtx so callers can modify the MEM freely.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  int align = 0;
  /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
     alignment with -m32 -mpreferred-stack-boundary=2.  */
  if (mode == DImode
      && !TARGET_64BIT
      && n == SLOT_FLOATxFDI_387
      && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
    align = 32;
  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);

  /* Push the new entry onto the cache list.  */
  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
17758 :
17759 : static void
17760 1471357 : ix86_instantiate_decls (void)
17761 : {
17762 1471357 : struct stack_local_entry *s;
17763 :
17764 1471357 : for (s = ix86_stack_locals; s; s = s->next)
17765 0 : if (s->rtl != NULL_RTX)
17766 0 : instantiate_decl_rtl (s->rtl);
17767 1471357 : }
17768 :
/* Check whether x86 address PARTS is a pc-relative address, i.e. a
   bare displacement that the assembler will encode as disp32(%rip)
   in 64-bit mode.  Only possible with no base and no index.  */

bool
ix86_rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  /* Strip a CONST wrapper and a "symbol + const_int" offset
	     to get at the underlying symbol/label/unspec.  */
	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  /* Labels, non-TLS symbols, and the GOTPCREL/PCREL/GOTNTPOFF
	     unspecs are all emitted %rip-relative.  */
	  if (LABEL_REF_P (symbol)
	      || (SYMBOL_REF_P (symbol)
		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
	      || (GET_CODE (symbol) == UNSPEC
		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
		      || XINT (symbol, 1) == UNSPEC_PCREL
		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
	    return true;
	}
    }
  return false;
}
17804 :
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.

   The result counts: optional segment-override byte, optional addr32
   prefix, optional SIB byte and the displacement bytes — i.e.
   everything beyond the one-byte ModRM.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses (push/pop forms) encode no extra bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* Segment override prefix, if any.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K == signed 8-bit immediate, usable as disp8
	     only with a base register.  */
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
17904 :
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative, so a sign
   extendable constant can be encoded in a single byte.  Returns the
   number of immediate bytes (0 if the insn has no constant operand).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* At most one immediate operand is expected per insn.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    /* Truncate to the operation's width before the signed
	       8-bit range check below.  */
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
17964 :
/* Compute default value for "length_address" attribute: the number of
   address-encoding bytes of the insn's (first non-ignored) memory
   operand, or of the LEA source address.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      /* LEA patterns may carry a clobber in a PARALLEL; the SET is
	 the first element.  */
      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Advance to this operand's constraint string for the
		 matched alternative (alternatives are comma-separated).  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
18018 :
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true
   for insns in the 0f escape map (the only map the 2-byte VEX form
   can express); HAS_VEX_W means the insn sets VEX.W.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  /* A memory operand never needs the longer form on its own, so it
     overrides the conservative register-only estimate.  */
  return has_mem ? 2 + 1 : reg_only;
}
18072 :
18073 :
18074 : static bool
18075 : ix86_class_likely_spilled_p (reg_class_t);
18076 :
/* Returns true if lhs of insn is a HW function-argument register; sets
   *IS_SPILLED to true if it is a likely-spilled HW register.  Used by
   the pre-reload scheduler to recognize argument-setup moves.  */
static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore it.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  /* Look through a PARALLEL (e.g. a set plus clobbers).  */
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
18106 :
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  CALL is the
   call insn, HEAD the first insn of the scheduling region.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  /* Step one before HEAD so the loop terminator below also covers an
     argument move that is the very first insn of the region.  */
  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      return NULL;
    }

  /* Walk further back, chaining consecutive argument moves together.  */
  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
18161 :
18162 : /* Add output or anti dependency from insn to first_arg to restrict its code
18163 : motion. */
18164 : static void
18165 2335 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
18166 : {
18167 2335 : rtx set;
18168 2335 : rtx tmp;
18169 :
18170 2335 : set = single_set (insn);
18171 2335 : if (!set)
18172 : return;
18173 1453 : tmp = SET_DEST (set);
18174 1453 : if (REG_P (tmp))
18175 : {
18176 : /* Add output dependency to the first function argument. */
18177 1258 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18178 1258 : return;
18179 : }
18180 : /* Add anti dependency. */
18181 195 : add_dependence (first_arg, insn, REG_DEP_ANTI);
18182 : }
18183 :
/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.  ARG is the first argument
   insn of the call, BB a predecessor block in the same region.  */
static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  /* Walk backwards from the block end to find its last real
     non-jump single_set insn.  */
  rtx_insn *insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      /* Stop once the block head has been examined.  */
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}
18207 :
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  HEAD/TAIL delimit the
   insns of the current scheduling region.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  /* Only relevant before register allocation.  */
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    /* Resume the scan just before the argument chain.  */
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
18258 :
18259 : /* Hook for pre-reload schedule - set priority of moves from likely spilled
18260 : HW registers to maximum, to schedule them at soon as possible. These are
18261 : moves from function argument registers at the top of the function entry
18262 : and moves from function return value registers after call. */
18263 : static int
18264 108639027 : ix86_adjust_priority (rtx_insn *insn, int priority)
18265 : {
18266 108639027 : rtx set;
18267 :
18268 108639027 : if (reload_completed)
18269 : return priority;
18270 :
18271 14050 : if (!NONDEBUG_INSN_P (insn))
18272 : return priority;
18273 :
18274 12484 : set = single_set (insn);
18275 12484 : if (set)
18276 : {
18277 11912 : rtx tmp = SET_SRC (set);
18278 11912 : if (REG_P (tmp)
18279 2498 : && HARD_REGISTER_P (tmp)
18280 499 : && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18281 11912 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18282 448 : return current_sched_info->sched_max_insns_priority;
18283 : }
18284 :
18285 : return priority;
18286 : }
18287 :
/* Prepare for scheduling pass.  Installs or clears per-CPU multipass
   lookahead hooks depending on the tuned processor and whether this is
   the pre- or post-reload scheduler.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      /* Reset any hooks a previous function's tuning may have left
	 installed.  */
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
18323 :
18324 :
18325 : /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18326 :
18327 : static HOST_WIDE_INT
18328 718485 : ix86_static_rtx_alignment (machine_mode mode)
18329 : {
18330 718485 : if (mode == DFmode)
18331 : return 64;
18332 : if (ALIGN_MODE_128 (mode))
18333 156355 : return MAX (128, GET_MODE_ALIGNMENT (mode));
18334 477350 : return GET_MODE_ALIGNMENT (mode);
18335 : }
18336 :
18337 : /* Implement TARGET_CONSTANT_ALIGNMENT. */
18338 :
18339 : static HOST_WIDE_INT
18340 6816681 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18341 : {
18342 6816681 : if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18343 : || TREE_CODE (exp) == INTEGER_CST)
18344 : {
18345 364001 : machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18346 364001 : HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18347 364001 : return MAX (mode_align, align);
18348 : }
18349 6312536 : else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18350 9514864 : && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18351 : return BITS_PER_WORD;
18352 :
18353 : return align;
18354 : }
18355 :
18356 : /* Implement TARGET_EMPTY_RECORD_P. */
18357 :
18358 : static bool
18359 1482923405 : ix86_is_empty_record (const_tree type)
18360 : {
18361 1482923405 : if (!TARGET_64BIT)
18362 : return false;
18363 1452388039 : return default_is_empty_record (type);
18364 : }
18365 :
/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  Warn (at most once per
   call site) when passing an empty class TYPE, whose calling convention
   changed in GCC 8 / -fabi-version=12.  */

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  /* Don't warn if the function isn't visible outside of the TU.  */
  if (cum->decl && !TREE_PUBLIC (cum->decl))
    return;

  tree decl = cum->decl;
  if (!decl)
    /* If we don't know the target, look at the current TU.  */
    decl = current_function_decl;

  /* Only warn in translation units compiled with -Wabi=12 tracking
     (TRANSLATION_UNIT_WARN_EMPTY_P).  */
  const_tree ctx = get_ultimate_context (decl);
  if (ctx == NULL_TREE
      || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in %<-fabi-version=12%> (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}
18404 :
18405 : /* This hook returns name of multilib ABI. */
18406 :
18407 : static const char *
18408 3378459 : ix86_get_multilib_abi_name (void)
18409 : {
 : /* Distinguish the three x86 multilib ABIs: 32-bit "i386",
 : ILP32-on-64-bit "x32", and LP64 "x86_64". */
18410 3378459 : if (!(TARGET_64BIT_P (ix86_isa_flags)))
18411 : return "i386";
18412 3334503 : else if (TARGET_X32_P (ix86_isa_flags))
18413 : return "x32";
18414 : else
18415 3334503 : return "x86_64";
18416 : }
18417 :
18418 : /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18419 : the data type, and ALIGN is the alignment that the object would
18420 : ordinarily have. */
18421 :
18422 : static int
18423 0 : iamcu_alignment (tree type, int align)
18424 : {
18425 0 : machine_mode mode;
18426 :
 : /* Nothing to lower when already below 32 bits or when the user
 : requested an explicit alignment. */
18427 0 : if (align < 32 || TYPE_USER_ALIGN (type))
18428 : return align;
18429 :
18430 : /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18431 : bytes. */
18432 0 : type = strip_array_types (type);
 : /* Atomic types keep their ordinary alignment. */
18433 0 : if (TYPE_ATOMIC (type))
18434 : return align;
18435 :
18436 0 : mode = TYPE_MODE (type);
18437 0 : switch (GET_MODE_CLASS (mode))
18438 : {
18439 : case MODE_INT:
18440 : case MODE_COMPLEX_INT:
18441 : case MODE_COMPLEX_FLOAT:
18442 : case MODE_FLOAT:
18443 : case MODE_DECIMAL_FLOAT:
18444 : return 32;
18445 : default:
18446 : return align;
18447 : }
18448 : }
18449 :
18450 : /* Compute the alignment for a static variable.
18451 : TYPE is the data type, and ALIGN is the alignment that
18452 : the object would ordinarily have. The value of this function is used
18453 : instead of that alignment to align the object. */
18454 :
18455 : int
18456 12074721 : ix86_data_alignment (tree type, unsigned int align, bool opt)
18457 : {
18458 : /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18459 : for symbols from other compilation units or symbols that don't need
18460 : to bind locally. In order to preserve some ABI compatibility with
18461 : those compilers, ensure we don't decrease alignment from what we
18462 : used to assume. */
18463 :
18464 12074721 : unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18465 :
18466 : /* A data structure, equal or greater than the size of a cache line
18467 : (64 bytes in the Pentium 4 and other recent Intel processors, including
18468 : processors based on Intel Core microarchitecture) should be aligned
18469 : so that its base address is a multiple of a cache line size. */
18470 :
18471 24149442 : unsigned int max_align
18472 12074721 : = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
18473 :
18474 14656891 : if (max_align < BITS_PER_WORD)
18475 0 : max_align = BITS_PER_WORD;
18476 :
 : /* -malign-data= overrides: "abi" disables all optional
 : over-alignment below, "compat" caps it at word alignment,
 : "cacheline" keeps the tuned maximum. */
18477 12074721 : switch (ix86_align_data_type)
18478 : {
18479 12074721 : case ix86_align_data_type_abi: opt = false; break;
18480 12074701 : case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18481 : case ix86_align_data_type_cacheline: break;
18482 : }
18483 :
18484 12074721 : if (TARGET_IAMCU)
18485 0 : align = iamcu_alignment (type, align);
18486 :
 : /* Optional over-alignment of large aggregates of known constant
 : size, first to the GCC 4.8 compatibility bound, then to the
 : tuned cache-line bound. */
18487 12074721 : if (opt
18488 5771200 : && AGGREGATE_TYPE_P (type)
18489 3698404 : && TYPE_SIZE (type)
18490 15773073 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18491 : {
18492 6696720 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18493 3698352 : && align < max_align_compat)
18494 699984 : align = max_align_compat;
18495 7334439 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18496 3698352 : && align < max_align)
18497 62265 : align = max_align;
18498 : }
18499 :
18500 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18501 : to 16byte boundary. */
18502 12074721 : if (TARGET_64BIT)
18503 : {
18504 5011726 : if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18505 3246882 : && TYPE_SIZE (type)
18506 3246820 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18507 10920323 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18508 11530694 : && align < 128)
18509 610371 : return 128;
18510 : }
18511 :
18512 11464350 : if (!opt)
18513 6109154 : return align;
18514 :
 : /* Remaining cases are pure optimization (opt is known true here):
 : raise DFmode data to 64-bit and 128-bit-mode data to 128-bit
 : alignment, keyed off the element/field/scalar mode. */
18515 5355196 : if (TREE_CODE (type) == ARRAY_TYPE)
18516 : {
18517 1098081 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18518 : return 64;
18519 1098081 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18520 : return 128;
18521 : }
18522 4257115 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18523 : {
18524 :
18525 12945 : if (TYPE_MODE (type) == DCmode && align < 64)
18526 : return 64;
18527 12945 : if ((TYPE_MODE (type) == XCmode
18528 12945 : || TYPE_MODE (type) == TCmode) && align < 128)
18529 : return 128;
18530 : }
18531 4244170 : else if (RECORD_OR_UNION_TYPE_P (type)
18532 4244170 : && TYPE_FIELDS (type))
18533 : {
18534 2182578 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18535 : return 64;
18536 2182578 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18537 : return 128;
18538 : }
18539 2061592 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18540 : || TREE_CODE (type) == INTEGER_TYPE)
18541 : {
18542 1915741 : if (TYPE_MODE (type) == DFmode && align < 64)
18543 : return 64;
18544 1915741 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18545 : return 128;
18546 : }
18547 :
18548 5355083 : return align;
18549 : }
18550 :
18551 : /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18552 : static void
18553 38520498 : ix86_lower_local_decl_alignment (tree decl)
18554 : {
 : /* ix86_local_alignment with may_lower=true can return a smaller
 : alignment for DImode locals under -mpreferred-stack-boundary=2;
 : only ever lower, never raise, DECL_ALIGN here. */
18555 38520498 : unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18556 38520498 : DECL_ALIGN (decl), true);
18557 38520498 : if (new_align < DECL_ALIGN (decl))
18558 0 : SET_DECL_ALIGN (decl, new_align);
18559 38520498 : }
18560 :
18561 : /* Compute the alignment for a local variable or a stack slot. EXP is
18562 : the data type or decl itself, MODE is the widest mode available and
18563 : ALIGN is the alignment that the object would ordinarily have. The
18564 : value of this macro is used instead of that alignment to align the
18565 : object. */
18566 :
18567 : unsigned int
18568 55806952 : ix86_local_alignment (tree exp, machine_mode mode,
18569 : unsigned int align, bool may_lower)
18570 : {
18571 55806952 : tree type, decl;
18572 :
 : /* EXP is either a decl (use its type and remember the decl) or
 : already a type. */
18573 55806952 : if (exp && DECL_P (exp))
18574 : {
18575 53650275 : type = TREE_TYPE (exp);
18576 53650275 : decl = exp;
18577 : }
18578 : else
18579 : {
18580 : type = exp;
18581 : decl = NULL;
18582 : }
18583 :
18584 : /* Don't do dynamic stack realignment for long long objects with
18585 : -mpreferred-stack-boundary=2. */
18586 55806952 : if (may_lower
18587 38520498 : && !TARGET_64BIT
18588 245560 : && align == 64
18589 38825 : && ix86_preferred_stack_boundary < 64
18590 0 : && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18591 0 : && (!type || (!TYPE_USER_ALIGN (type)
18592 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18593 55806952 : && (!decl || !DECL_USER_ALIGN (decl)))
18594 : align = 32;
18595 :
18596 : /* If TYPE is NULL, we are allocating a stack slot for caller-save
18597 : register in MODE. We will return the largest alignment of XF
18598 : and DF. */
18599 55806952 : if (!type)
18600 : {
18601 1420453 : if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18602 1467 : align = GET_MODE_ALIGNMENT (DFmode);
18603 1420453 : return align;
18604 : }
18605 :
18606 : /* Don't increase alignment for Intel MCU psABI. */
18607 54386499 : if (TARGET_IAMCU)
18608 : return align;
18609 :
18610 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18611 : to 16byte boundary. Exact wording is:
18612 :
18613 : An array uses the same alignment as its elements, except that a local or
18614 : global array variable of length at least 16 bytes or
18615 : a C99 variable-length array variable always has alignment of at least 16 bytes.
18616 :
18617 : This was added to allow use of aligned SSE instructions at arrays. This
18618 : rule is meant for static storage (where compiler cannot do the analysis
18619 : by itself). We follow it for automatic variables only when convenient.
18620 : We fully control everything in the function compiled and functions from
18621 : other unit cannot rely on the alignment.
18622 :
18623 : Exclude va_list type. It is the common case of local array where
18624 : we cannot benefit from the alignment.
18625 :
18626 : TODO: Probably one should optimize for size only when var is not escaping. */
18627 51566365 : if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18628 105605700 : && TARGET_SSE)
18629 : {
18630 51179614 : if (AGGREGATE_TYPE_P (type)
18631 11683936 : && (va_list_type_node == NULL_TREE
18632 11683936 : || (TYPE_MAIN_VARIANT (type)
18633 11683936 : != TYPE_MAIN_VARIANT (va_list_type_node)))
18634 11585687 : && TYPE_SIZE (type)
18635 11585687 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18636 52232389 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18637 59657885 : && align < 128)
18638 7425496 : return 128;
18639 : }
 : /* The remaining cases mirror ix86_data_alignment: raise DFmode
 : data to 64-bit and 128-bit-mode data to 128-bit alignment. */
18640 46961003 : if (TREE_CODE (type) == ARRAY_TYPE)
18641 : {
18642 846009 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18643 : return 64;
18644 846009 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18645 : return 128;
18646 : }
18647 46114994 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18648 : {
18649 154219 : if (TYPE_MODE (type) == DCmode && align < 64)
18650 : return 64;
18651 154219 : if ((TYPE_MODE (type) == XCmode
18652 154219 : || TYPE_MODE (type) == TCmode) && align < 128)
18653 : return 128;
18654 : }
18655 45960775 : else if (RECORD_OR_UNION_TYPE_P (type)
18656 45960775 : && TYPE_FIELDS (type))
18657 : {
18658 5376292 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18659 : return 64;
18660 5373187 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18661 : return 128;
18662 : }
18663 40584483 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18664 : || TREE_CODE (type) == INTEGER_TYPE)
18665 : {
18666 :
18667 31802982 : if (TYPE_MODE (type) == DFmode && align < 64)
18668 : return 64;
18669 31802982 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18670 : return 128;
18671 : }
18672 : return align;
18673 : }
18674 :
18675 : /* Compute the minimum required alignment for dynamic stack realignment
18676 : purposes for a local variable, parameter or a stack slot. EXP is
18677 : the data type or decl itself, MODE is its mode and ALIGN is the
18678 : alignment that the object would ordinarily have. */
18679 :
18680 : unsigned int
18681 47783925 : ix86_minimum_alignment (tree exp, machine_mode mode,
18682 : unsigned int align)
18683 : {
18684 47783925 : tree type, decl;
18685 :
 : /* EXP is either a decl (use its type and remember the decl) or
 : already a type. */
18686 47783925 : if (exp && DECL_P (exp))
18687 : {
18688 14909607 : type = TREE_TYPE (exp);
18689 14909607 : decl = exp;
18690 : }
18691 : else
18692 : {
18693 : type = exp;
18694 : decl = NULL;
18695 : }
18696 :
18697 47783925 : if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18698 : return align;
18699 :
18700 : /* Don't do dynamic stack realignment for long long objects with
18701 : -mpreferred-stack-boundary=2. */
18702 0 : if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18703 0 : && (!type || (!TYPE_USER_ALIGN (type)
18704 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18705 0 : && (!decl || !DECL_USER_ALIGN (decl)))
18706 : {
 : /* This mirrors the DImode lowering done in ix86_local_alignment
 : (same condition set there lowers align to 32). */
18707 0 : gcc_checking_assert (!TARGET_STV);
18708 : return 32;
18709 : }
18710 :
18711 : return align;
18712 : }
18713 :
18714 : /* Find a location for the static chain incoming to a nested function.
18715 : This is a register, unless all free registers are used by arguments. */
18716 :
18717 : static rtx
18718 269232 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18719 : {
18720 269232 : unsigned regno;
18721 :
 : /* 64-bit mode always has R10 free for the static chain; 32-bit
 : mode must pick a register not consumed by the calling
 : convention's argument registers. */
18722 269232 : if (TARGET_64BIT)
18723 : {
18724 : /* We always use R10 in 64-bit mode. */
18725 : regno = R10_REG;
18726 : }
18727 : else
18728 : {
18729 88535 : const_tree fntype, fndecl;
18730 88535 : unsigned int ccvt;
18731 :
18732 : /* By default in 32-bit mode we use ECX to pass the static chain. */
18733 88535 : regno = CX_REG;
18734 :
18735 88535 : if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18736 : {
18737 78559 : fntype = TREE_TYPE (fndecl_or_type);
18738 78559 : fndecl = fndecl_or_type;
18739 : }
18740 : else
18741 : {
18742 : fntype = fndecl_or_type;
18743 : fndecl = NULL;
18744 : }
18745 :
18746 88535 : ccvt = ix86_get_callcvt (fntype);
18747 88535 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18748 : {
18749 : /* Fastcall functions use ecx/edx for arguments, which leaves
18750 : us with EAX for the static chain.
18751 : Thiscall functions use ecx for arguments, which also
18752 : leaves us with EAX for the static chain. */
18753 : regno = AX_REG;
18754 : }
18755 88535 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18756 : {
18757 : /* Thiscall functions use ecx for arguments, which leaves
18758 : us with EAX and EDX for the static chain.
18759 : We are using for abi-compatibility EAX. */
18760 : regno = AX_REG;
18761 : }
18762 88535 : else if (ix86_function_regparm (fntype, fndecl) == 3)
18763 : {
18764 : /* For regparm 3, we have no free call-clobbered registers in
18765 : which to store the static chain. In order to implement this,
18766 : we have the trampoline push the static chain to the stack.
18767 : However, we can't push a value below the return address when
18768 : we call the nested function directly, so we have to use an
18769 : alternate entry point. For this we use ESI, and have the
18770 : alternate entry point push ESI, so that things appear the
18771 : same once we're executing the nested function. */
18772 0 : if (incoming_p)
18773 : {
18774 0 : if (fndecl == current_function_decl
18775 0 : && !ix86_static_chain_on_stack)
18776 : {
18777 0 : gcc_assert (!reload_completed);
18778 0 : ix86_static_chain_on_stack = true;
18779 : }
 : /* Incoming static chain lives in the stack slot just below
 : the return address (arg_pointer - 8). */
18780 0 : return gen_frame_mem (SImode,
18781 0 : plus_constant (Pmode,
18782 : arg_pointer_rtx, -8));
18783 : }
18784 : regno = SI_REG;
18785 : }
18786 : }
18787 :
18788 357780 : return gen_rtx_REG (Pmode, regno);
18789 : }
18790 :
18791 : /* Emit RTL insns to initialize the variable parts of a trampoline.
18792 : FNDECL is the decl of the target address; M_TRAMP is a MEM for
18793 : the trampoline, and CHAIN_VALUE is an RTX for the static chain
18794 : to be passed to the target function. */
18795 :
18796 : static void
18797 295 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18798 : {
 : /* Writes the variable bytes of the trampoline: optionally an ENDBR,
 : then code to load the static chain and jump to FNDECL. The
 : instruction encodings are emitted as little-endian integer
 : stores into M_TRAMP at increasing OFFSET. */
18799 295 : rtx mem, fnaddr;
18800 295 : int opcode;
18801 295 : int offset = 0;
18802 295 : bool need_endbr = (flag_cf_protection & CF_BRANCH);
18803 :
18804 295 : fnaddr = XEXP (DECL_RTL (fndecl), 0);
18805 :
18806 295 : if (TARGET_64BIT)
18807 : {
18808 295 : int size;
18809 :
18810 295 : if (need_endbr)
18811 : {
18812 : /* Insert ENDBR64. */
18813 1 : mem = adjust_address (m_tramp, SImode, offset);
18814 1 : emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18815 1 : offset += 4;
18816 : }
18817 :
18818 : /* Load the function address to r11. Try to load address using
18819 : the shorter movl instead of movabs. We may want to support
18820 : movq for kernel mode, but kernel does not use trampolines at
18821 : the moment. FNADDR is a 32bit address and may not be in
18822 : DImode when ptr_mode == SImode. Always use movl in this
18823 : case. */
18824 295 : if (ptr_mode == SImode
18825 295 : || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18826 : {
18827 263 : fnaddr = copy_addr_to_reg (fnaddr);
18828 :
 : /* Bytes 41 bb: movl $imm32, %r11d. */
18829 263 : mem = adjust_address (m_tramp, HImode, offset);
18830 263 : emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18831 :
18832 263 : mem = adjust_address (m_tramp, SImode, offset + 2);
18833 263 : emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18834 263 : offset += 6;
18835 : }
18836 : else
18837 : {
 : /* Bytes 49 bb: movabs $imm64, %r11. */
18838 32 : mem = adjust_address (m_tramp, HImode, offset);
18839 32 : emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18840 :
18841 32 : mem = adjust_address (m_tramp, DImode, offset + 2);
18842 32 : emit_move_insn (mem, fnaddr);
18843 32 : offset += 10;
18844 : }
18845 :
18846 : /* Load static chain using movabs to r10. Use the shorter movl
18847 : instead of movabs when ptr_mode == SImode. */
18848 295 : if (ptr_mode == SImode)
18849 : {
18850 : opcode = 0xba41;
18851 : size = 6;
18852 : }
18853 : else
18854 : {
18855 295 : opcode = 0xba49;
18856 295 : size = 10;
18857 : }
18858 :
18859 295 : mem = adjust_address (m_tramp, HImode, offset);
18860 295 : emit_move_insn (mem, gen_int_mode (opcode, HImode));
18861 :
18862 295 : mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18863 295 : emit_move_insn (mem, chain_value);
18864 295 : offset += size;
18865 :
18866 : /* Jump to r11; the last (unused) byte is a nop, only there to
18867 : pad the write out to a single 32-bit store. */
18868 295 : mem = adjust_address (m_tramp, SImode, offset);
18869 295 : emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18870 295 : offset += 4;
18871 : }
18872 : else
18873 : {
18874 0 : rtx disp, chain;
18875 :
18876 : /* Depending on the static chain location, either load a register
18877 : with a constant, or push the constant to the stack. All of the
18878 : instructions are the same size. */
18879 0 : chain = ix86_static_chain (fndecl, true);
18880 0 : if (REG_P (chain))
18881 : {
18882 0 : switch (REGNO (chain))
18883 : {
18884 : case AX_REG:
18885 : opcode = 0xb8; break;
18886 0 : case CX_REG:
18887 0 : opcode = 0xb9; break;
18888 0 : default:
18889 0 : gcc_unreachable ();
18890 : }
18891 : }
18892 : else
18893 : opcode = 0x68;
18894 :
18895 0 : if (need_endbr)
18896 : {
18897 : /* Insert ENDBR32. */
18898 0 : mem = adjust_address (m_tramp, SImode, offset);
18899 0 : emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18900 0 : offset += 4;
18901 : }
18902 :
18903 0 : mem = adjust_address (m_tramp, QImode, offset);
18904 0 : emit_move_insn (mem, gen_int_mode (opcode, QImode));
18905 :
18906 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
18907 0 : emit_move_insn (mem, chain_value);
18908 0 : offset += 5;
18909 :
 : /* Byte e9: jmp rel32 to the target function. */
18910 0 : mem = adjust_address (m_tramp, QImode, offset);
18911 0 : emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18912 :
18913 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
18914 :
18915 : /* Compute offset from the end of the jmp to the target function.
18916 : In the case in which the trampoline stores the static chain on
18917 : the stack, we need to skip the first insn which pushes the
18918 : (call-saved) register static chain; this push is 1 byte. */
18919 0 : offset += 5;
18920 0 : int skip = MEM_P (chain) ? 1 : 0;
18921 : /* Skip ENDBR32 at the entry of the target function. */
18922 0 : if (need_endbr
18923 0 : && !cgraph_node::get (fndecl)->only_called_directly_p ())
18924 0 : skip += 4;
18925 0 : disp = expand_binop (SImode, sub_optab, fnaddr,
18926 0 : plus_constant (Pmode, XEXP (m_tramp, 0),
18927 0 : offset - skip),
18928 : NULL_RTX, 1, OPTAB_DIRECT);
18929 0 : emit_move_insn (mem, disp);
18930 : }
18931 :
18932 295 : gcc_assert (offset <= TRAMPOLINE_SIZE);
18933 :
18934 : #ifdef HAVE_ENABLE_EXECUTE_STACK
18935 : #ifdef CHECK_EXECUTE_STACK_ENABLED
18936 : if (CHECK_EXECUTE_STACK_ENABLED)
18937 : #endif
18938 : emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18939 : LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18940 : #endif
18941 295 : }
18942 :
 : /* Implements the allocate-stack-slots-for-args target hook. */
18943 : static bool
18944 54025244 : ix86_allocate_stack_slots_for_args (void)
18945 : {
18946 : /* Naked functions should not allocate stack slots for arguments. */
18947 54025244 : return !ix86_function_naked (current_function_decl);
18948 : }
18949 :
 : /* Implements the warn-func-return target hook. */
18950 : static bool
18951 42198375 : ix86_warn_func_return (tree decl)
18952 : {
18953 : /* Naked functions are implemented entirely in assembly, including the
18954 : return sequence, so suppress warnings about this. */
18955 42198375 : return !ix86_function_naked (decl);
18956 : }
18957 :
18958 : /* Return the shift count of a vector by scalar shift builtin second argument
18959 : ARG1. */
18960 : static tree
18961 14142 : ix86_vector_shift_count (tree arg1)
18962 : {
 : /* Returns the shift count as a tree whose value fits an unsigned
 : HOST_WIDE_INT, or NULL_TREE if it cannot be determined at
 : compile time. */
18963 14142 : if (tree_fits_uhwi_p (arg1))
18964 : return arg1;
18965 8316 : else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18966 : {
18967 : /* The count argument is weird, passed in as various 128-bit
18968 : (or 64-bit) vectors, the low 64 bits from it are the count. */
18969 162 : unsigned char buf[16];
18970 162 : int len = native_encode_expr (arg1, buf, 16);
18971 162 : if (len == 0)
18972 162 : return NULL_TREE;
18973 162 : tree t = native_interpret_expr (uint64_type_node, buf, len);
18974 162 : if (t && tree_fits_uhwi_p (t))
18975 : return t;
18976 : }
18977 : return NULL_TREE;
18978 : }
18979 :
18980 : /* Return true if arg_mask is all ones, ELEMS is elements number of
18981 : corresponding vector. */
18982 : static bool
18983 25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18984 : {
18985 25042 : if (TREE_CODE (arg_mask) != INTEGER_CST)
18986 : return false;
18987 :
18988 7462 : unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
 : /* When ELEMS equals the HOST_WIDE_INT width, the shift below would
 : be undefined; compare against all-ones directly instead. */
18989 7462 : if (elems == HOST_BITS_PER_WIDE_INT)
18990 33 : return mask == HOST_WIDE_INT_M1U;
 : /* Otherwise check that the low ELEMS bits of the mask are all set. */
18991 7429 : if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18992 2681 : return false;
18993 :
18994 : return true;
18995 : }
18996 :
18997 : static tree
18998 67970077 : ix86_fold_builtin (tree fndecl, int n_args,
18999 : tree *args, bool ignore ATTRIBUTE_UNUSED)
19000 : {
19001 67970077 : if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
19002 : {
19003 67970077 : enum ix86_builtins fn_code
19004 67970077 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19005 67970077 : enum rtx_code rcode;
19006 67970077 : bool is_vshift;
19007 67970077 : enum tree_code tcode;
19008 67970077 : bool is_scalar;
19009 67970077 : unsigned HOST_WIDE_INT mask;
19010 :
19011 67970077 : switch (fn_code)
19012 : {
19013 8746 : case IX86_BUILTIN_CPU_IS:
19014 8746 : case IX86_BUILTIN_CPU_SUPPORTS:
19015 8746 : gcc_assert (n_args == 1);
19016 8746 : return fold_builtin_cpu (fndecl, args);
19017 :
19018 24315 : case IX86_BUILTIN_NANQ:
19019 24315 : case IX86_BUILTIN_NANSQ:
19020 24315 : {
19021 24315 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19022 24315 : const char *str = c_getstr (*args);
19023 24315 : int quiet = fn_code == IX86_BUILTIN_NANQ;
19024 24315 : REAL_VALUE_TYPE real;
19025 :
19026 24315 : if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
19027 24315 : return build_real (type, real);
19028 0 : return NULL_TREE;
19029 : }
19030 :
19031 108 : case IX86_BUILTIN_INFQ:
19032 108 : case IX86_BUILTIN_HUGE_VALQ:
19033 108 : {
19034 108 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19035 108 : REAL_VALUE_TYPE inf;
19036 108 : real_inf (&inf);
19037 108 : return build_real (type, inf);
19038 : }
19039 :
19040 62297 : case IX86_BUILTIN_TZCNT16:
19041 62297 : case IX86_BUILTIN_CTZS:
19042 62297 : case IX86_BUILTIN_TZCNT32:
19043 62297 : case IX86_BUILTIN_TZCNT64:
19044 62297 : gcc_assert (n_args == 1);
19045 62297 : if (TREE_CODE (args[0]) == INTEGER_CST)
19046 : {
19047 45 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19048 45 : tree arg = args[0];
19049 45 : if (fn_code == IX86_BUILTIN_TZCNT16
19050 45 : || fn_code == IX86_BUILTIN_CTZS)
19051 3 : arg = fold_convert (short_unsigned_type_node, arg);
19052 45 : if (integer_zerop (arg))
19053 6 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19054 : else
19055 39 : return fold_const_call (CFN_CTZ, type, arg);
19056 : }
19057 : break;
19058 :
19059 51869 : case IX86_BUILTIN_LZCNT16:
19060 51869 : case IX86_BUILTIN_CLZS:
19061 51869 : case IX86_BUILTIN_LZCNT32:
19062 51869 : case IX86_BUILTIN_LZCNT64:
19063 51869 : gcc_assert (n_args == 1);
19064 51869 : if (TREE_CODE (args[0]) == INTEGER_CST)
19065 : {
19066 54 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19067 54 : tree arg = args[0];
19068 54 : if (fn_code == IX86_BUILTIN_LZCNT16
19069 54 : || fn_code == IX86_BUILTIN_CLZS)
19070 18 : arg = fold_convert (short_unsigned_type_node, arg);
19071 54 : if (integer_zerop (arg))
19072 3 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19073 : else
19074 51 : return fold_const_call (CFN_CLZ, type, arg);
19075 : }
19076 : break;
19077 :
19078 61081 : case IX86_BUILTIN_BEXTR32:
19079 61081 : case IX86_BUILTIN_BEXTR64:
19080 61081 : case IX86_BUILTIN_BEXTRI32:
19081 61081 : case IX86_BUILTIN_BEXTRI64:
19082 61081 : gcc_assert (n_args == 2);
19083 61081 : if (tree_fits_uhwi_p (args[1]))
19084 : {
19085 152 : unsigned HOST_WIDE_INT res = 0;
19086 152 : unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
19087 152 : unsigned int start = tree_to_uhwi (args[1]);
19088 152 : unsigned int len = (start & 0xff00) >> 8;
19089 152 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19090 152 : start &= 0xff;
19091 152 : if (start >= prec || len == 0)
19092 111 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19093 : args[0]);
19094 41 : else if (!tree_fits_uhwi_p (args[0]))
19095 : break;
19096 : else
19097 24 : res = tree_to_uhwi (args[0]) >> start;
19098 24 : if (len > prec)
19099 : len = prec;
19100 24 : if (len < HOST_BITS_PER_WIDE_INT)
19101 15 : res &= (HOST_WIDE_INT_1U << len) - 1;
19102 24 : return build_int_cstu (lhs_type, res);
19103 : }
19104 : break;
19105 :
19106 20984 : case IX86_BUILTIN_BZHI32:
19107 20984 : case IX86_BUILTIN_BZHI64:
19108 20984 : gcc_assert (n_args == 2);
19109 20984 : if (tree_fits_uhwi_p (args[1]))
19110 : {
19111 190 : unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
19112 190 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19113 190 : if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
19114 : return args[0];
19115 190 : if (idx == 0)
19116 52 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19117 : args[0]);
19118 138 : if (!tree_fits_uhwi_p (args[0]))
19119 : break;
19120 12 : unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
19121 12 : res &= ~(HOST_WIDE_INT_M1U << idx);
19122 12 : return build_int_cstu (lhs_type, res);
19123 : }
19124 : break;
19125 :
19126 20742 : case IX86_BUILTIN_PDEP32:
19127 20742 : case IX86_BUILTIN_PDEP64:
19128 20742 : gcc_assert (n_args == 2);
19129 20742 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19130 : {
19131 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19132 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19133 46 : unsigned HOST_WIDE_INT res = 0;
19134 46 : unsigned HOST_WIDE_INT m, k = 1;
19135 2990 : for (m = 1; m; m <<= 1)
19136 2944 : if ((mask & m) != 0)
19137 : {
19138 1440 : if ((src & k) != 0)
19139 789 : res |= m;
19140 1440 : k <<= 1;
19141 : }
19142 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19143 : }
19144 : break;
19145 :
19146 20744 : case IX86_BUILTIN_PEXT32:
19147 20744 : case IX86_BUILTIN_PEXT64:
19148 20744 : gcc_assert (n_args == 2);
19149 20744 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19150 : {
19151 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19152 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19153 46 : unsigned HOST_WIDE_INT res = 0;
19154 46 : unsigned HOST_WIDE_INT m, k = 1;
19155 2990 : for (m = 1; m; m <<= 1)
19156 2944 : if ((mask & m) != 0)
19157 : {
19158 2016 : if ((src & m) != 0)
19159 1063 : res |= k;
19160 2016 : k <<= 1;
19161 : }
19162 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19163 : }
19164 : break;
19165 :
19166 81068 : case IX86_BUILTIN_MOVMSKPS:
19167 81068 : case IX86_BUILTIN_PMOVMSKB:
19168 81068 : case IX86_BUILTIN_MOVMSKPD:
19169 81068 : case IX86_BUILTIN_PMOVMSKB128:
19170 81068 : case IX86_BUILTIN_MOVMSKPD256:
19171 81068 : case IX86_BUILTIN_MOVMSKPS256:
19172 81068 : case IX86_BUILTIN_PMOVMSKB256:
19173 81068 : gcc_assert (n_args == 1);
19174 81068 : if (TREE_CODE (args[0]) == VECTOR_CST)
19175 : {
19176 : HOST_WIDE_INT res = 0;
19177 139 : for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
19178 : {
19179 124 : tree e = VECTOR_CST_ELT (args[0], i);
19180 124 : if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
19181 : {
19182 80 : if (wi::neg_p (wi::to_wide (e)))
19183 31 : res |= HOST_WIDE_INT_1 << i;
19184 : }
19185 44 : else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
19186 : {
19187 44 : if (TREE_REAL_CST (e).sign)
19188 19 : res |= HOST_WIDE_INT_1 << i;
19189 : }
19190 : else
19191 : return NULL_TREE;
19192 : }
19193 15 : return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
19194 : }
19195 : break;
19196 :
19197 658268 : case IX86_BUILTIN_PSLLD:
19198 658268 : case IX86_BUILTIN_PSLLD128:
19199 658268 : case IX86_BUILTIN_PSLLD128_MASK:
19200 658268 : case IX86_BUILTIN_PSLLD256:
19201 658268 : case IX86_BUILTIN_PSLLD256_MASK:
19202 658268 : case IX86_BUILTIN_PSLLD512:
19203 658268 : case IX86_BUILTIN_PSLLDI:
19204 658268 : case IX86_BUILTIN_PSLLDI128:
19205 658268 : case IX86_BUILTIN_PSLLDI128_MASK:
19206 658268 : case IX86_BUILTIN_PSLLDI256:
19207 658268 : case IX86_BUILTIN_PSLLDI256_MASK:
19208 658268 : case IX86_BUILTIN_PSLLDI512:
19209 658268 : case IX86_BUILTIN_PSLLQ:
19210 658268 : case IX86_BUILTIN_PSLLQ128:
19211 658268 : case IX86_BUILTIN_PSLLQ128_MASK:
19212 658268 : case IX86_BUILTIN_PSLLQ256:
19213 658268 : case IX86_BUILTIN_PSLLQ256_MASK:
19214 658268 : case IX86_BUILTIN_PSLLQ512:
19215 658268 : case IX86_BUILTIN_PSLLQI:
19216 658268 : case IX86_BUILTIN_PSLLQI128:
19217 658268 : case IX86_BUILTIN_PSLLQI128_MASK:
19218 658268 : case IX86_BUILTIN_PSLLQI256:
19219 658268 : case IX86_BUILTIN_PSLLQI256_MASK:
19220 658268 : case IX86_BUILTIN_PSLLQI512:
19221 658268 : case IX86_BUILTIN_PSLLW:
19222 658268 : case IX86_BUILTIN_PSLLW128:
19223 658268 : case IX86_BUILTIN_PSLLW128_MASK:
19224 658268 : case IX86_BUILTIN_PSLLW256:
19225 658268 : case IX86_BUILTIN_PSLLW256_MASK:
19226 658268 : case IX86_BUILTIN_PSLLW512_MASK:
19227 658268 : case IX86_BUILTIN_PSLLWI:
19228 658268 : case IX86_BUILTIN_PSLLWI128:
19229 658268 : case IX86_BUILTIN_PSLLWI128_MASK:
19230 658268 : case IX86_BUILTIN_PSLLWI256:
19231 658268 : case IX86_BUILTIN_PSLLWI256_MASK:
19232 658268 : case IX86_BUILTIN_PSLLWI512_MASK:
19233 658268 : rcode = ASHIFT;
19234 658268 : is_vshift = false;
19235 658268 : goto do_shift;
19236 599991 : case IX86_BUILTIN_PSRAD:
19237 599991 : case IX86_BUILTIN_PSRAD128:
19238 599991 : case IX86_BUILTIN_PSRAD128_MASK:
19239 599991 : case IX86_BUILTIN_PSRAD256:
19240 599991 : case IX86_BUILTIN_PSRAD256_MASK:
19241 599991 : case IX86_BUILTIN_PSRAD512:
19242 599991 : case IX86_BUILTIN_PSRADI:
19243 599991 : case IX86_BUILTIN_PSRADI128:
19244 599991 : case IX86_BUILTIN_PSRADI128_MASK:
19245 599991 : case IX86_BUILTIN_PSRADI256:
19246 599991 : case IX86_BUILTIN_PSRADI256_MASK:
19247 599991 : case IX86_BUILTIN_PSRADI512:
19248 599991 : case IX86_BUILTIN_PSRAQ128_MASK:
19249 599991 : case IX86_BUILTIN_PSRAQ256_MASK:
19250 599991 : case IX86_BUILTIN_PSRAQ512:
19251 599991 : case IX86_BUILTIN_PSRAQI128_MASK:
19252 599991 : case IX86_BUILTIN_PSRAQI256_MASK:
19253 599991 : case IX86_BUILTIN_PSRAQI512:
19254 599991 : case IX86_BUILTIN_PSRAW:
19255 599991 : case IX86_BUILTIN_PSRAW128:
19256 599991 : case IX86_BUILTIN_PSRAW128_MASK:
19257 599991 : case IX86_BUILTIN_PSRAW256:
19258 599991 : case IX86_BUILTIN_PSRAW256_MASK:
19259 599991 : case IX86_BUILTIN_PSRAW512:
19260 599991 : case IX86_BUILTIN_PSRAWI:
19261 599991 : case IX86_BUILTIN_PSRAWI128:
19262 599991 : case IX86_BUILTIN_PSRAWI128_MASK:
19263 599991 : case IX86_BUILTIN_PSRAWI256:
19264 599991 : case IX86_BUILTIN_PSRAWI256_MASK:
19265 599991 : case IX86_BUILTIN_PSRAWI512:
19266 599991 : rcode = ASHIFTRT;
19267 599991 : is_vshift = false;
19268 599991 : goto do_shift;
19269 632213 : case IX86_BUILTIN_PSRLD:
19270 632213 : case IX86_BUILTIN_PSRLD128:
19271 632213 : case IX86_BUILTIN_PSRLD128_MASK:
19272 632213 : case IX86_BUILTIN_PSRLD256:
19273 632213 : case IX86_BUILTIN_PSRLD256_MASK:
19274 632213 : case IX86_BUILTIN_PSRLD512:
19275 632213 : case IX86_BUILTIN_PSRLDI:
19276 632213 : case IX86_BUILTIN_PSRLDI128:
19277 632213 : case IX86_BUILTIN_PSRLDI128_MASK:
19278 632213 : case IX86_BUILTIN_PSRLDI256:
19279 632213 : case IX86_BUILTIN_PSRLDI256_MASK:
19280 632213 : case IX86_BUILTIN_PSRLDI512:
19281 632213 : case IX86_BUILTIN_PSRLQ:
19282 632213 : case IX86_BUILTIN_PSRLQ128:
19283 632213 : case IX86_BUILTIN_PSRLQ128_MASK:
19284 632213 : case IX86_BUILTIN_PSRLQ256:
19285 632213 : case IX86_BUILTIN_PSRLQ256_MASK:
19286 632213 : case IX86_BUILTIN_PSRLQ512:
19287 632213 : case IX86_BUILTIN_PSRLQI:
19288 632213 : case IX86_BUILTIN_PSRLQI128:
19289 632213 : case IX86_BUILTIN_PSRLQI128_MASK:
19290 632213 : case IX86_BUILTIN_PSRLQI256:
19291 632213 : case IX86_BUILTIN_PSRLQI256_MASK:
19292 632213 : case IX86_BUILTIN_PSRLQI512:
19293 632213 : case IX86_BUILTIN_PSRLW:
19294 632213 : case IX86_BUILTIN_PSRLW128:
19295 632213 : case IX86_BUILTIN_PSRLW128_MASK:
19296 632213 : case IX86_BUILTIN_PSRLW256:
19297 632213 : case IX86_BUILTIN_PSRLW256_MASK:
19298 632213 : case IX86_BUILTIN_PSRLW512:
19299 632213 : case IX86_BUILTIN_PSRLWI:
19300 632213 : case IX86_BUILTIN_PSRLWI128:
19301 632213 : case IX86_BUILTIN_PSRLWI128_MASK:
19302 632213 : case IX86_BUILTIN_PSRLWI256:
19303 632213 : case IX86_BUILTIN_PSRLWI256_MASK:
19304 632213 : case IX86_BUILTIN_PSRLWI512:
19305 632213 : rcode = LSHIFTRT;
19306 632213 : is_vshift = false;
19307 632213 : goto do_shift;
19308 275402 : case IX86_BUILTIN_PSLLVV16HI:
19309 275402 : case IX86_BUILTIN_PSLLVV16SI:
19310 275402 : case IX86_BUILTIN_PSLLVV2DI:
19311 275402 : case IX86_BUILTIN_PSLLVV2DI_MASK:
19312 275402 : case IX86_BUILTIN_PSLLVV32HI:
19313 275402 : case IX86_BUILTIN_PSLLVV4DI:
19314 275402 : case IX86_BUILTIN_PSLLVV4DI_MASK:
19315 275402 : case IX86_BUILTIN_PSLLVV4SI:
19316 275402 : case IX86_BUILTIN_PSLLVV4SI_MASK:
19317 275402 : case IX86_BUILTIN_PSLLVV8DI:
19318 275402 : case IX86_BUILTIN_PSLLVV8HI:
19319 275402 : case IX86_BUILTIN_PSLLVV8SI:
19320 275402 : case IX86_BUILTIN_PSLLVV8SI_MASK:
19321 275402 : rcode = ASHIFT;
19322 275402 : is_vshift = true;
19323 275402 : goto do_shift;
19324 274983 : case IX86_BUILTIN_PSRAVQ128:
19325 274983 : case IX86_BUILTIN_PSRAVQ256:
19326 274983 : case IX86_BUILTIN_PSRAVV16HI:
19327 274983 : case IX86_BUILTIN_PSRAVV16SI:
19328 274983 : case IX86_BUILTIN_PSRAVV32HI:
19329 274983 : case IX86_BUILTIN_PSRAVV4SI:
19330 274983 : case IX86_BUILTIN_PSRAVV4SI_MASK:
19331 274983 : case IX86_BUILTIN_PSRAVV8DI:
19332 274983 : case IX86_BUILTIN_PSRAVV8HI:
19333 274983 : case IX86_BUILTIN_PSRAVV8SI:
19334 274983 : case IX86_BUILTIN_PSRAVV8SI_MASK:
19335 274983 : rcode = ASHIFTRT;
19336 274983 : is_vshift = true;
19337 274983 : goto do_shift;
19338 275393 : case IX86_BUILTIN_PSRLVV16HI:
19339 275393 : case IX86_BUILTIN_PSRLVV16SI:
19340 275393 : case IX86_BUILTIN_PSRLVV2DI:
19341 275393 : case IX86_BUILTIN_PSRLVV2DI_MASK:
19342 275393 : case IX86_BUILTIN_PSRLVV32HI:
19343 275393 : case IX86_BUILTIN_PSRLVV4DI:
19344 275393 : case IX86_BUILTIN_PSRLVV4DI_MASK:
19345 275393 : case IX86_BUILTIN_PSRLVV4SI:
19346 275393 : case IX86_BUILTIN_PSRLVV4SI_MASK:
19347 275393 : case IX86_BUILTIN_PSRLVV8DI:
19348 275393 : case IX86_BUILTIN_PSRLVV8HI:
19349 275393 : case IX86_BUILTIN_PSRLVV8SI:
19350 275393 : case IX86_BUILTIN_PSRLVV8SI_MASK:
19351 275393 : rcode = LSHIFTRT;
19352 275393 : is_vshift = true;
19353 275393 : goto do_shift;
19354 :
19355 2716250 : do_shift:
19356 2716250 : gcc_assert (n_args >= 2);
19357 2716250 : if (TREE_CODE (args[0]) != VECTOR_CST)
19358 : break;
19359 927 : mask = HOST_WIDE_INT_M1U;
19360 927 : if (n_args > 2)
19361 : {
19362 : /* This is masked shift. */
19363 678 : if (!tree_fits_uhwi_p (args[n_args - 1])
19364 678 : || TREE_SIDE_EFFECTS (args[n_args - 2]))
19365 : break;
19366 678 : mask = tree_to_uhwi (args[n_args - 1]);
19367 678 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19368 678 : mask |= HOST_WIDE_INT_M1U << elems;
19369 678 : if (mask != HOST_WIDE_INT_M1U
19370 567 : && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
19371 : break;
19372 633 : if (mask == (HOST_WIDE_INT_M1U << elems))
19373 : return args[n_args - 2];
19374 : }
19375 879 : if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
19376 : break;
19377 879 : if (tree tem = (is_vshift ? integer_one_node
19378 879 : : ix86_vector_shift_count (args[1])))
19379 : {
19380 558 : unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
19381 558 : unsigned HOST_WIDE_INT prec
19382 558 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
19383 558 : if (count == 0 && mask == HOST_WIDE_INT_M1U)
19384 : return args[0];
19385 558 : if (count >= prec)
19386 : {
19387 72 : if (rcode == ASHIFTRT)
19388 27 : count = prec - 1;
19389 45 : else if (mask == HOST_WIDE_INT_M1U)
19390 3 : return build_zero_cst (TREE_TYPE (args[0]));
19391 : }
19392 555 : tree countt = NULL_TREE;
19393 555 : if (!is_vshift)
19394 : {
19395 377 : if (count >= prec)
19396 42 : countt = integer_zero_node;
19397 : else
19398 335 : countt = build_int_cst (integer_type_node, count);
19399 : }
19400 555 : tree_vector_builder builder;
19401 555 : if (mask != HOST_WIDE_INT_M1U || is_vshift)
19402 392 : builder.new_vector (TREE_TYPE (args[0]),
19403 784 : TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
19404 : 1);
19405 : else
19406 163 : builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
19407 : false);
19408 555 : unsigned int cnt = builder.encoded_nelts ();
19409 5967 : for (unsigned int i = 0; i < cnt; ++i)
19410 : {
19411 5412 : tree elt = VECTOR_CST_ELT (args[0], i);
19412 5412 : if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19413 0 : return NULL_TREE;
19414 5412 : tree type = TREE_TYPE (elt);
19415 5412 : if (rcode == LSHIFTRT)
19416 2040 : elt = fold_convert (unsigned_type_for (type), elt);
19417 5412 : if (is_vshift)
19418 : {
19419 1846 : countt = VECTOR_CST_ELT (args[1], i);
19420 1846 : if (TREE_CODE (countt) != INTEGER_CST
19421 1846 : || TREE_OVERFLOW (countt))
19422 : return NULL_TREE;
19423 1846 : if (wi::neg_p (wi::to_wide (countt))
19424 3610 : || wi::to_widest (countt) >= prec)
19425 : {
19426 325 : if (rcode == ASHIFTRT)
19427 108 : countt = build_int_cst (TREE_TYPE (countt),
19428 108 : prec - 1);
19429 : else
19430 : {
19431 217 : elt = build_zero_cst (TREE_TYPE (elt));
19432 217 : countt = build_zero_cst (TREE_TYPE (countt));
19433 : }
19434 : }
19435 : }
19436 3566 : else if (count >= prec)
19437 504 : elt = build_zero_cst (TREE_TYPE (elt));
19438 8950 : elt = const_binop (rcode == ASHIFT
19439 : ? LSHIFT_EXPR : RSHIFT_EXPR,
19440 5412 : TREE_TYPE (elt), elt, countt);
19441 5412 : if (!elt || TREE_CODE (elt) != INTEGER_CST)
19442 : return NULL_TREE;
19443 5412 : if (rcode == LSHIFTRT)
19444 2040 : elt = fold_convert (type, elt);
19445 5412 : if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19446 : {
19447 1566 : elt = VECTOR_CST_ELT (args[n_args - 2], i);
19448 1566 : if (TREE_CODE (elt) != INTEGER_CST
19449 1566 : || TREE_OVERFLOW (elt))
19450 : return NULL_TREE;
19451 : }
19452 5412 : builder.quick_push (elt);
19453 : }
19454 555 : return builder.build ();
19455 555 : }
19456 : break;
19457 :
19458 32650 : case IX86_BUILTIN_MINSS:
19459 32650 : case IX86_BUILTIN_MINSH_MASK:
19460 32650 : tcode = LT_EXPR;
19461 32650 : is_scalar = true;
19462 32650 : goto do_minmax;
19463 :
19464 32650 : case IX86_BUILTIN_MAXSS:
19465 32650 : case IX86_BUILTIN_MAXSH_MASK:
19466 32650 : tcode = GT_EXPR;
19467 32650 : is_scalar = true;
19468 32650 : goto do_minmax;
19469 :
19470 349807 : case IX86_BUILTIN_MINPS:
19471 349807 : case IX86_BUILTIN_MINPD:
19472 349807 : case IX86_BUILTIN_MINPS256:
19473 349807 : case IX86_BUILTIN_MINPD256:
19474 349807 : case IX86_BUILTIN_MINPS512:
19475 349807 : case IX86_BUILTIN_MINPD512:
19476 349807 : case IX86_BUILTIN_MINPS128_MASK:
19477 349807 : case IX86_BUILTIN_MINPD128_MASK:
19478 349807 : case IX86_BUILTIN_MINPS256_MASK:
19479 349807 : case IX86_BUILTIN_MINPD256_MASK:
19480 349807 : case IX86_BUILTIN_MINPH128_MASK:
19481 349807 : case IX86_BUILTIN_MINPH256_MASK:
19482 349807 : case IX86_BUILTIN_MINPH512_MASK:
19483 349807 : tcode = LT_EXPR;
19484 349807 : is_scalar = false;
19485 349807 : goto do_minmax;
19486 :
19487 : case IX86_BUILTIN_MAXPS:
19488 : case IX86_BUILTIN_MAXPD:
19489 : case IX86_BUILTIN_MAXPS256:
19490 : case IX86_BUILTIN_MAXPD256:
19491 : case IX86_BUILTIN_MAXPS512:
19492 : case IX86_BUILTIN_MAXPD512:
19493 : case IX86_BUILTIN_MAXPS128_MASK:
19494 : case IX86_BUILTIN_MAXPD128_MASK:
19495 : case IX86_BUILTIN_MAXPS256_MASK:
19496 : case IX86_BUILTIN_MAXPD256_MASK:
19497 : case IX86_BUILTIN_MAXPH128_MASK:
19498 : case IX86_BUILTIN_MAXPH256_MASK:
19499 : case IX86_BUILTIN_MAXPH512_MASK:
19500 : tcode = GT_EXPR;
19501 : is_scalar = false;
19502 764934 : do_minmax:
19503 764934 : gcc_assert (n_args >= 2);
19504 764934 : if (TREE_CODE (args[0]) != VECTOR_CST
19505 76 : || TREE_CODE (args[1]) != VECTOR_CST)
19506 : break;
19507 76 : mask = HOST_WIDE_INT_M1U;
19508 76 : if (n_args > 2)
19509 : {
19510 36 : gcc_assert (n_args >= 4);
19511 : /* This is masked minmax. */
19512 36 : if (TREE_CODE (args[3]) != INTEGER_CST
19513 36 : || TREE_SIDE_EFFECTS (args[2]))
19514 : break;
19515 36 : mask = TREE_INT_CST_LOW (args[3]);
19516 36 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19517 36 : mask |= HOST_WIDE_INT_M1U << elems;
19518 36 : if (mask != HOST_WIDE_INT_M1U
19519 32 : && TREE_CODE (args[2]) != VECTOR_CST)
19520 : break;
19521 36 : if (n_args >= 5)
19522 : {
19523 20 : if (!tree_fits_uhwi_p (args[4]))
19524 : break;
19525 20 : if (tree_to_uhwi (args[4]) != 4
19526 0 : && tree_to_uhwi (args[4]) != 8)
19527 : break;
19528 : }
19529 36 : if (mask == (HOST_WIDE_INT_M1U << elems))
19530 : return args[2];
19531 : }
19532 : /* Punt on NaNs, unless exceptions are disabled. */
19533 76 : if (HONOR_NANS (args[0])
19534 76 : && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19535 184 : for (int i = 0; i < 2; ++i)
19536 : {
19537 134 : unsigned count = vector_cst_encoded_nelts (args[i]);
19538 957 : for (unsigned j = 0; j < count; ++j)
19539 849 : if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19540 : return NULL_TREE;
19541 : }
19542 50 : {
19543 50 : tree res = const_binop (tcode,
19544 50 : truth_type_for (TREE_TYPE (args[0])),
19545 : args[0], args[1]);
19546 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19547 : break;
19548 50 : res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19549 : args[0], args[1]);
19550 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19551 : break;
19552 50 : if (mask != HOST_WIDE_INT_M1U)
19553 : {
19554 32 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19555 32 : vec_perm_builder sel (nelts, nelts, 1);
19556 328 : for (unsigned int i = 0; i < nelts; i++)
19557 296 : if (mask & (HOST_WIDE_INT_1U << i))
19558 160 : sel.quick_push (i);
19559 : else
19560 136 : sel.quick_push (nelts + i);
19561 32 : vec_perm_indices indices (sel, 2, nelts);
19562 32 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19563 : indices);
19564 32 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19565 : break;
19566 32 : }
19567 50 : if (is_scalar)
19568 : {
19569 10 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19570 10 : vec_perm_builder sel (nelts, nelts, 1);
19571 10 : sel.quick_push (0);
19572 40 : for (unsigned int i = 1; i < nelts; i++)
19573 30 : sel.quick_push (nelts + i);
19574 10 : vec_perm_indices indices (sel, 2, nelts);
19575 10 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19576 : indices);
19577 10 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19578 : break;
19579 10 : }
19580 50 : return res;
19581 : }
19582 :
19583 : default:
19584 : break;
19585 : }
19586 : }
19587 :
19588 : #ifdef SUBTARGET_FOLD_BUILTIN
19589 : return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19590 : #endif
19591 :
19592 : return NULL_TREE;
19593 : }
19594 :
19595 : /* Fold a MD builtin (use ix86_fold_builtin for folding into
19596 : constant) in GIMPLE. */
19597 :
19598 : bool
19599 1095065 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19600 : {
19601 1095065 : gimple *stmt = gsi_stmt (*gsi), *g;
19602 1095065 : gimple_seq stmts = NULL;
19603 1095065 : tree fndecl = gimple_call_fndecl (stmt);
19604 1095065 : gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19605 1095065 : int n_args = gimple_call_num_args (stmt);
19606 1095065 : enum ix86_builtins fn_code
19607 1095065 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19608 1095065 : tree decl = NULL_TREE;
19609 1095065 : tree arg0, arg1, arg2;
19610 1095065 : enum rtx_code rcode;
19611 1095065 : enum tree_code tcode;
19612 1095065 : unsigned HOST_WIDE_INT count;
19613 1095065 : bool is_vshift;
19614 1095065 : unsigned HOST_WIDE_INT elems;
19615 1095065 : location_t loc;
19616 :
19617 : /* Don't fold when there's isa mismatch. */
19618 1095065 : if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19619 : return false;
19620 :
19621 1094938 : switch (fn_code)
19622 : {
19623 288 : case IX86_BUILTIN_TZCNT32:
19624 288 : decl = builtin_decl_implicit (BUILT_IN_CTZ);
19625 288 : goto fold_tzcnt_lzcnt;
19626 :
19627 237 : case IX86_BUILTIN_TZCNT64:
19628 237 : decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19629 237 : goto fold_tzcnt_lzcnt;
19630 :
19631 215 : case IX86_BUILTIN_LZCNT32:
19632 215 : decl = builtin_decl_implicit (BUILT_IN_CLZ);
19633 215 : goto fold_tzcnt_lzcnt;
19634 :
19635 224 : case IX86_BUILTIN_LZCNT64:
19636 224 : decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19637 224 : goto fold_tzcnt_lzcnt;
19638 :
19639 964 : fold_tzcnt_lzcnt:
19640 964 : gcc_assert (n_args == 1);
19641 964 : arg0 = gimple_call_arg (stmt, 0);
19642 964 : if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19643 : {
19644 799 : int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19645 : /* If arg0 is provably non-zero, optimize into generic
19646 : __builtin_c[tl]z{,ll} function the middle-end handles
19647 : better. */
19648 799 : if (!expr_not_equal_to (arg0, wi::zero (prec)))
19649 : return false;
19650 :
19651 9 : loc = gimple_location (stmt);
19652 9 : g = gimple_build_call (decl, 1, arg0);
19653 9 : gimple_set_location (g, loc);
19654 9 : tree lhs = make_ssa_name (integer_type_node);
19655 9 : gimple_call_set_lhs (g, lhs);
19656 9 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
19657 9 : g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19658 9 : gimple_set_location (g, loc);
19659 9 : gsi_replace (gsi, g, false);
19660 9 : return true;
19661 : }
19662 : break;
19663 :
19664 491 : case IX86_BUILTIN_BZHI32:
19665 491 : case IX86_BUILTIN_BZHI64:
19666 491 : gcc_assert (n_args == 2);
19667 491 : arg1 = gimple_call_arg (stmt, 1);
19668 491 : if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19669 : {
19670 195 : unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19671 195 : arg0 = gimple_call_arg (stmt, 0);
19672 195 : if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19673 : break;
19674 31 : loc = gimple_location (stmt);
19675 31 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19676 31 : gimple_set_location (g, loc);
19677 31 : gsi_replace (gsi, g, false);
19678 31 : return true;
19679 : }
19680 : break;
19681 :
19682 502 : case IX86_BUILTIN_PDEP32:
19683 502 : case IX86_BUILTIN_PDEP64:
19684 502 : case IX86_BUILTIN_PEXT32:
19685 502 : case IX86_BUILTIN_PEXT64:
19686 502 : gcc_assert (n_args == 2);
19687 502 : arg1 = gimple_call_arg (stmt, 1);
19688 502 : if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19689 : {
19690 4 : loc = gimple_location (stmt);
19691 4 : arg0 = gimple_call_arg (stmt, 0);
19692 4 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19693 4 : gimple_set_location (g, loc);
19694 4 : gsi_replace (gsi, g, false);
19695 4 : return true;
19696 : }
19697 : break;
19698 :
19699 145 : case IX86_BUILTIN_PBLENDVB256:
19700 145 : case IX86_BUILTIN_BLENDVPS256:
19701 145 : case IX86_BUILTIN_BLENDVPD256:
19702 : /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
19703 : to scalar operations and not combined back. */
19704 145 : if (!TARGET_AVX2)
19705 : break;
19706 :
19707 : /* FALLTHRU. */
19708 112 : case IX86_BUILTIN_BLENDVPD:
19709 : /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19710 : w/o sse4.2, it's veclowered to scalar operations and
19711 : not combined back. */
19712 112 : if (!TARGET_SSE4_2)
19713 : break;
19714 : /* FALLTHRU. */
19715 166 : case IX86_BUILTIN_PBLENDVB128:
19716 166 : case IX86_BUILTIN_BLENDVPS:
19717 166 : gcc_assert (n_args == 3);
19718 166 : arg0 = gimple_call_arg (stmt, 0);
19719 166 : arg1 = gimple_call_arg (stmt, 1);
19720 166 : arg2 = gimple_call_arg (stmt, 2);
19721 166 : if (gimple_call_lhs (stmt))
19722 : {
19723 166 : loc = gimple_location (stmt);
19724 166 : tree type = TREE_TYPE (arg2);
19725 166 : if (VECTOR_FLOAT_TYPE_P (type))
19726 : {
19727 73 : tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19728 73 : ? intSI_type_node : intDI_type_node;
19729 73 : type = get_same_sized_vectype (itype, type);
19730 : }
19731 : else
19732 93 : type = signed_type_for (type);
19733 166 : arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19734 166 : tree zero_vec = build_zero_cst (type);
19735 166 : tree cmp_type = truth_type_for (type);
19736 166 : tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19737 166 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19738 166 : g = gimple_build_assign (gimple_call_lhs (stmt),
19739 : VEC_COND_EXPR, cmp,
19740 : arg1, arg0);
19741 166 : gimple_set_location (g, loc);
19742 166 : gsi_replace (gsi, g, false);
19743 : }
19744 : else
19745 0 : gsi_replace (gsi, gimple_build_nop (), false);
19746 : return true;
19747 :
19748 :
19749 16 : case IX86_BUILTIN_PCMPEQB128:
19750 16 : case IX86_BUILTIN_PCMPEQW128:
19751 16 : case IX86_BUILTIN_PCMPEQD128:
19752 16 : case IX86_BUILTIN_PCMPEQQ:
19753 16 : case IX86_BUILTIN_PCMPEQB256:
19754 16 : case IX86_BUILTIN_PCMPEQW256:
19755 16 : case IX86_BUILTIN_PCMPEQD256:
19756 16 : case IX86_BUILTIN_PCMPEQQ256:
19757 16 : tcode = EQ_EXPR;
19758 16 : goto do_cmp;
19759 :
19760 : case IX86_BUILTIN_PCMPGTB128:
19761 : case IX86_BUILTIN_PCMPGTW128:
19762 : case IX86_BUILTIN_PCMPGTD128:
19763 : case IX86_BUILTIN_PCMPGTQ:
19764 : case IX86_BUILTIN_PCMPGTB256:
19765 : case IX86_BUILTIN_PCMPGTW256:
19766 : case IX86_BUILTIN_PCMPGTD256:
19767 : case IX86_BUILTIN_PCMPGTQ256:
19768 : tcode = GT_EXPR;
19769 :
19770 33 : do_cmp:
19771 33 : gcc_assert (n_args == 2);
19772 33 : arg0 = gimple_call_arg (stmt, 0);
19773 33 : arg1 = gimple_call_arg (stmt, 1);
19774 33 : if (gimple_call_lhs (stmt))
19775 : {
19776 32 : loc = gimple_location (stmt);
19777 32 : tree type = TREE_TYPE (arg0);
19778 32 : tree zero_vec = build_zero_cst (type);
19779 32 : tree minus_one_vec = build_minus_one_cst (type);
19780 32 : tree cmp_type = truth_type_for (type);
19781 32 : tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19782 32 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19783 32 : g = gimple_build_assign (gimple_call_lhs (stmt),
19784 : VEC_COND_EXPR, cmp,
19785 : minus_one_vec, zero_vec);
19786 32 : gimple_set_location (g, loc);
19787 32 : gsi_replace (gsi, g, false);
19788 : }
19789 : else
19790 1 : gsi_replace (gsi, gimple_build_nop (), false);
19791 : return true;
19792 :
19793 9297 : case IX86_BUILTIN_PSLLD:
19794 9297 : case IX86_BUILTIN_PSLLD128:
19795 9297 : case IX86_BUILTIN_PSLLD128_MASK:
19796 9297 : case IX86_BUILTIN_PSLLD256:
19797 9297 : case IX86_BUILTIN_PSLLD256_MASK:
19798 9297 : case IX86_BUILTIN_PSLLD512:
19799 9297 : case IX86_BUILTIN_PSLLDI:
19800 9297 : case IX86_BUILTIN_PSLLDI128:
19801 9297 : case IX86_BUILTIN_PSLLDI128_MASK:
19802 9297 : case IX86_BUILTIN_PSLLDI256:
19803 9297 : case IX86_BUILTIN_PSLLDI256_MASK:
19804 9297 : case IX86_BUILTIN_PSLLDI512:
19805 9297 : case IX86_BUILTIN_PSLLQ:
19806 9297 : case IX86_BUILTIN_PSLLQ128:
19807 9297 : case IX86_BUILTIN_PSLLQ128_MASK:
19808 9297 : case IX86_BUILTIN_PSLLQ256:
19809 9297 : case IX86_BUILTIN_PSLLQ256_MASK:
19810 9297 : case IX86_BUILTIN_PSLLQ512:
19811 9297 : case IX86_BUILTIN_PSLLQI:
19812 9297 : case IX86_BUILTIN_PSLLQI128:
19813 9297 : case IX86_BUILTIN_PSLLQI128_MASK:
19814 9297 : case IX86_BUILTIN_PSLLQI256:
19815 9297 : case IX86_BUILTIN_PSLLQI256_MASK:
19816 9297 : case IX86_BUILTIN_PSLLQI512:
19817 9297 : case IX86_BUILTIN_PSLLW:
19818 9297 : case IX86_BUILTIN_PSLLW128:
19819 9297 : case IX86_BUILTIN_PSLLW128_MASK:
19820 9297 : case IX86_BUILTIN_PSLLW256:
19821 9297 : case IX86_BUILTIN_PSLLW256_MASK:
19822 9297 : case IX86_BUILTIN_PSLLW512_MASK:
19823 9297 : case IX86_BUILTIN_PSLLWI:
19824 9297 : case IX86_BUILTIN_PSLLWI128:
19825 9297 : case IX86_BUILTIN_PSLLWI128_MASK:
19826 9297 : case IX86_BUILTIN_PSLLWI256:
19827 9297 : case IX86_BUILTIN_PSLLWI256_MASK:
19828 9297 : case IX86_BUILTIN_PSLLWI512_MASK:
19829 9297 : rcode = ASHIFT;
19830 9297 : is_vshift = false;
19831 9297 : goto do_shift;
19832 6495 : case IX86_BUILTIN_PSRAD:
19833 6495 : case IX86_BUILTIN_PSRAD128:
19834 6495 : case IX86_BUILTIN_PSRAD128_MASK:
19835 6495 : case IX86_BUILTIN_PSRAD256:
19836 6495 : case IX86_BUILTIN_PSRAD256_MASK:
19837 6495 : case IX86_BUILTIN_PSRAD512:
19838 6495 : case IX86_BUILTIN_PSRADI:
19839 6495 : case IX86_BUILTIN_PSRADI128:
19840 6495 : case IX86_BUILTIN_PSRADI128_MASK:
19841 6495 : case IX86_BUILTIN_PSRADI256:
19842 6495 : case IX86_BUILTIN_PSRADI256_MASK:
19843 6495 : case IX86_BUILTIN_PSRADI512:
19844 6495 : case IX86_BUILTIN_PSRAQ128_MASK:
19845 6495 : case IX86_BUILTIN_PSRAQ256_MASK:
19846 6495 : case IX86_BUILTIN_PSRAQ512:
19847 6495 : case IX86_BUILTIN_PSRAQI128_MASK:
19848 6495 : case IX86_BUILTIN_PSRAQI256_MASK:
19849 6495 : case IX86_BUILTIN_PSRAQI512:
19850 6495 : case IX86_BUILTIN_PSRAW:
19851 6495 : case IX86_BUILTIN_PSRAW128:
19852 6495 : case IX86_BUILTIN_PSRAW128_MASK:
19853 6495 : case IX86_BUILTIN_PSRAW256:
19854 6495 : case IX86_BUILTIN_PSRAW256_MASK:
19855 6495 : case IX86_BUILTIN_PSRAW512:
19856 6495 : case IX86_BUILTIN_PSRAWI:
19857 6495 : case IX86_BUILTIN_PSRAWI128:
19858 6495 : case IX86_BUILTIN_PSRAWI128_MASK:
19859 6495 : case IX86_BUILTIN_PSRAWI256:
19860 6495 : case IX86_BUILTIN_PSRAWI256_MASK:
19861 6495 : case IX86_BUILTIN_PSRAWI512:
19862 6495 : rcode = ASHIFTRT;
19863 6495 : is_vshift = false;
19864 6495 : goto do_shift;
19865 7960 : case IX86_BUILTIN_PSRLD:
19866 7960 : case IX86_BUILTIN_PSRLD128:
19867 7960 : case IX86_BUILTIN_PSRLD128_MASK:
19868 7960 : case IX86_BUILTIN_PSRLD256:
19869 7960 : case IX86_BUILTIN_PSRLD256_MASK:
19870 7960 : case IX86_BUILTIN_PSRLD512:
19871 7960 : case IX86_BUILTIN_PSRLDI:
19872 7960 : case IX86_BUILTIN_PSRLDI128:
19873 7960 : case IX86_BUILTIN_PSRLDI128_MASK:
19874 7960 : case IX86_BUILTIN_PSRLDI256:
19875 7960 : case IX86_BUILTIN_PSRLDI256_MASK:
19876 7960 : case IX86_BUILTIN_PSRLDI512:
19877 7960 : case IX86_BUILTIN_PSRLQ:
19878 7960 : case IX86_BUILTIN_PSRLQ128:
19879 7960 : case IX86_BUILTIN_PSRLQ128_MASK:
19880 7960 : case IX86_BUILTIN_PSRLQ256:
19881 7960 : case IX86_BUILTIN_PSRLQ256_MASK:
19882 7960 : case IX86_BUILTIN_PSRLQ512:
19883 7960 : case IX86_BUILTIN_PSRLQI:
19884 7960 : case IX86_BUILTIN_PSRLQI128:
19885 7960 : case IX86_BUILTIN_PSRLQI128_MASK:
19886 7960 : case IX86_BUILTIN_PSRLQI256:
19887 7960 : case IX86_BUILTIN_PSRLQI256_MASK:
19888 7960 : case IX86_BUILTIN_PSRLQI512:
19889 7960 : case IX86_BUILTIN_PSRLW:
19890 7960 : case IX86_BUILTIN_PSRLW128:
19891 7960 : case IX86_BUILTIN_PSRLW128_MASK:
19892 7960 : case IX86_BUILTIN_PSRLW256:
19893 7960 : case IX86_BUILTIN_PSRLW256_MASK:
19894 7960 : case IX86_BUILTIN_PSRLW512:
19895 7960 : case IX86_BUILTIN_PSRLWI:
19896 7960 : case IX86_BUILTIN_PSRLWI128:
19897 7960 : case IX86_BUILTIN_PSRLWI128_MASK:
19898 7960 : case IX86_BUILTIN_PSRLWI256:
19899 7960 : case IX86_BUILTIN_PSRLWI256_MASK:
19900 7960 : case IX86_BUILTIN_PSRLWI512:
19901 7960 : rcode = LSHIFTRT;
19902 7960 : is_vshift = false;
19903 7960 : goto do_shift;
19904 2384 : case IX86_BUILTIN_PSLLVV16HI:
19905 2384 : case IX86_BUILTIN_PSLLVV16SI:
19906 2384 : case IX86_BUILTIN_PSLLVV2DI:
19907 2384 : case IX86_BUILTIN_PSLLVV2DI_MASK:
19908 2384 : case IX86_BUILTIN_PSLLVV32HI:
19909 2384 : case IX86_BUILTIN_PSLLVV4DI:
19910 2384 : case IX86_BUILTIN_PSLLVV4DI_MASK:
19911 2384 : case IX86_BUILTIN_PSLLVV4SI:
19912 2384 : case IX86_BUILTIN_PSLLVV4SI_MASK:
19913 2384 : case IX86_BUILTIN_PSLLVV8DI:
19914 2384 : case IX86_BUILTIN_PSLLVV8HI:
19915 2384 : case IX86_BUILTIN_PSLLVV8SI:
19916 2384 : case IX86_BUILTIN_PSLLVV8SI_MASK:
19917 2384 : rcode = ASHIFT;
19918 2384 : is_vshift = true;
19919 2384 : goto do_shift;
19920 2341 : case IX86_BUILTIN_PSRAVQ128:
19921 2341 : case IX86_BUILTIN_PSRAVQ256:
19922 2341 : case IX86_BUILTIN_PSRAVV16HI:
19923 2341 : case IX86_BUILTIN_PSRAVV16SI:
19924 2341 : case IX86_BUILTIN_PSRAVV32HI:
19925 2341 : case IX86_BUILTIN_PSRAVV4SI:
19926 2341 : case IX86_BUILTIN_PSRAVV4SI_MASK:
19927 2341 : case IX86_BUILTIN_PSRAVV8DI:
19928 2341 : case IX86_BUILTIN_PSRAVV8HI:
19929 2341 : case IX86_BUILTIN_PSRAVV8SI:
19930 2341 : case IX86_BUILTIN_PSRAVV8SI_MASK:
19931 2341 : rcode = ASHIFTRT;
19932 2341 : is_vshift = true;
19933 2341 : goto do_shift;
19934 2380 : case IX86_BUILTIN_PSRLVV16HI:
19935 2380 : case IX86_BUILTIN_PSRLVV16SI:
19936 2380 : case IX86_BUILTIN_PSRLVV2DI:
19937 2380 : case IX86_BUILTIN_PSRLVV2DI_MASK:
19938 2380 : case IX86_BUILTIN_PSRLVV32HI:
19939 2380 : case IX86_BUILTIN_PSRLVV4DI:
19940 2380 : case IX86_BUILTIN_PSRLVV4DI_MASK:
19941 2380 : case IX86_BUILTIN_PSRLVV4SI:
19942 2380 : case IX86_BUILTIN_PSRLVV4SI_MASK:
19943 2380 : case IX86_BUILTIN_PSRLVV8DI:
19944 2380 : case IX86_BUILTIN_PSRLVV8HI:
19945 2380 : case IX86_BUILTIN_PSRLVV8SI:
19946 2380 : case IX86_BUILTIN_PSRLVV8SI_MASK:
19947 2380 : rcode = LSHIFTRT;
19948 2380 : is_vshift = true;
19949 2380 : goto do_shift;
19950 :
19951 30857 : do_shift:
19952 30857 : gcc_assert (n_args >= 2);
19953 30857 : if (!gimple_call_lhs (stmt))
19954 : {
19955 1 : gsi_replace (gsi, gimple_build_nop (), false);
19956 1 : return true;
19957 : }
19958 30856 : arg0 = gimple_call_arg (stmt, 0);
19959 30856 : arg1 = gimple_call_arg (stmt, 1);
19960 30856 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19961 : /* For masked shift, only optimize if the mask is all ones. */
19962 30856 : if (n_args > 2
19963 30856 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19964 : break;
19965 16081 : if (is_vshift)
19966 : {
19967 2640 : if (TREE_CODE (arg1) != VECTOR_CST)
19968 : break;
19969 69 : count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19970 69 : if (integer_zerop (arg1))
19971 27 : count = 0;
19972 42 : else if (rcode == ASHIFTRT)
19973 : break;
19974 : else
19975 230 : for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19976 : {
19977 212 : tree elt = VECTOR_CST_ELT (arg1, i);
19978 212 : if (!wi::neg_p (wi::to_wide (elt))
19979 375 : && wi::to_widest (elt) < count)
19980 16 : return false;
19981 : }
19982 : }
19983 : else
19984 : {
19985 13441 : arg1 = ix86_vector_shift_count (arg1);
19986 13441 : if (!arg1)
19987 : break;
19988 5608 : count = tree_to_uhwi (arg1);
19989 : }
19990 5653 : if (count == 0)
19991 : {
19992 : /* Just return the first argument for shift by 0. */
19993 93 : loc = gimple_location (stmt);
19994 93 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19995 93 : gimple_set_location (g, loc);
19996 93 : gsi_replace (gsi, g, false);
19997 93 : return true;
19998 : }
19999 5560 : if (rcode != ASHIFTRT
20000 5560 : && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
20001 : {
20002 : /* For shift counts equal or greater than precision, except for
20003 : arithmetic right shift the result is zero. */
20004 78 : loc = gimple_location (stmt);
20005 78 : g = gimple_build_assign (gimple_call_lhs (stmt),
20006 78 : build_zero_cst (TREE_TYPE (arg0)));
20007 78 : gimple_set_location (g, loc);
20008 78 : gsi_replace (gsi, g, false);
20009 78 : return true;
20010 : }
20011 : break;
20012 :
20013 531 : case IX86_BUILTIN_SHUFPD512:
20014 531 : case IX86_BUILTIN_SHUFPS512:
20015 531 : case IX86_BUILTIN_SHUFPD:
20016 531 : case IX86_BUILTIN_SHUFPD256:
20017 531 : case IX86_BUILTIN_SHUFPS:
20018 531 : case IX86_BUILTIN_SHUFPS256:
20019 531 : arg0 = gimple_call_arg (stmt, 0);
20020 531 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20021 : /* This is masked shuffle. Only optimize if the mask is all ones. */
20022 531 : if (n_args > 3
20023 895 : && !ix86_masked_all_ones (elems,
20024 364 : gimple_call_arg (stmt, n_args - 1)))
20025 : break;
20026 203 : arg2 = gimple_call_arg (stmt, 2);
20027 203 : if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
20028 : {
20029 146 : unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
20030 : /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
20031 146 : if (shuffle_mask > 255)
20032 : return false;
20033 :
20034 144 : machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
20035 144 : loc = gimple_location (stmt);
20036 144 : tree itype = (imode == E_DFmode
20037 144 : ? long_long_integer_type_node : integer_type_node);
20038 144 : tree vtype = build_vector_type (itype, elems);
20039 144 : tree_vector_builder elts (vtype, elems, 1);
20040 :
20041 :
20042 : /* Transform integer shuffle_mask to vector perm_mask which
20043 : is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
20044 840 : for (unsigned i = 0; i != elems; i++)
20045 : {
20046 696 : unsigned sel_idx;
20047 : /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
20048 : provide 2 select constrols for each element of the
20049 : destination. */
20050 696 : if (imode == E_DFmode)
20051 240 : sel_idx = (i & 1) * elems + (i & ~1)
20052 240 : + ((shuffle_mask >> i) & 1);
20053 : else
20054 : {
20055 : /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
20056 : controls for each element of the destination. */
20057 456 : unsigned j = i % 4;
20058 456 : sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
20059 456 : + ((shuffle_mask >> 2 * j) & 3);
20060 : }
20061 696 : elts.quick_push (build_int_cst (itype, sel_idx));
20062 : }
20063 :
20064 144 : tree perm_mask = elts.build ();
20065 144 : arg1 = gimple_call_arg (stmt, 1);
20066 144 : g = gimple_build_assign (gimple_call_lhs (stmt),
20067 : VEC_PERM_EXPR,
20068 : arg0, arg1, perm_mask);
20069 144 : gimple_set_location (g, loc);
20070 144 : gsi_replace (gsi, g, false);
20071 144 : return true;
20072 144 : }
20073 : // Do not error yet, the constant could be propagated later?
20074 : break;
20075 :
20076 48 : case IX86_BUILTIN_PABSB:
20077 48 : case IX86_BUILTIN_PABSW:
20078 48 : case IX86_BUILTIN_PABSD:
20079 : /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
20080 48 : if (!TARGET_MMX_WITH_SSE)
20081 : break;
20082 : /* FALLTHRU. */
20083 2189 : case IX86_BUILTIN_PABSB128:
20084 2189 : case IX86_BUILTIN_PABSB256:
20085 2189 : case IX86_BUILTIN_PABSB512:
20086 2189 : case IX86_BUILTIN_PABSW128:
20087 2189 : case IX86_BUILTIN_PABSW256:
20088 2189 : case IX86_BUILTIN_PABSW512:
20089 2189 : case IX86_BUILTIN_PABSD128:
20090 2189 : case IX86_BUILTIN_PABSD256:
20091 2189 : case IX86_BUILTIN_PABSD512:
20092 2189 : case IX86_BUILTIN_PABSQ128:
20093 2189 : case IX86_BUILTIN_PABSQ256:
20094 2189 : case IX86_BUILTIN_PABSQ512:
20095 2189 : case IX86_BUILTIN_PABSB128_MASK:
20096 2189 : case IX86_BUILTIN_PABSB256_MASK:
20097 2189 : case IX86_BUILTIN_PABSW128_MASK:
20098 2189 : case IX86_BUILTIN_PABSW256_MASK:
20099 2189 : case IX86_BUILTIN_PABSD128_MASK:
20100 2189 : case IX86_BUILTIN_PABSD256_MASK:
20101 2189 : gcc_assert (n_args >= 1);
20102 2189 : if (!gimple_call_lhs (stmt))
20103 : {
20104 1 : gsi_replace (gsi, gimple_build_nop (), false);
20105 1 : return true;
20106 : }
20107 2188 : arg0 = gimple_call_arg (stmt, 0);
20108 2188 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20109 : /* For masked ABS, only optimize if the mask is all ones. */
20110 2188 : if (n_args > 1
20111 2188 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20112 : break;
20113 228 : {
20114 228 : tree utype, ures, vce;
20115 228 : utype = unsigned_type_for (TREE_TYPE (arg0));
20116 : /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
20117 : instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
20118 228 : ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
20119 228 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20120 228 : loc = gimple_location (stmt);
20121 228 : vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
20122 228 : g = gimple_build_assign (gimple_call_lhs (stmt),
20123 : VIEW_CONVERT_EXPR, vce);
20124 228 : gsi_replace (gsi, g, false);
20125 : }
20126 228 : return true;
20127 :
20128 2225 : case IX86_BUILTIN_MINPS:
20129 2225 : case IX86_BUILTIN_MINPD:
20130 2225 : case IX86_BUILTIN_MINPS256:
20131 2225 : case IX86_BUILTIN_MINPD256:
20132 2225 : case IX86_BUILTIN_MINPS512:
20133 2225 : case IX86_BUILTIN_MINPD512:
20134 2225 : case IX86_BUILTIN_MINPS128_MASK:
20135 2225 : case IX86_BUILTIN_MINPD128_MASK:
20136 2225 : case IX86_BUILTIN_MINPS256_MASK:
20137 2225 : case IX86_BUILTIN_MINPD256_MASK:
20138 2225 : case IX86_BUILTIN_MINPH128_MASK:
20139 2225 : case IX86_BUILTIN_MINPH256_MASK:
20140 2225 : case IX86_BUILTIN_MINPH512_MASK:
20141 2225 : tcode = LT_EXPR;
20142 2225 : goto do_minmax;
20143 :
20144 : case IX86_BUILTIN_MAXPS:
20145 : case IX86_BUILTIN_MAXPD:
20146 : case IX86_BUILTIN_MAXPS256:
20147 : case IX86_BUILTIN_MAXPD256:
20148 : case IX86_BUILTIN_MAXPS512:
20149 : case IX86_BUILTIN_MAXPD512:
20150 : case IX86_BUILTIN_MAXPS128_MASK:
20151 : case IX86_BUILTIN_MAXPD128_MASK:
20152 : case IX86_BUILTIN_MAXPS256_MASK:
20153 : case IX86_BUILTIN_MAXPD256_MASK:
20154 : case IX86_BUILTIN_MAXPH128_MASK:
20155 : case IX86_BUILTIN_MAXPH256_MASK:
20156 : case IX86_BUILTIN_MAXPH512_MASK:
20157 : tcode = GT_EXPR;
20158 4435 : do_minmax:
20159 4435 : gcc_assert (n_args >= 2);
20160 : /* Without SSE4.1 we often aren't able to pattern match it back to the
20161 : desired instruction. */
20162 4435 : if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
20163 : break;
20164 3865 : arg0 = gimple_call_arg (stmt, 0);
20165 3865 : arg1 = gimple_call_arg (stmt, 1);
20166 3865 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20167 : /* For masked minmax, only optimize if the mask is all ones. */
20168 3865 : if (n_args > 2
20169 3865 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
20170 : break;
20171 647 : if (n_args >= 5)
20172 : {
20173 436 : tree arg4 = gimple_call_arg (stmt, 4);
20174 436 : if (!tree_fits_uhwi_p (arg4))
20175 : break;
20176 424 : if (tree_to_uhwi (arg4) == 4)
20177 : /* Ok. */;
20178 416 : else if (tree_to_uhwi (arg4) != 8)
20179 : /* Invalid round argument. */
20180 : break;
20181 416 : else if (HONOR_NANS (arg0))
20182 : /* Lowering to comparison would raise exceptions which
20183 : shouldn't be raised. */
20184 : break;
20185 : }
20186 219 : {
20187 219 : tree type = truth_type_for (TREE_TYPE (arg0));
20188 219 : tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
20189 219 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20190 219 : g = gimple_build_assign (gimple_call_lhs (stmt),
20191 : VEC_COND_EXPR, cmpres, arg0, arg1);
20192 219 : gsi_replace (gsi, g, false);
20193 : }
20194 219 : return true;
20195 :
20196 : default:
20197 : break;
20198 : }
20199 :
20200 : return false;
20201 : }
20202 :
20203 : /* Handler for an SVML-style interface to
20204 : a library with vectorized intrinsics. */
20205 :
20206 : tree
20207 10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
20208 : {
      : /* Return a declaration for the SVML routine that implements the
      : scalar math function FN on vectors of type TYPE_IN producing
      : TYPE_OUT, or NULL_TREE when no suitable routine exists.  Only
      : 128-bit vectors (V2DF / V4SF) are handled.  */
20209 10 : char name[20];
      : /* NAME receives the constructed SVML entry point, e.g. "vmlsSin4"
      : or "vmldPow2"; see the construction below.  */
20210 10 : tree fntype, new_fndecl, args;
20211 10 : unsigned arity;
20212 10 : const char *bname;
20213 10 : machine_mode el_mode, in_mode;
20214 10 : int n, in_n;
20215 :
20216 : /* The SVML is suitable for unsafe math only. */
20217 10 : if (!flag_unsafe_math_optimizations)
20218 : return NULL_TREE;
20219 :
      : /* Input and output vectors must have the same element mode and
      : the same number of elements.  */
20220 10 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20221 10 : n = TYPE_VECTOR_SUBPARTS (type_out);
20222 10 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20223 10 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20224 10 : if (el_mode != in_mode
20225 10 : || n != in_n)
20226 : return NULL_TREE;
20227 :
20228 10 : switch (fn)
20229 : {
20230 10 : CASE_CFN_EXP:
20231 10 : CASE_CFN_LOG:
20232 10 : CASE_CFN_LOG10:
20233 10 : CASE_CFN_POW:
20234 10 : CASE_CFN_TANH:
20235 10 : CASE_CFN_TAN:
20236 10 : CASE_CFN_ATAN:
20237 10 : CASE_CFN_ATAN2:
20238 10 : CASE_CFN_ATANH:
20239 10 : CASE_CFN_CBRT:
20240 10 : CASE_CFN_SINH:
20241 10 : CASE_CFN_SIN:
20242 10 : CASE_CFN_ASINH:
20243 10 : CASE_CFN_ASIN:
20244 10 : CASE_CFN_COSH:
20245 10 : CASE_CFN_COS:
20246 10 : CASE_CFN_ACOSH:
20247 10 : CASE_CFN_ACOS:
      : /* SVML variants are provided only for 2 doubles or 4 floats
      : (128-bit vectors).  */
20248 10 : if ((el_mode != DFmode || n != 2)
20249 8 : && (el_mode != SFmode || n != 4))
20250 : return NULL_TREE;
20251 6 : break;
20252 :
20253 : default:
20254 : return NULL_TREE;
20255 : }
20256 :
      : /* Fetch the scalar math builtin so its name can be reused below;
      : BNAME points at "__builtin_<name>", and +10 skips that prefix.  */
20257 6 : tree fndecl = mathfn_built_in (el_mode == DFmode
20258 : ? double_type_node : float_type_node, fn);
20259 6 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20260 :
      : /* SVML spells log as "Ln"; every other function reuses the scalar
      : name with a "vmls" (float) or "vmld" (double) prefix and a
      : trailing vector width ('4' or '2').  */
20261 6 : if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
20262 2 : strcpy (name, "vmlsLn4");
20263 4 : else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
20264 0 : strcpy (name, "vmldLn2");
20265 4 : else if (n == 4)
20266 : {
20267 2 : sprintf (name, "vmls%s", bname+10);
      : /* Replace the trailing 'f' of the float builtin name (e.g.
      : "sinf") with the vector width.  */
20268 2 : name[strlen (name)-1] = '4';
20269 : }
20270 : else
20271 2 : sprintf (name, "vmld%s2", bname+10);
20272 :
20273 : /* Convert to uppercase. */
20274 6 : name[4] &= ~0x20;
20275 :
      : /* Count the scalar builtin's arguments to choose between the
      : unary and binary vector prototypes.  NOTE(review): builtins may
      : have no DECL_ARGUMENTS, in which case ARITY stays 0 and the
      : binary prototype is used -- confirm for all functions above.  */
20276 6 : arity = 0;
20277 6 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20278 0 : arity++;
20279 :
20280 6 : if (arity == 1)
20281 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20282 : else
20283 6 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20284 :
20285 : /* Build a function declaration for the vectorized function. */
20286 6 : new_fndecl = build_decl (BUILTINS_LOCATION,
20287 : FUNCTION_DECL, get_identifier (name), fntype);
20288 6 : TREE_PUBLIC (new_fndecl) = 1;
20289 6 : DECL_EXTERNAL (new_fndecl) = 1;
20290 6 : DECL_IS_NOVOPS (new_fndecl) = 1;
20291 6 : TREE_READONLY (new_fndecl) = 1;
20292 :
20293 6 : return new_fndecl;
20294 : }
20295 :
20296 : /* Handler for an ACML-style interface to
20297 : a library with vectorized intrinsics. */
20298 :
20299 : tree
20300 3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
20301 : {
      : /* Return a declaration for the ACML routine implementing FN on
      : vectors of type TYPE_IN producing TYPE_OUT, or NULL_TREE if no
      : such routine exists.  Only V2DF and V4SF are handled.  */
20302 3 : char name[20] = "__vr.._";
      : /* Name template for entries like "__vrd2_sin"; the two dots are
      : patched below with the element letter and vector width.  */
20303 3 : tree fntype, new_fndecl, args;
20304 3 : unsigned arity;
20305 3 : const char *bname;
20306 3 : machine_mode el_mode, in_mode;
20307 3 : int n, in_n;
20308 :
20309 : /* The ACML is 64bits only and suitable for unsafe math only as
20310 : it does not correctly support parts of IEEE with the required
20311 : precision such as denormals. */
20312 3 : if (!TARGET_64BIT
20313 3 : || !flag_unsafe_math_optimizations)
20314 : return NULL_TREE;
20315 :
      : /* Input and output vectors must have the same element mode and
      : the same number of elements.  */
20316 3 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20317 3 : n = TYPE_VECTOR_SUBPARTS (type_out);
20318 3 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20319 3 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20320 3 : if (el_mode != in_mode
20321 3 : || n != in_n)
20322 : return NULL_TREE;
20323 :
20324 3 : switch (fn)
20325 : {
20326 3 : CASE_CFN_SIN:
20327 3 : CASE_CFN_COS:
20328 3 : CASE_CFN_EXP:
20329 3 : CASE_CFN_LOG:
20330 3 : CASE_CFN_LOG2:
20331 3 : CASE_CFN_LOG10:
      : /* Patch the element letter and width into the "__vr.._"
      : template: "d2" for 2 doubles, "s4" for 4 floats.  */
20332 3 : if (el_mode == DFmode && n == 2)
20333 : {
20334 3 : name[4] = 'd';
20335 3 : name[5] = '2';
20336 : }
20337 0 : else if (el_mode == SFmode && n == 4)
20338 : {
20339 0 : name[4] = 's';
20340 0 : name[5] = '4';
20341 : }
20342 : else
20343 : return NULL_TREE;
20344 3 : break;
20345 :
20346 : default:
20347 : return NULL_TREE;
20348 : }
20349 :
      : /* Steal the operation name from the scalar builtin; BNAME points
      : at "__builtin_<name>", and +10 skips that prefix.  */
20350 3 : tree fndecl = mathfn_built_in (el_mode == DFmode
20351 : ? double_type_node : float_type_node, fn);
20352 3 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20353 3 : sprintf (name + 7, "%s", bname+10);
20354 :
      : /* Count the scalar builtin's arguments to choose between the
      : unary and binary vector prototypes.  NOTE(review): builtins may
      : have no DECL_ARGUMENTS; then the binary prototype is used.  */
20355 3 : arity = 0;
20356 3 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20357 0 : arity++;
20358 :
20359 3 : if (arity == 1)
20360 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20361 : else
20362 3 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20363 :
20364 : /* Build a function declaration for the vectorized function. */
20365 3 : new_fndecl = build_decl (BUILTINS_LOCATION,
20366 : FUNCTION_DECL, get_identifier (name), fntype);
20367 3 : TREE_PUBLIC (new_fndecl) = 1;
20368 3 : DECL_EXTERNAL (new_fndecl) = 1;
20369 3 : DECL_IS_NOVOPS (new_fndecl) = 1;
20370 3 : TREE_READONLY (new_fndecl) = 1;
20371 :
20372 3 : return new_fndecl;
20373 : }
20375 : /* Handler for an AOCL-LibM-style interface to
20376 : a library with vectorized intrinsics. */
20377 :
20378 : tree
20379 220 : ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
20380 : {
      : /* Return a declaration for the AOCL-LibM routine implementing FN
      : on vectors of type TYPE_IN producing TYPE_OUT, or NULL_TREE if
      : no such routine exists.  */
20381 220 : char name[20] = "amd_vr";
      : /* NAME is built up incrementally: "amd_vr" + precision letter +
      : vector width + '_' + operation name, e.g. "amd_vrd2_sin".  */
20382 220 : int name_len = 6;
20383 220 : tree fntype, new_fndecl, args;
20384 220 : unsigned arity;
20385 220 : const char *bname;
20386 220 : machine_mode el_mode, in_mode;
20387 220 : int n, in_n;
20388 :
20389 : /* AOCL-LibM is 64bits only. It is also only suitable for unsafe math only
20390 : as it trades off some accuracy for increased performance. */
20391 220 : if (!TARGET_64BIT
20392 220 : || !flag_unsafe_math_optimizations)
20393 : return NULL_TREE;
20394 :
      : /* Input and output vectors must have the same element mode and
      : the same number of elements.  */
20395 220 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20396 220 : n = TYPE_VECTOR_SUBPARTS (type_out);
20397 220 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20398 220 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20399 220 : if (el_mode != in_mode
20400 220 : || n != in_n)
20401 : return NULL_TREE;
20402 :
20403 220 : gcc_checking_assert (n > 0);
20404 :
20405 : /* Decide whether there exists a function for the combination of FN, the mode
20406 : and the vector width. Return early if it doesn't. */
20407 :
20408 220 : if (el_mode != DFmode && el_mode != SFmode)
20409 : return NULL_TREE;
20410 :
20411 : /* Supported vector widths for given FN and single/double precision. Zeros
20412 : are used to fill out unused positions in the arrays. */
20413 220 : static const int supported_n[][2][3] = {
20414 : /* Single prec. , Double prec. */
20415 : { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */
20416 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */
20417 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */
20418 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */
20419 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */
20420 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */
20421 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */
20422 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */
20423 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */
20424 : { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */
20425 : { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */
20426 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */
20427 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */
20428 : { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. */
20429 : { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */
20430 : { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */
20431 : { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */
20432 : { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */
20433 : };
20434 :
20435 : /* We cannot simply index the supported_n array with FN since multiple FNs
20436 : may correspond to a single operation (see the definitions of these
20437 : CASE_CFN_* macros). */
20438 220 : int i;
20439 220 : switch (fn)
20440 : {
20441 : CASE_CFN_TAN : i = 0; break;
20442 16 : CASE_CFN_EXP : i = 1; break;
20443 16 : CASE_CFN_EXP2 : i = 2; break;
20444 16 : CASE_CFN_LOG : i = 3; break;
20445 16 : CASE_CFN_LOG2 : i = 4; break;
20446 16 : CASE_CFN_COS : i = 5; break;
20447 16 : CASE_CFN_SIN : i = 6; break;
20448 16 : CASE_CFN_POW : i = 7; break;
20449 16 : CASE_CFN_ERF : i = 8; break;
20450 13 : CASE_CFN_ATAN : i = 9; break;
20451 11 : CASE_CFN_LOG10 : i = 10; break;
20452 8 : CASE_CFN_EXP10 : i = 11; break;
20453 8 : CASE_CFN_LOG1P : i = 12; break;
20454 11 : CASE_CFN_ASIN : i = 13; break;
20455 7 : CASE_CFN_ACOS : i = 14; break;
20456 9 : CASE_CFN_TANH : i = 15; break;
20457 7 : CASE_CFN_EXPM1 : i = 16; break;
20458 9 : CASE_CFN_COSH : i = 17; break;
20459 : default: return NULL_TREE;
20460 : }
20461 :
      : /* Column 0 of supported_n is single precision, column 1 double.  */
20462 220 : int j = el_mode == DFmode;
20463 220 : bool n_is_supported = false;
20464 489 : for (unsigned k = 0; k < 3; k++)
20465 470 : if (supported_n[i][j][k] == n)
20466 : {
20467 : n_is_supported = true;
20468 : break;
20469 : }
20470 220 : if (!n_is_supported)
20471 : return NULL_TREE;
20472 :
20473 : /* Append the precision and the vector width to the function name we are
20474 : constructing. */
20475 201 : name[name_len++] = el_mode == DFmode ? 'd' : 's';
20476 201 : switch (n)
20477 : {
20478 148 : case 2:
20479 148 : case 4:
20480 148 : case 8:
20481 148 : name[name_len++] = '0' + n;
20482 148 : break;
20483 53 : case 16:
20484 53 : name[name_len++] = '1';
20485 53 : name[name_len++] = '6';
20486 53 : break;
20487 0 : default:
20488 0 : gcc_unreachable ();
20489 : }
20490 201 : name[name_len++] = '_';
20491 :
20492 : /* Append the operation name (steal it from the name of a builtin). */
      : /* BNAME points at "__builtin_<name>"; +10 skips that prefix.  */
20493 201 : tree fndecl = mathfn_built_in (el_mode == DFmode
20494 : ? double_type_node : float_type_node, fn);
20495 201 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20496 201 : sprintf (name + name_len, "%s", bname + 10);
20497 :
      : /* Count the scalar builtin's arguments to choose between the
      : unary and binary vector prototypes.  NOTE(review): builtins may
      : have no DECL_ARGUMENTS; then the binary prototype is used.  */
20498 201 : arity = 0;
20499 201 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20500 0 : arity++;
20501 :
20502 201 : if (arity == 1)
20503 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20504 : else
20505 201 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20506 :
20507 : /* Build a function declaration for the vectorized function. */
20508 201 : new_fndecl = build_decl (BUILTINS_LOCATION,
20509 : FUNCTION_DECL, get_identifier (name), fntype);
20510 201 : TREE_PUBLIC (new_fndecl) = 1;
20511 201 : DECL_EXTERNAL (new_fndecl) = 1;
20512 201 : TREE_READONLY (new_fndecl) = 1;
20513 :
20514 201 : return new_fndecl;
20515 : }
20516 :
20517 : /* Returns a decl of a function that implements scatter store with
20518 : register type VECTYPE and index type INDEX_TYPE and SCALE.
20519 : Return NULL_TREE if it is not available. */
20520 :
20521 : static tree
20522 151870 : ix86_vectorize_builtin_scatter (const_tree vectype,
20523 : const_tree index_type, int scale)
20524 : {
20525 151870 : bool si;
20526 151870 : enum ix86_builtins code;
20527 :
      : /* Scatter instructions require at least AVX512F.  */
20528 151870 : if (!TARGET_AVX512F)
20529 : return NULL_TREE;
20530 :
      : /* Respect the tuning knobs that disable scatter for particular
      : vector widths (2, 4 or 8+ elements).  */
20531 3207 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
20532 5760 : ? !TARGET_USE_SCATTER_2PARTS
20533 5760 : : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
20534 2553 : ? !TARGET_USE_SCATTER_4PARTS
20535 1773 : : !TARGET_USE_SCATTER_8PARTS))
20536 : return NULL_TREE;
20537 :
      : /* The index must be an integer or pointer whose mode is SImode or
      : DImode; anything else cannot be encoded in the instruction.  */
20538 3207 : if ((TREE_CODE (index_type) != INTEGER_TYPE
20539 463 : && !POINTER_TYPE_P (index_type))
20540 3670 : || (TYPE_MODE (index_type) != SImode
20541 1392 : && TYPE_MODE (index_type) != DImode))
20542 0 : return NULL_TREE;
20543 :
20544 3399 : if (TYPE_PRECISION (index_type) > POINTER_SIZE)
20545 : return NULL_TREE;
20546 :
20547 : /* v*scatter* insn sign extends index to pointer mode. */
20548 3207 : if (TYPE_PRECISION (index_type) < POINTER_SIZE
20549 3207 : && TYPE_UNSIGNED (index_type))
20550 : return NULL_TREE;
20551 :
20552 : /* Scale can be 1, 2, 4 or 8. */
20553 3207 : if (scale <= 0
20554 3207 : || scale > 8
20555 3193 : || (scale & (scale - 1)) != 0)
20556 : return NULL_TREE;
20557 :
      : /* SI selects the builtin variants that take a 32-bit index
      : vector; otherwise the 64-bit index variants are used.  The
      : 256/128-bit variants additionally require AVX512VL.  */
20558 3193 : si = TYPE_MODE (index_type) == SImode;
20559 3193 : switch (TYPE_MODE (vectype))
20560 : {
20561 169 : case E_V8DFmode:
20562 169 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
20563 : break;
20564 104 : case E_V8DImode:
20565 104 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
20566 : break;
20567 177 : case E_V16SFmode:
20568 177 : code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
20569 : break;
20570 257 : case E_V16SImode:
20571 257 : code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
20572 : break;
20573 151 : case E_V4DFmode:
20574 151 : if (TARGET_AVX512VL)
20575 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
20576 : else
20577 : return NULL_TREE;
20578 : break;
20579 115 : case E_V4DImode:
20580 115 : if (TARGET_AVX512VL)
20581 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
20582 : else
20583 : return NULL_TREE;
20584 : break;
20585 132 : case E_V8SFmode:
20586 132 : if (TARGET_AVX512VL)
20587 40 : code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
20588 : else
20589 : return NULL_TREE;
20590 : break;
20591 202 : case E_V8SImode:
20592 202 : if (TARGET_AVX512VL)
20593 82 : code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
20594 : else
20595 : return NULL_TREE;
20596 : break;
20597 171 : case E_V2DFmode:
20598 171 : if (TARGET_AVX512VL)
20599 66 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
20600 : else
20601 : return NULL_TREE;
20602 : break;
20603 141 : case E_V2DImode:
20604 141 : if (TARGET_AVX512VL)
20605 66 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
20606 : else
20607 : return NULL_TREE;
20608 : break;
20609 156 : case E_V4SFmode:
20610 156 : if (TARGET_AVX512VL)
20611 68 : code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
20612 : else
20613 : return NULL_TREE;
20614 : break;
20615 226 : case E_V4SImode:
20616 226 : if (TARGET_AVX512VL)
20617 110 : code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
20618 : else
20619 : return NULL_TREE;
20620 : break;
20621 : default:
20622 : return NULL_TREE;
20623 : }
20624 :
20625 1207 : return get_ix86_builtin (code);
20626 : }
20627 :
20628 : /* Return true if it is safe to use the rsqrt optabs to optimize
20629 : 1.0/sqrt. */
20630 :
20631 : static bool
20632 84 : use_rsqrt_p (machine_mode mode)
20633 : {
20634 84 : return ((mode == HFmode
20635 36 : || (TARGET_SSE && TARGET_SSE_MATH))
20636 84 : && flag_finite_math_only
20637 83 : && !flag_trapping_math
20638 149 : && flag_unsafe_math_optimizations);
20639 : }
20640 :
20641 : /* Helper for avx_vpermilps256_operand et al. This is also used by
20642 : the expansion functions to turn the parallel back into a mask.
20643 : The return value is 0 for no match and the imm8+1 for a match. */
20644 :
20645 : int
20646 64041 : avx_vpermilp_parallel (rtx par, machine_mode mode)
20647 : {
20648 64041 : unsigned i, nelt = GET_MODE_NUNITS (mode);
20649 64041 : unsigned mask = 0;
20650 64041 : unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
20651 :
      : /* The parallel must supply exactly one selector per element.  */
20652 64041 : if (XVECLEN (par, 0) != (int) nelt)
20653 : return 0;
20654 :
20655 : /* Validate that all of the elements are constants, and not totally
20656 : out of range. Copy the data into an integral array to make the
20657 : subsequent checks easier. */
20658 320409 : for (i = 0; i < nelt; ++i)
20659 : {
20660 256368 : rtx er = XVECEXP (par, 0, i);
20661 256368 : unsigned HOST_WIDE_INT ei;
20662 :
20663 256368 : if (!CONST_INT_P (er))
20664 : return 0;
20665 256368 : ei = INTVAL (er);
20666 256368 : if (ei >= nelt)
20667 : return 0;
20668 256368 : ipar[i] = ei;
20669 : }
20670 :
20671 64041 : switch (mode)
20672 : {
20673 : case E_V8DFmode:
20674 : case E_V8DImode:
20675 : /* In the 512-bit DFmode case, we can only move elements within
20676 : a 128-bit lane. First fill the second part of the mask,
20677 : then fallthru. */
20678 4945 : for (i = 4; i < 6; ++i)
20679 : {
20680 3427 : if (!IN_RANGE (ipar[i], 4, 5))
20681 : return 0;
20682 3202 : mask |= (ipar[i] - 4) << i;
20683 : }
20684 3702 : for (i = 6; i < 8; ++i)
20685 : {
20686 2610 : if (!IN_RANGE (ipar[i], 6, 7))
20687 : return 0;
20688 2184 : mask |= (ipar[i] - 6) << i;
20689 : }
20690 : /* FALLTHRU */
20691 :
20692 : case E_V4DFmode:
20693 : case E_V4DImode:
20694 : /* In the 256-bit DFmode case, we can only move elements within
20695 : a 128-bit lane. */
20696 46279 : for (i = 0; i < 2; ++i)
20697 : {
20698 38872 : if (!IN_RANGE (ipar[i], 0, 1))
20699 : return 0;
20700 25997 : mask |= ipar[i] << i;
20701 : }
20702 19523 : for (i = 2; i < 4; ++i)
20703 : {
20704 13470 : if (!IN_RANGE (ipar[i], 2, 3))
20705 : return 0;
20706 12116 : mask |= (ipar[i] - 2) << i;
20707 : }
20708 : break;
20709 :
20710 : case E_V16SFmode:
20711 : case E_V16SImode:
20712 : /* In 512 bit SFmode case, permutation in the upper 256 bits
20713 : must mirror the permutation in the lower 256-bits. */
20714 4398 : for (i = 0; i < 8; ++i)
20715 3918 : if (ipar[i] + 8 != ipar[i + 8])
20716 : return 0;
20717 : /* FALLTHRU */
20718 :
20719 : case E_V8SFmode:
20720 : case E_V8SImode:
20721 : /* In 256 bit SFmode case, we have full freedom of
20722 : movement within the low 128-bit lane, but the high 128-bit
20723 : lane must mirror the exact same pattern. */
20724 37835 : for (i = 0; i < 4; ++i)
20725 32127 : if (ipar[i] + 4 != ipar[i + 4])
20726 : return 0;
      : /* Only the low-lane selectors need encoding from here on.  */
20727 : nelt = 4;
20728 : /* FALLTHRU */
20729 :
20730 37609 : case E_V2DFmode:
20731 37609 : case E_V2DImode:
20732 37609 : case E_V4SFmode:
20733 37609 : case E_V4SImode:
20734 : /* In the 128-bit case, we've full freedom in the placement of
20735 : the elements from the source operand. */
      : /* Each selector occupies nelt/2 bits of the imm8: 2 bits per
      : element for the 4-element modes, 1 bit for the 2-element
      : ones.  */
20736 132001 : for (i = 0; i < nelt; ++i)
20737 94392 : mask |= ipar[i] << (i * (nelt / 2));
20738 : break;
20739 :
20740 0 : default:
20741 0 : gcc_unreachable ();
20742 : }
20743 :
20744 : /* Make sure success has a non-zero value by adding one. */
20745 43662 : return mask + 1;
20746 : }
20747 :
20748 : /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20749 : the expansion functions to turn the parallel back into a mask.
20750 : The return value is 0 for no match and the imm8+1 for a match. */
20751 :
20752 : int
20753 50646 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
20754 : {
20755 50646 : unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20756 50646 : unsigned mask = 0;
20757 50646 : unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20758 :
      : /* The parallel must supply exactly one selector per element.  */
20759 50646 : if (XVECLEN (par, 0) != (int) nelt)
20760 : return 0;
20761 :
20762 : /* Validate that all of the elements are constants, and not totally
20763 : out of range. Copy the data into an integral array to make the
20764 : subsequent checks easier. */
      : /* Selectors may index both input vectors, hence the 2*nelt
      : bound.  */
20765 404750 : for (i = 0; i < nelt; ++i)
20766 : {
20767 354104 : rtx er = XVECEXP (par, 0, i);
20768 354104 : unsigned HOST_WIDE_INT ei;
20769 :
20770 354104 : if (!CONST_INT_P (er))
20771 : return 0;
20772 354104 : ei = INTVAL (er);
20773 354104 : if (ei >= 2 * nelt)
20774 : return 0;
20775 354104 : ipar[i] = ei;
20776 : }
20777 :
20778 : /* Validate that the halves of the permute are halves. */
      : /* I.e. each half must select consecutive elements, so that it
      : moves one whole 128-bit lane.  */
20779 98909 : for (i = 0; i < nelt2 - 1; ++i)
20780 79326 : if (ipar[i] + 1 != ipar[i + 1])
20781 : return 0;
20782 57980 : for (i = nelt2; i < nelt - 1; ++i)
20783 39803 : if (ipar[i] + 1 != ipar[i + 1])
20784 : return 0;
20785 :
20786 : /* Reconstruct the mask. */
      : /* Each half must start on a lane boundary; encode its source
      : lane number in the low bits of the corresponding imm8
      : nibble.  */
20787 54443 : for (i = 0; i < 2; ++i)
20788 : {
20789 36312 : unsigned e = ipar[i * nelt2];
20790 36312 : if (e % nelt2)
20791 : return 0;
20792 36266 : e /= nelt2;
20793 36266 : mask |= e << (i * 4);
20794 : }
20795 :
20796 : /* Make sure success has a non-zero value by adding one. */
20797 18131 : return mask + 1;
20798 : }
20799 :
20800 : /* Return a mask of VPTERNLOG operands that do not affect output. */
20801 :
20802 : int
20803 2425 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
20804 : {
20805 2425 : int mask = 0;
20806 2425 : int imm8 = INTVAL (pternlog_imm);
20807 :
20808 2425 : if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20809 6 : mask |= 1;
20810 2425 : if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20811 6 : mask |= 2;
20812 2425 : if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20813 147 : mask |= 4;
20814 :
20815 2425 : return mask;
20816 : }
20817 :
20818 : /* Eliminate false dependencies on operands that do not affect output
20819 : by substituting other operands of a VPTERNLOG. */
20820 :
20821 : void
20822 77 : substitute_vpternlog_operands (rtx *operands)
20823 : {
      : /* OPERANDS[1..3] are the three VPTERNLOG sources and OPERANDS[4]
      : the imm8 truth table.  Redundant sources are overwritten with a
      : live one so the instruction carries no false dependency.  */
20824 77 : int mask = vpternlog_redundant_operand_mask (operands[4]);
20825 :
20826 77 : if (mask & 1) /* The first operand is redundant. */
20827 2 : operands[1] = operands[2];
20828 :
20829 77 : if (mask & 2) /* The second operand is redundant. */
20830 2 : operands[2] = operands[1];
20831 :
20832 77 : if (mask & 4) /* The third operand is redundant. */
20833 73 : operands[3] = operands[1];
20834 4 : else if (REG_P (operands[3]))
20835 : {
      : /* The third operand is live and a register: prefer it as the
      : replacement for any redundant earlier operand, overriding
      : the copies made above.  */
20836 0 : if (mask & 1)
20837 0 : operands[1] = operands[3];
20838 0 : if (mask & 2)
20839 0 : operands[2] = operands[3];
20840 : }
20841 77 : }
20842 :
20843 : /* Return a register priority for hard reg REGNO. */
20844 : static int
20845 58298372 : ix86_register_priority (int hard_regno)
20846 : {
20847 : /* ebp and r13 as the base always wants a displacement, r12 as the
20848 : base always wants an index. So discourage their usage in an
20849 : address. */
20850 58298372 : if (hard_regno == R12_REG || hard_regno == R13_REG)
20851 : return 0;
20852 53820055 : if (hard_regno == BP_REG)
20853 : return 1;
20854 : /* New x86-64 int registers result in bigger code size. Discourage them. */
20855 51828803 : if (REX_INT_REGNO_P (hard_regno))
20856 : return 2;
20857 35281039 : if (REX2_INT_REGNO_P (hard_regno))
20858 : return 2;
20859 : /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20860 35278599 : if (REX_SSE_REGNO_P (hard_regno))
20861 : return 2;
20862 29235527 : if (EXT_REX_SSE_REGNO_P (hard_regno))
20863 : return 1;
20864 : /* Usage of AX register results in smaller code. Prefer it. */
20865 28957265 : if (hard_regno == AX_REG)
20866 3794239 : return 4;
20867 : return 3;
20868 : }
20869 :
20870 : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
20871 :
20872 : Put float CONST_DOUBLE in the constant pool instead of fp regs.
20873 : QImode must go into class Q_REGS.
20874 : Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20875 : movdf to do mem-to-mem moves through integer regs. */
20876 :
20877 : static reg_class_t
20878 546988971 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
20879 : {
20880 546988971 : machine_mode mode = GET_MODE (x);
20881 :
20882 : /* We're only allowed to return a subclass of CLASS. Many of the
20883 : following checks fail for NO_REGS, so eliminate that early. */
20884 546988971 : if (regclass == NO_REGS)
20885 : return NO_REGS;
20886 :
20887 : /* All classes can load zeros. */
20888 546096324 : if (x == CONST0_RTX (mode))
20889 : return regclass;
20890 :
20891 : /* Force constants into memory if we are loading a (nonzero) constant into
20892 : an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20893 : instructions to load from a constant. */
20894 521004515 : if (CONSTANT_P (x)
20895 521004515 : && (MAYBE_MMX_CLASS_P (regclass)
20896 152069855 : || MAYBE_SSE_CLASS_P (regclass)
20897 122292756 : || MAYBE_MASK_CLASS_P (regclass)))
20898 29907075 : return NO_REGS;
20899 :
20900 : /* Floating-point constants need more complex checks. */
20901 491097440 : if (CONST_DOUBLE_P (x))
20902 : {
20903 : /* General regs can load everything. */
20904 301993 : if (INTEGER_CLASS_P (regclass))
20905 : return regclass;
20906 :
20907 : /* Floats can load 0 and 1 plus some others. Note that we eliminated
20908 : zero above. We only want to wind up preferring 80387 registers if
20909 : we plan on doing computation with them. */
20910 179369 : if (IS_STACK_MODE (mode)
20911 237593 : && standard_80387_constant_p (x) > 0)
20912 : {
20913 : /* Limit class to FP regs. */
20914 40456 : if (FLOAT_CLASS_P (regclass))
20915 : return FLOAT_REGS;
20916 : }
20917 :
20918 138913 : return NO_REGS;
20919 : }
20920 :
20921 : /* Prefer SSE if we can use them for math. Also allow integer regs
20922 : when moves between register units are cheap. */
20923 490795447 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20924 : {
20925 31076840 : if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20926 31061927 : && TARGET_INTER_UNIT_MOVES_TO_VEC
20927 93191182 : && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20928 30910566 : return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20929 : else
20930 166274 : return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20931 : }
20932 :
20933 : /* Generally when we see PLUS here, it's the function invariant
20934 : (plus soft-fp const_int). Which can only be computed into general
20935 : regs. */
20936 459718607 : if (GET_CODE (x) == PLUS)
20937 1890058 : return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20938 :
20939 : /* QImode constants are easy to load, but non-constant QImode data
20940 : must go into Q_REGS or ALL_MASK_REGS. */
20941 457828549 : if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20942 : {
20943 25388443 : if (Q_CLASS_P (regclass))
20944 : return regclass;
20945 20548637 : else if (reg_class_subset_p (Q_REGS, regclass))
20946 : return Q_REGS;
20947 54778 : else if (MASK_CLASS_P (regclass))
20948 : return regclass;
20949 : else
20950 : return NO_REGS;
20951 : }
20952 :
20953 : return regclass;
20954 : }
20955 :
20956 : /* Discourage putting floating-point values in SSE registers unless
20957 : SSE math is being used, and likewise for the 387 registers. */
20958 : static reg_class_t
20959 74549757 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20960 : {
20961 : /* Restrict the output reload class to the register bank that we are doing
20962 : math on. If we would like not to return a subset of CLASS, reject this
20963 : alternative: if reload cannot do this, it will still use its choice. */
20964 74549757 : machine_mode mode = GET_MODE (x);
20965 74549757 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20966 7216712 : return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20967 :
20968 67333045 : if (IS_STACK_MODE (mode))
20969 209603 : return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20970 :
20971 : return regclass;
20972 : }
20973 :
/* Implement TARGET_SECONDARY_RELOAD.  Decide whether reloading X of MODE
   into/out of a register of class RCLASS (IN_P says which direction) needs
   a scratch register or a special reload pattern, communicated via SRI.
   Returns the class of an intermediate register, or NO_REGS if none (or if
   a reload icode was recorded in SRI instead).  */
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  Without AVX512DQ, mask
     registers also lack byte-sized spill instructions.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.
     Without SSE4.1 there is no 16-bit SSE store, so a 2-byte value in an
     SSE register must bounce through a general register on its way out.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
21065 :
21066 : /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
21067 :
21068 : static bool
21069 717454158 : ix86_class_likely_spilled_p (reg_class_t rclass)
21070 : {
21071 707406620 : switch (rclass)
21072 : {
21073 : case AREG:
21074 : case DREG:
21075 : case CREG:
21076 : case BREG:
21077 : case AD_REGS:
21078 : case SIREG:
21079 : case DIREG:
21080 : case SSE_FIRST_REG:
21081 : case FP_TOP_REG:
21082 : case FP_SECOND_REG:
21083 : return true;
21084 :
21085 685923854 : default:
21086 685923854 : break;
21087 : }
21088 :
21089 685923854 : return false;
21090 : }
21091 :
/* Implement TARGET_CALLEE_SAVE_COST.  Return the cost of saving/restoring
   hard register HARD_REGNO across a call, given the plain memory spill
   cost MEM_COST.  GPR saves use push/pop, which is cheaper than a generic
   spill; other register files fall back to MEM_COST unchanged.  */

static int
ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
		       unsigned int, int mem_cost, const HARD_REG_SET &, bool)
{
  /* Account for the fact that push and pop are shorter and do their
     own allocation and deallocation.  */
  if (GENERAL_REGNO_P (hard_regno))
    {
      /* push is 1 byte while typical spill is 4-5 bytes.
	 ??? We probably should adjust size costs accordingly.
	 Costs are relative to reg-reg move that has 2 bytes for 32bit
	 and 3 bytes otherwise.  Be sure that no cost table sets cost
	 to 2, so we end up with 0.  */
      if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
	return 1;
      return mem_cost - 2;
    }
  return mem_cost;
}
21113 :
/* Return true if a set of DST by the expression SRC should be allowed.
   This prevents complex sets of likely_spilled hard regs before split1.  */

bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
{
  /* Avoid complex sets of likely_spilled hard registers before reload.
     A set is rejected only when DST is a likely-spilled hard register,
     SRC is neither a register, memory, nor an immediate the move
     patterns can handle directly, and we are still before the first
     split pass (so the splitter could otherwise get stuck).  */
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && !REG_P (src) && !MEM_P (src)
      /* Vector destinations accept standard SSE constants; scalar
	 destinations accept x86-64 immediates.  */
      && !(VECTOR_MODE_P (GET_MODE (dst))
	   ? standard_sse_constant_p (src, GET_MODE (dst))
	   : x86_64_immediate_operand (src, GET_MODE (dst)))
      && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
      && ix86_pre_reload_split ())
    return false;
  return true;
}
21131 :
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  /* LRA may query with NO_REGS; no secondary memory is needed then.  */
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* A MAYBE_*_CLASS_P that is not the corresponding *_CLASS_P means the
     class mixes registers from several sets — exactly the situation the
     comment above rules out for strict callers.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> non-x87 always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* SSE <-> anything other than GPRs needs memory.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* With SSE4.1, *mov{ti,di}_internal supports moves between
	 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
      if (TARGET_SSE4_1
	  && (TARGET_64BIT ? mode == TImode : mode == DImode))
	return false;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;
    }

  return false;
}
21219 :
21220 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21221 :
21222 : static bool
21223 71021878 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21224 : reg_class_t class2)
21225 : {
21226 71021878 : return inline_secondary_memory_needed (mode, class1, class2, true);
21227 : }
21228 :
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens integral modes to BITS_PER_WORD.
   There is no need to emit full 64 bit move on 64 bit targets
   for integral modes that can be moved using 32 bit move.  */

static machine_mode
ix86_secondary_memory_needed_mode (machine_mode mode)
{
  /* Widen sub-32-bit integral modes to the 32-bit mode of the same
     class; everything else passes through unchanged.  */
  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
  return mode;
}
21242 :
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      /* XFmode occupies 2 words on 64-bit and 3 on 32-bit; XCmode is
	 two such values.  */
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  else
    {
      /* x87/SSE/MMX/mask registers each hold a full value; a complex
	 value needs one register per component.  */
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
21268 :
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Return true if a value of
   mode FROM held in a register of class REGCLASS may be accessed in
   mode TO without a copy.  */

static bool
ix86_can_change_mode_class (machine_mode from, machine_mode to,
			    reg_class_t regclass)
{
  if (from == to)
    return true;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.

     ??? middle-end queries mode changes for ALL_REGS and this makes
     vec_series_lowpart_p to always return false.  We probably should
     restrict this to modes supported by i387 and check if it is enabled.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return false;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.
	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
      if (GET_MODE_SIZE (from) < mov_size
	  || GET_MODE_SIZE (to) < mov_size)
	return false;
    }

  return true;
}
21302 :
21303 : /* Return index of MODE in the sse load/store tables. */
21304 :
21305 : static inline int
21306 764221115 : sse_store_index (machine_mode mode)
21307 : {
21308 : /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21309 : costs to processor_costs, which requires changes to all entries in
21310 : processor cost table. */
21311 764221115 : if (mode == E_HFmode)
21312 135477880 : mode = E_SFmode;
21313 :
21314 1528442230 : switch (GET_MODE_SIZE (mode))
21315 : {
21316 : case 4:
21317 : return 0;
21318 : case 8:
21319 : return 1;
21320 : case 16:
21321 : return 2;
21322 : case 32:
21323 : return 3;
21324 : case 64:
21325 : return 4;
21326 : default:
21327 : return -1;
21328 : }
21329 : }
21330 :
21331 : /* Return the cost of moving data of mode M between a
21332 : register and memory. A value of 2 is the default; this cost is
21333 : relative to those in `REGISTER_MOVE_COST'.
21334 :
21335 : This function is used extensively by register_move_cost that is used to
21336 : build tables at startup. Make it inline in this case.
21337 : When IN is 2, return maximum of in and out move cost.
21338 :
21339 : If moving between registers and memory is more expensive than
21340 : between two registers, you should define this macro to express the
21341 : relative cost.
21342 :
21343 : Model also increased moving costs of QImode registers in non
21344 : Q_REGS classes.
21345 : */
21346 : static inline int
21347 6831953271 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21348 : {
21349 6831953271 : int cost;
21350 :
21351 6831953271 : if (FLOAT_CLASS_P (regclass))
21352 : {
21353 348903607 : int index;
21354 348903607 : switch (mode)
21355 : {
21356 : case E_SFmode:
21357 : index = 0;
21358 : break;
21359 : case E_DFmode:
21360 : index = 1;
21361 : break;
21362 : case E_XFmode:
21363 : index = 2;
21364 : break;
21365 : default:
21366 : return 100;
21367 : }
21368 104292665 : if (in == 2)
21369 100400817 : return MAX (ix86_cost->hard_register.fp_load [index],
21370 : ix86_cost->hard_register.fp_store [index]);
21371 3891848 : return in ? ix86_cost->hard_register.fp_load [index]
21372 3891848 : : ix86_cost->hard_register.fp_store [index];
21373 : }
21374 6483049664 : if (SSE_CLASS_P (regclass))
21375 : {
21376 637152245 : int index = sse_store_index (mode);
21377 637152245 : if (index == -1)
21378 : return 100;
21379 554114731 : if (in == 2)
21380 392691617 : return MAX (ix86_cost->hard_register.sse_load [index],
21381 : ix86_cost->hard_register.sse_store [index]);
21382 161423114 : return in ? ix86_cost->hard_register.sse_load [index]
21383 161423114 : : ix86_cost->hard_register.sse_store [index];
21384 : }
21385 5845897419 : if (MASK_CLASS_P (regclass))
21386 : {
21387 106984702 : int index;
21388 213969404 : switch (GET_MODE_SIZE (mode))
21389 : {
21390 : case 1:
21391 : index = 0;
21392 : break;
21393 8827357 : case 2:
21394 8827357 : index = 1;
21395 8827357 : break;
21396 : /* DImode loads and stores assumed to cost the same as SImode. */
21397 39703401 : case 4:
21398 39703401 : case 8:
21399 39703401 : index = 2;
21400 39703401 : break;
21401 : default:
21402 : return 100;
21403 : }
21404 :
21405 52065790 : if (in == 2)
21406 579259 : return MAX (ix86_cost->hard_register.mask_load[index],
21407 : ix86_cost->hard_register.mask_store[index]);
21408 51486531 : return in ? ix86_cost->hard_register.mask_load[2]
21409 51486531 : : ix86_cost->hard_register.mask_store[2];
21410 : }
21411 5738912717 : if (MMX_CLASS_P (regclass))
21412 : {
21413 170351813 : int index;
21414 340703626 : switch (GET_MODE_SIZE (mode))
21415 : {
21416 : case 4:
21417 : index = 0;
21418 : break;
21419 99978349 : case 8:
21420 99978349 : index = 1;
21421 99978349 : break;
21422 : default:
21423 : return 100;
21424 : }
21425 136885601 : if (in == 2)
21426 117149095 : return MAX (ix86_cost->hard_register.mmx_load [index],
21427 : ix86_cost->hard_register.mmx_store [index]);
21428 19736506 : return in ? ix86_cost->hard_register.mmx_load [index]
21429 19736506 : : ix86_cost->hard_register.mmx_store [index];
21430 : }
21431 11137121808 : switch (GET_MODE_SIZE (mode))
21432 : {
21433 123270431 : case 1:
21434 123270431 : if (Q_CLASS_P (regclass) || TARGET_64BIT)
21435 : {
21436 120642049 : if (!in)
21437 19353032 : return ix86_cost->hard_register.int_store[0];
21438 101289017 : if (TARGET_PARTIAL_REG_DEPENDENCY
21439 101289017 : && optimize_function_for_speed_p (cfun))
21440 94423813 : cost = ix86_cost->hard_register.movzbl_load;
21441 : else
21442 6865204 : cost = ix86_cost->hard_register.int_load[0];
21443 101289017 : if (in == 2)
21444 81906760 : return MAX (cost, ix86_cost->hard_register.int_store[0]);
21445 : return cost;
21446 : }
21447 : else
21448 : {
21449 2628382 : if (in == 2)
21450 1861370 : return MAX (ix86_cost->hard_register.movzbl_load,
21451 : ix86_cost->hard_register.int_store[0] + 4);
21452 767012 : if (in)
21453 383560 : return ix86_cost->hard_register.movzbl_load;
21454 : else
21455 383452 : return ix86_cost->hard_register.int_store[0] + 4;
21456 : }
21457 636302905 : break;
21458 636302905 : case 2:
21459 636302905 : {
21460 636302905 : int cost;
21461 636302905 : if (in == 2)
21462 537599674 : cost = MAX (ix86_cost->hard_register.int_load[1],
21463 : ix86_cost->hard_register.int_store[1]);
21464 : else
21465 98703231 : cost = in ? ix86_cost->hard_register.int_load[1]
21466 : : ix86_cost->hard_register.int_store[1];
21467 :
21468 636302905 : if (mode == E_HFmode)
21469 : {
21470 : /* Prefer SSE over GPR for HFmode. */
21471 123300536 : int sse_cost;
21472 123300536 : int index = sse_store_index (mode);
21473 123300536 : if (in == 2)
21474 113432232 : sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21475 : ix86_cost->hard_register.sse_store[index]);
21476 : else
21477 19736608 : sse_cost = (in
21478 9868304 : ? ix86_cost->hard_register.sse_load [index]
21479 : : ix86_cost->hard_register.sse_store [index]);
21480 123300536 : if (sse_cost >= cost)
21481 123300536 : cost = sse_cost + 1;
21482 : }
21483 : return cost;
21484 : }
21485 4808987568 : default:
21486 4808987568 : if (in == 2)
21487 3723091082 : cost = MAX (ix86_cost->hard_register.int_load[2],
21488 : ix86_cost->hard_register.int_store[2]);
21489 1085896486 : else if (in)
21490 543143176 : cost = ix86_cost->hard_register.int_load[2];
21491 : else
21492 542753310 : cost = ix86_cost->hard_register.int_store[2];
21493 : /* Multiply with the number of GPR moves needed. */
21494 9736916766 : return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21495 : }
21496 : }
21497 :
21498 : static int
21499 1756349345 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21500 : {
21501 2634193829 : return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
21502 : }
21503 :
21504 :
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      /* IN == 2 asks for the max of the load and store cost.  */
      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
	  && TARGET_MEMORY_MISMATCH_STALL
	  && targetm.class_max_nregs (class1, mode)
	     > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between MMX and non-MMX units require secondary memory.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    gcc_unreachable ();

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return (SSE_CLASS_P (class1)
	    ? ix86_cost->hard_register.sse_to_integer
	    : ix86_cost->hard_register.integer_to_sse);

  /* Moves between mask register and GPR.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      return (MASK_CLASS_P (class1)
	      ? ix86_cost->hard_register.mask_to_integer
	      : ix86_cost->hard_register.integer_to_mask);
    }
  /* Moving between mask registers.  */
  if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
    return ix86_cost->hard_register.mask_move;

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->hard_register.fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      /* SSE moves are costed by vector width.  */
      if (GET_MODE_BITSIZE (mode) <= 128)
	return ix86_cost->hard_register.xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
	return ix86_cost->hard_register.ymm_move;
      return ix86_cost->hard_register.zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->hard_register.mmx_move;
  /* Default: GPR-to-GPR move.  */
  return 2;
}
21582 :
/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   words of a value of mode MODE but can be less for certain modes in
   special long registers.

   Actually there are no two word move instructions for consecutive
   registers.  And only registers 0-3 may have mov byte instructions
   applied to them.  */

static unsigned int
ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (GENERAL_REGNO_P (regno))
    {
      /* XFmode is 12 bytes on 32-bit (3 words) but padded to 16 bytes
	 on 64-bit (2 words); XCmode is two such values.  */
      if (mode == XFmode)
	return TARGET_64BIT ? 2 : 3;
      if (mode == XCmode)
	return TARGET_64BIT ? 4 : 6;
      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  /* Non-GPR files hold a whole value per register; complex values use
     one register per component.  */
  if (COMPLEX_MODE_P (mode))
    return 2;
  /* Register pair for mask registers.  */
  if (mode == P2QImode || mode == P2HImode)
    return 2;

  return 1;
}
21610 :
21611 : /* Implement REGMODE_NATURAL_SIZE(MODE). */
21612 : unsigned int
21613 108004041 : ix86_regmode_natural_size (machine_mode mode)
21614 : {
21615 108004041 : if (mode == P2HImode || mode == P2QImode)
21616 2462 : return GET_MODE_SIZE (mode) / 2;
21617 108002810 : return UNITS_PER_WORD;
21618 : }
21619 :
/* Implement TARGET_HARD_REGNO_MODE_OK.  Return true if hard register
   REGNO may hold a value of mode MODE, given the enabled ISA features.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      /* Wider mask modes (beyond HImode) additionally need AVX512BW.  */
      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
21717 :
/* Implement TARGET_INSN_CALLEE_ABI.  Return the function ABI used by the
   call insn INSN: the special vzeroupper ABI for vzeroupper fake calls,
   otherwise the default ABI 0.  */

const predefined_function_abi &
ix86_insn_callee_abi (const rtx_insn *insn)
{
  unsigned int abi_id = 0;
  rtx pat = PATTERN (insn);
  if (vzeroupper_pattern (pat, VOIDmode))
    abi_id = ABI_VZEROUPPER;

  return function_abis[abi_id];
}
21730 :
/* Initialize function_abis with corresponding abi_id,
   currently only handle vzeroupper.  The ABI is registered with an
   empty full-clobber set; initialization happens at most once.  */
void
ix86_initialize_callee_abi (unsigned int abi_id)
{
  gcc_assert (abi_id == ABI_VZEROUPPER);
  predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
  if (!vzeroupper_abi.initialized_p ())
    {
      HARD_REG_SET full_reg_clobbers;
      CLEAR_HARD_REG_SET (full_reg_clobbers);
      vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
    }
}
21745 :
/* Emit an avx_vzeroupper as a fake call insn using the special
   vzeroupper callee ABI, so the RA models only the upper-half SSE
   clobbers rather than a full call.  */
void
ix86_expand_avx_vzeroupper (void)
{
  /* Initialize vzeroupper_abi here.  */
  ix86_initialize_callee_abi (ABI_VZEROUPPER);
  rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
  /* Return false for non-local goto in can_nonlocal_goto.  */
  make_reg_eh_region_note (insn, 0, INT_MIN);
  /* Flag used for call_insn indicates it's a fake call.  */
  RTX_FLAG (insn, used) = 1;
}
21757 :
21758 :
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   saves SSE registers across calls is Win64 (thus no need to check the
   current ABI here), and with AVX enabled Win64 only guarantees that
   the low 16 bytes are saved.  */

static bool
ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
				     machine_mode mode)
{
  /* Special ABI for vzeroupper which only clobber higher part of sse regs.
     xmm16-xmm31 (EXT_REX) are not touched by vzeroupper, hence only the
     legacy and REX SSE registers are partially clobbered.  */
  if (abi_id == ABI_VZEROUPPER)
    return (GET_MODE_SIZE (mode) > 16
	    && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
		|| LEGACY_SSE_REGNO_P (regno)));

  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
21776 :
21777 : /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21778 : tieable integer mode. */
21779 :
21780 : static bool
21781 51890441 : ix86_tieable_integer_mode_p (machine_mode mode)
21782 : {
21783 51890441 : switch (mode)
21784 : {
21785 : case E_HImode:
21786 : case E_SImode:
21787 : return true;
21788 :
21789 5227234 : case E_QImode:
21790 5227234 : return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21791 :
21792 10191038 : case E_DImode:
21793 10191038 : return TARGET_64BIT;
21794 :
21795 : default:
21796 : return false;
21797 : }
21798 : }
21799 :
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any vector modes or scalar floating point modes acceptable to SSE
     registers, excluding scalar integer modes with SUBREG:
     (subreg:QI (reg:TI 99) 0))
     (subreg:HI (reg:TI 99) 0))
     (subreg:SI (reg:TI 99) 0))
     (subreg:DI (reg:TI 99) 0))
     to avoid unnecessary move from SSE register to integer register.
   */
  if (GET_MODE_SIZE (mode2) >= 16
      && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
	  || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
	      && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  /* SCmode and DImode can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_DImode)
      || (mode1 == E_DImode && mode2 == E_SCmode))
    return TARGET_64BIT;

  /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
      || (mode1 == E_V2SFmode && mode2 == E_SCmode)
      || (mode1 == E_DCmode && mode2 == E_V2DFmode)
      || (mode1 == E_V2DFmode && mode2 == E_DCmode))
    return true;

  return false;
}
21864 :
21865 : /* Return the cost of moving between two registers of mode MODE. */
21866 :
21867 : static int
21868 28921501 : ix86_set_reg_reg_cost (machine_mode mode)
21869 : {
21870 28921501 : unsigned int units = UNITS_PER_WORD;
21871 :
21872 28921501 : switch (GET_MODE_CLASS (mode))
21873 : {
21874 : default:
21875 : break;
21876 :
21877 : case MODE_CC:
21878 28921501 : units = GET_MODE_SIZE (CCmode);
21879 : break;
21880 :
21881 1154795 : case MODE_FLOAT:
21882 1154795 : if ((TARGET_SSE && mode == TFmode)
21883 676920 : || (TARGET_80387 && mode == XFmode)
21884 209399 : || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
21885 141798 : || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
21886 2279732 : units = GET_MODE_SIZE (mode);
21887 : break;
21888 :
21889 1291918 : case MODE_COMPLEX_FLOAT:
21890 1291918 : if ((TARGET_SSE && mode == TCmode)
21891 866208 : || (TARGET_80387 && mode == XCmode)
21892 440378 : || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
21893 14520 : || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
21894 2577352 : units = GET_MODE_SIZE (mode);
21895 : break;
21896 :
21897 18486096 : case MODE_VECTOR_INT:
21898 18486096 : case MODE_VECTOR_FLOAT:
21899 18486096 : if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
21900 18390787 : || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21901 18220043 : || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21902 15623537 : || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21903 14333802 : || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21904 14288292 : && VALID_MMX_REG_MODE (mode)))
21905 8411366 : units = GET_MODE_SIZE (mode);
21906 : }
21907 :
21908 : /* Return the cost of moving between two registers of mode MODE,
21909 : assuming that the move will be in pieces of at most UNITS bytes. */
21910 28921501 : return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
21911 : }
21912 :
21913 : /* Return cost of vector operation in MODE given that scalar version has
21914 : COST. */
21915 :
21916 : static int
21917 2797768689 : ix86_vec_cost (machine_mode mode, int cost)
21918 : {
21919 2797768689 : if (!VECTOR_MODE_P (mode))
21920 : return cost;
21921 :
21922 2797541187 : if (GET_MODE_BITSIZE (mode) == 128
21923 2797541187 : && TARGET_SSE_SPLIT_REGS)
21924 2862046 : return cost * GET_MODE_BITSIZE (mode) / 64;
21925 2796110164 : else if (GET_MODE_BITSIZE (mode) > 128
21926 2796110164 : && TARGET_AVX256_SPLIT_REGS)
21927 1676180 : return cost * GET_MODE_BITSIZE (mode) / 128;
21928 2795272074 : else if (GET_MODE_BITSIZE (mode) > 256
21929 2795272074 : && TARGET_AVX512_SPLIT_REGS)
21930 194508 : return cost * GET_MODE_BITSIZE (mode) / 256;
21931 : return cost;
21932 : }
21933 :
21934 : /* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
21935 : vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */
static int
ix86_widen_mult_cost (const struct processor_costs *cost,
		      enum machine_mode mode, bool uns_p)
{
  /* Only integer vector modes are supported by this helper.  */
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
  int extra_cost = 0;
  int basic_cost = 0;
  switch (mode)
    {
    case V8HImode:
    case V16HImode:
      /* Signed widening, and the 256-bit case, need two extra ops.  */
      if (!uns_p || mode == V16HImode)
	extra_cost = cost->sse_op * 2;
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    case V4SImode:
    case V8SImode:
      /* pmulhw/pmullw can be used.  */
      basic_cost = cost->mulss * 2 + cost->sse_op * 2;
      break;
    case V2DImode:
      /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
	 require extra 4 mul, 4 add, 4 cmp and 2 shift.  */
      if (!TARGET_SSE4_1 && !uns_p)
	extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
		      + cost->sse_op * 2;
      /* Fallthru.  */
    case V4DImode:
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    default:
      /* Not implemented; return an arbitrarily high cost so this
	 form is never chosen.  */
      return 100;
    }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
}
21972 :
21973 : /* Return cost of multiplication in MODE. */
21974 :
static int
ix86_multiplication_cost (const struct processor_costs *cost,
			  enum machine_mode mode)
{
  /* For vector modes, cost is based on the element mode.  */
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Emulation sequence: NMULTS multiplies plus NOPS cheap SSE ops.  */
      int nmults, nops;
      /* Cost of reading the memory. */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  /* Partial V*QImode is emulated with 4-6 insns. */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  /* With AVX512BW+VL no extra work is needed at all.  */
	  if (TARGET_AVX512BW && TARGET_AVX512VL)
	    ;
	  else if (TARGET_AVX2)
	    nops += 2;
	  else if (TARGET_XOP)
	    extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	  else
	    {
	      nops += 1;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V16QImode:
	  /* V*QImode is emulated with 4-11 insns. */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
	    {
	      if (!(TARGET_AVX512BW && TARGET_AVX512VL))
		nops += 3;
	    }
	  else if (TARGET_XOP)
	    {
	      nmults += 1;
	      nops += 2;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  else
	    {
	      nmults += 1;
	      nops += 4;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V32QImode:
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
	    {
	      nmults += 1;
	      nops += 4;
	      /* 2 loads, so no division by 2. */
	      extra += COSTS_N_INSNS (cost->sse_load[3]);
	    }
	  goto do_qimode;

	case V64QImode:
	  nmults = 2;
	  nops = 9;
	  /* 2 loads of each size, so no division by 2. */
	  extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);

	do_qimode:
	  /* Shared exit for all QImode vector cases.  */
	  return ix86_vec_cost (mode, cost->mulss * nmults
				+ cost->sse_op * nops) + extra;

	case V4SImode:
	  /* pmulld is used in this case. No emulation is needed. */
	  if (TARGET_SSE4_1)
	    goto do_native;
	  /* V4SImode is emulated with 7 insns. */
	  else
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);

	case V2DImode:
	case V4DImode:
	  /* vpmullq is used in this case. No emulation is needed. */
	  if (TARGET_AVX512DQ && TARGET_AVX512VL)
	    goto do_native;
	  /* V*DImode is emulated with 6-8 insns. */
	  else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
	  /* FALLTHRU */
	case V8DImode:
	  /* vpmullq is used in this case. No emulation is needed. */
	  if (TARGET_AVX512DQ && mode == V8DImode)
	    goto do_native;
	  else
	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);

	default:
	do_native:
	  /* A single native vector multiply.  */
	  return ix86_vec_cost (mode, cost->mulss);
	}
    }
  else
    /* Scalar integer multiply; the "* 7" bit count is an average
       estimate since the actual multiplier value is unknown here.  */
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
22099 :
/* Return cost of division in MODE.  */
22101 :
22102 : static int
22103 71755017 : ix86_division_cost (const struct processor_costs *cost,
22104 : enum machine_mode mode)
22105 : {
22106 71755017 : machine_mode inner_mode = mode;
22107 71755017 : if (VECTOR_MODE_P (mode))
22108 52948425 : inner_mode = GET_MODE_INNER (mode);
22109 :
22110 71755017 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22111 248266 : return inner_mode == DFmode ? cost->divsd : cost->divss;
22112 71506751 : else if (X87_FLOAT_MODE_P (mode))
22113 44794 : return cost->fdiv;
22114 71461957 : else if (FLOAT_MODE_P (mode))
22115 16914 : return ix86_vec_cost (mode,
22116 16914 : inner_mode == DFmode ? cost->divsd : cost->divss);
22117 : else
22118 79711580 : return cost->divide[MODE_INDEX (mode)];
22119 : }
22120 :
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the value of op1 is known and given in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND;
   SHIFT_AND_TRUNCATE specifies whether op1 is a SUBREG of an AND.

   SKIP_OP0/1 is set to true if the cost of OP0/1 should be ignored.  */
22127 :
static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum rtx_code code,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  /* By default neither operand's cost is skipped.  */
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Number of SSE ops in the emulation sequence.  */
      int count;
      /* Cost of reading the memory. */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  if (TARGET_AVX2)
	    /* Use vpbroadcast. */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_SSE4_1)
	    count = 5;
	  else if (code == ASHIFTRT)
	    count = 6;
	  else
	    count = 5;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V16QImode:
	  if (TARGET_XOP)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb.  */
	      if (constant_op1)
		{
		  extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
		  return ix86_vec_cost (mode, cost->sse_op) + extra;
		}
	      else
		{
		  count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost->sse_op * count);
		}
	    }
	  /* FALLTHRU */
	case V32QImode:
	  if (TARGET_GFNI && constant_op1)
	    {
	      /* Use vgf2p8affine.  One extra load for the mask, but in a loop
		 with enough registers it will be moved out.  So for now don't
		 account the constant mask load.  This is not quite right
		 for non loop vectorization.  */
	      extra = 0;
	      return ix86_vec_cost (mode, cost->sse_op) + extra;
	    }
	  if (TARGET_AVX2)
	    /* Use vpbroadcast. */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (mode == V16QImode
				   ? cost->sse_load[2]
				   : cost->sse_load[3]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW
		   && ((mode == V32QImode && !TARGET_PREFER_AVX256)
		       || (mode == V16QImode && TARGET_AVX512VL
			   && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_AVX2
		   && mode == V16QImode && !TARGET_PREFER_AVX128)
	    count = 6;
	  else if (TARGET_SSE4_1)
	    count = 9;
	  else if (code == ASHIFTRT)
	    count = 10;
	  else
	    count = 9;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V64QImode:
	  /* Ignore the mask load for GF2P8AFFINEQB. */
	  extra = 0;
	  return ix86_vec_cost (mode, cost->sse_op) + extra;

	case V2DImode:
	case V4DImode:
	  /* V*DImode arithmetic right shift is emulated. */
	  if (code == ASHIFTRT && !TARGET_AVX512VL)
	    {
	      if (constant_op1)
		{
		  /* Shift by 63 broadcasts the sign bit and is cheaper.  */
		  if (op1_val == 63)
		    count = TARGET_SSE4_2 ? 1 : 2;
		  else if (TARGET_XOP)
		    count = 2;
		  else if (TARGET_SSE4_1)
		    count = 3;
		  else
		    count = 4;
		}
	      else if (TARGET_XOP)
		count = 3;
	      else if (TARGET_SSE4_2)
		count = 4;
	      else
		count = 5;

	      return ix86_vec_cost (mode, cost->sse_op * count);
	    }
	  /* FALLTHRU */
	default:
	  /* Natively supported vector shift: a single SSE op.  */
	  return ix86_vec_cost (mode, cost->sse_op);
	}
    }

  /* Scalar double-word shifts need multiple instructions.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  /* Hardware masks the count, so the AND can be dropped; tell the
	     caller to ignore both operand costs.  */
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation. */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
}
22307 :
22308 : static int
22309 148912208 : ix86_insn_cost (rtx_insn *insn, bool speed)
22310 : {
22311 148912208 : int insn_cost = 0;
22312 : /* Add extra cost to avoid post_reload late_combine revert
22313 : the optimization did in pass_rpad. */
22314 148912208 : if (reload_completed
22315 4645522 : && ix86_rpad_gate ()
22316 265551 : && recog_memoized (insn) >= 0
22317 149177505 : && get_attr_avx_partial_xmm_update (insn)
22318 : == AVX_PARTIAL_XMM_UPDATE_TRUE)
22319 : insn_cost += COSTS_N_INSNS (3);
22320 :
22321 148912208 : return insn_cost + pattern_cost (PATTERN (insn), speed);
22322 : }
22323 :
22324 : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22325 :
22326 : static int
22327 745487 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22328 : {
22329 745487 : if (size < 128)
22330 741997 : return cost->cvtss2sd;
22331 3490 : else if (size < 256)
22332 : {
22333 1420 : if (TARGET_SSE_SPLIT_REGS)
22334 0 : return cost->cvtss2sd * size / 64;
22335 1420 : return cost->cvtss2sd;
22336 : }
22337 2070 : if (size < 512)
22338 768 : return cost->vcvtps2pd256;
22339 : else
22340 1302 : return cost->vcvtps2pd512;
22341 : }
22342 :
22343 : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
22344 :
22345 : static bool
22346 266616 : unspec_pcmp_p (rtx x)
22347 : {
22348 266616 : return GET_CODE (x) == UNSPEC
22349 266616 : && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22350 : }
22351 :
22352 : /* Compute a (partial) cost for rtx X. Return true if the complete
22353 : cost has been computed, and false if subexpressions should be
22354 : scanned. In either case, *TOTAL contains the cost result. */
22355 :
22356 : static bool
22357 7628701434 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22358 : int *total, bool speed)
22359 : {
22360 7628701434 : rtx mask;
22361 7628701434 : enum rtx_code code = GET_CODE (x);
22362 7628701434 : enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22363 4088917346 : const struct processor_costs *cost
22364 7628701434 : = speed ? ix86_tune_cost : &ix86_size_cost;
22365 7628701434 : int src_cost;
22366 :
22367 : /* Handling different vternlog variants. */
22368 7628701434 : if ((GET_MODE_SIZE (mode) == 64
22369 7628701434 : ? TARGET_AVX512F
22370 6457849688 : : (TARGET_AVX512VL
22371 6396588513 : || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22372 176704431 : && GET_MODE_SIZE (mode) >= 16
22373 119961547 : && outer_code_i == SET
22374 7675067720 : && ternlog_operand (x, mode))
22375 : {
22376 32996 : rtx args[3];
22377 :
22378 32996 : args[0] = NULL_RTX;
22379 32996 : args[1] = NULL_RTX;
22380 32996 : args[2] = NULL_RTX;
22381 32996 : int idx = ix86_ternlog_idx (x, args);
22382 32996 : gcc_assert (idx >= 0);
22383 :
22384 32996 : *total = cost->sse_op;
22385 131984 : for (int i = 0; i != 3; i++)
22386 98988 : if (args[i])
22387 70502 : *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22388 32996 : return true;
22389 : }
22390 :
22391 :
22392 7628668438 : switch (code)
22393 : {
22394 47325526 : case SET:
22395 47325526 : if (register_operand (SET_DEST (x), VOIDmode)
22396 47325526 : && register_operand (SET_SRC (x), VOIDmode))
22397 : {
22398 28921501 : *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22399 28921501 : return true;
22400 : }
22401 :
22402 18404025 : if (register_operand (SET_SRC (x), VOIDmode))
22403 : /* Avoid potentially incorrect high cost from rtx_costs
22404 : for non-tieable SUBREGs. */
22405 : src_cost = 0;
22406 : else
22407 : {
22408 15603793 : src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22409 :
22410 15603793 : if (CONSTANT_P (SET_SRC (x)))
22411 : /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22412 : a small value, possibly zero for cheap constants. */
22413 6944677 : src_cost += COSTS_N_INSNS (1);
22414 : }
22415 :
22416 18404025 : *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22417 18404025 : return true;
22418 :
22419 2810295281 : case CONST_INT:
22420 2810295281 : case CONST:
22421 2810295281 : case LABEL_REF:
22422 2810295281 : case SYMBOL_REF:
22423 2810295281 : if (x86_64_immediate_operand (x, VOIDmode))
22424 2211709327 : *total = 0;
22425 598585954 : else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22426 : /* Consider the zext constants slightly more expensive, as they
22427 : can't appear in most instructions. */
22428 27717223 : *total = 1;
22429 : else
22430 : /* movabsq is slightly more expensive than a simple instruction. */
22431 570868731 : *total = COSTS_N_INSNS (1) + 1;
22432 : return true;
22433 :
22434 7507981 : case CONST_DOUBLE:
22435 7507981 : if (IS_STACK_MODE (mode))
22436 1302681 : switch (standard_80387_constant_p (x))
22437 : {
22438 : case -1:
22439 : case 0:
22440 : break;
22441 283372 : case 1: /* 0.0 */
22442 283372 : *total = 1;
22443 283372 : return true;
22444 484787 : default: /* Other constants */
22445 484787 : *total = 2;
22446 484787 : return true;
22447 : }
22448 : /* FALLTHRU */
22449 :
22450 14265738 : case CONST_VECTOR:
22451 14265738 : switch (standard_sse_constant_p (x, mode))
22452 : {
22453 : case 0:
22454 : break;
22455 4189711 : case 1: /* 0: xor eliminates false dependency */
22456 4189711 : *total = 0;
22457 4189711 : return true;
22458 160229 : default: /* -1: cmp contains false dependency */
22459 160229 : *total = 1;
22460 160229 : return true;
22461 : }
22462 : /* FALLTHRU */
22463 :
22464 10914460 : case CONST_WIDE_INT:
22465 : /* Fall back to (MEM (SYMBOL_REF)), since that's where
22466 : it'll probably end up. Add a penalty for size. */
22467 21828920 : *total = (COSTS_N_INSNS (1)
22468 21603170 : + (!TARGET_64BIT && flag_pic)
22469 21828920 : + (GET_MODE_SIZE (mode) <= 4
22470 19061814 : ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22471 10914460 : return true;
22472 :
22473 22229051 : case ZERO_EXTEND:
22474 : /* The zero extensions is often completely free on x86_64, so make
22475 : it as cheap as possible. */
22476 22229051 : if (TARGET_64BIT && mode == DImode
22477 4978230 : && GET_MODE (XEXP (x, 0)) == SImode)
22478 3071102 : *total = 1;
22479 19157949 : else if (TARGET_ZERO_EXTEND_WITH_AND)
22480 0 : *total = cost->add;
22481 : else
22482 19157949 : *total = cost->movzx;
22483 : return false;
22484 :
22485 3054653 : case SIGN_EXTEND:
22486 3054653 : *total = cost->movsx;
22487 3054653 : return false;
22488 :
22489 630734413 : case ASHIFT:
22490 630734413 : if (SCALAR_INT_MODE_P (mode)
22491 243865004 : && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22492 673381207 : && CONST_INT_P (XEXP (x, 1)))
22493 : {
22494 42475211 : HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22495 42475211 : if (value == 1)
22496 : {
22497 2453935 : *total = cost->add;
22498 2453935 : return false;
22499 : }
22500 40021276 : if ((value == 2 || value == 3)
22501 4504546 : && cost->lea <= cost->shift_const)
22502 : {
22503 2123042 : *total = cost->lea;
22504 2123042 : return false;
22505 : }
22506 : }
22507 : /* FALLTHRU */
22508 :
22509 766544253 : case ROTATE:
22510 766544253 : case ASHIFTRT:
22511 766544253 : case LSHIFTRT:
22512 766544253 : case ROTATERT:
22513 766544253 : bool skip_op0, skip_op1;
22514 766544253 : *total = ix86_shift_rotate_cost (cost, code, mode,
22515 766544253 : CONSTANT_P (XEXP (x, 1)),
22516 : CONST_INT_P (XEXP (x, 1))
22517 : ? INTVAL (XEXP (x, 1)) : -1,
22518 : GET_CODE (XEXP (x, 1)) == AND,
22519 766544253 : SUBREG_P (XEXP (x, 1))
22520 766544253 : && GET_CODE (XEXP (XEXP (x, 1),
22521 : 0)) == AND,
22522 : &skip_op0, &skip_op1);
22523 766544253 : if (skip_op0 || skip_op1)
22524 : {
22525 22835 : if (!skip_op0)
22526 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22527 22835 : if (!skip_op1)
22528 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22529 22835 : return true;
22530 : }
22531 : return false;
22532 :
22533 230346 : case FMA:
22534 230346 : {
22535 230346 : rtx sub;
22536 :
22537 230346 : gcc_assert (FLOAT_MODE_P (mode));
22538 230346 : gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22539 :
22540 460692 : *total = ix86_vec_cost (mode,
22541 230346 : GET_MODE_INNER (mode) == SFmode
22542 : ? cost->fmass : cost->fmasd);
22543 230346 : *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22544 :
22545 : /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22546 230346 : sub = XEXP (x, 0);
22547 230346 : if (GET_CODE (sub) == NEG)
22548 51012 : sub = XEXP (sub, 0);
22549 230346 : *total += rtx_cost (sub, mode, FMA, 0, speed);
22550 :
22551 230346 : sub = XEXP (x, 2);
22552 230346 : if (GET_CODE (sub) == NEG)
22553 40520 : sub = XEXP (sub, 0);
22554 230346 : *total += rtx_cost (sub, mode, FMA, 2, speed);
22555 230346 : return true;
22556 : }
22557 :
22558 1739436799 : case MULT:
22559 1739436799 : if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22560 : {
22561 545476452 : rtx op0 = XEXP (x, 0);
22562 545476452 : rtx op1 = XEXP (x, 1);
22563 545476452 : int nbits;
22564 545476452 : if (CONST_INT_P (XEXP (x, 1)))
22565 : {
22566 527481392 : unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22567 1070508346 : for (nbits = 0; value != 0; value &= value - 1)
22568 543026954 : nbits++;
22569 : }
22570 : else
22571 : /* This is arbitrary. */
22572 : nbits = 7;
22573 :
22574 : /* Compute costs correctly for widening multiplication. */
22575 545476452 : if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22576 550930769 : && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22577 5454317 : == GET_MODE_SIZE (mode))
22578 : {
22579 5450247 : int is_mulwiden = 0;
22580 5450247 : machine_mode inner_mode = GET_MODE (op0);
22581 :
22582 5450247 : if (GET_CODE (op0) == GET_CODE (op1))
22583 5365936 : is_mulwiden = 1, op1 = XEXP (op1, 0);
22584 84311 : else if (CONST_INT_P (op1))
22585 : {
22586 74107 : if (GET_CODE (op0) == SIGN_EXTEND)
22587 21643 : is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22588 21643 : == INTVAL (op1);
22589 : else
22590 52464 : is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22591 : }
22592 :
22593 5440043 : if (is_mulwiden)
22594 5440043 : op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22595 : }
22596 :
22597 545476452 : int mult_init;
22598 : // Double word multiplication requires 3 mults and 2 adds.
22599 1106612010 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22600 : {
22601 328337697 : mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22602 328337697 : + 2 * cost->add;
22603 328337697 : nbits *= 3;
22604 : }
22605 374514310 : else mult_init = cost->mult_init[MODE_INDEX (mode)];
22606 :
22607 1090952904 : *total = (mult_init
22608 545476452 : + nbits * cost->mult_bit
22609 545476452 : + rtx_cost (op0, mode, outer_code, opno, speed)
22610 545476452 : + rtx_cost (op1, mode, outer_code, opno, speed));
22611 :
22612 545476452 : return true;
22613 : }
22614 1193960347 : *total = ix86_multiplication_cost (cost, mode);
22615 1193960347 : return false;
22616 :
22617 71742640 : case DIV:
22618 71742640 : case UDIV:
22619 71742640 : case MOD:
22620 71742640 : case UMOD:
22621 71742640 : *total = ix86_division_cost (cost, mode);
22622 71742640 : return false;
22623 :
22624 683061932 : case PLUS:
22625 683061932 : if (GET_MODE_CLASS (mode) == MODE_INT
22626 937951813 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22627 : {
22628 142231914 : if (GET_CODE (XEXP (x, 0)) == PLUS
22629 3916443 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22630 843528 : && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22631 843503 : && CONSTANT_P (XEXP (x, 1)))
22632 : {
22633 843446 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22634 843446 : if (val == 2 || val == 4 || val == 8)
22635 : {
22636 843342 : *total = cost->lea;
22637 843342 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22638 : outer_code, opno, speed);
22639 843342 : *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22640 : outer_code, opno, speed);
22641 843342 : *total += rtx_cost (XEXP (x, 1), mode,
22642 : outer_code, opno, speed);
22643 843342 : return true;
22644 : }
22645 : }
22646 141388468 : else if (GET_CODE (XEXP (x, 0)) == MULT
22647 51830344 : && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22648 : {
22649 51771357 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22650 51771357 : if (val == 2 || val == 4 || val == 8)
22651 : {
22652 7954980 : *total = cost->lea;
22653 7954980 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22654 : outer_code, opno, speed);
22655 7954980 : *total += rtx_cost (XEXP (x, 1), mode,
22656 : outer_code, opno, speed);
22657 7954980 : return true;
22658 : }
22659 : }
22660 89617111 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
22661 : {
22662 3072997 : rtx op = XEXP (XEXP (x, 0), 0);
22663 :
22664 : /* Add with carry, ignore the cost of adding a carry flag. */
22665 3072997 : if (ix86_carry_flag_operator (op, mode)
22666 3072997 : || ix86_carry_flag_unset_operator (op, mode))
22667 68709 : *total = cost->add;
22668 : else
22669 : {
22670 3004288 : *total = cost->lea;
22671 3004288 : *total += rtx_cost (op, mode,
22672 : outer_code, opno, speed);
22673 : }
22674 :
22675 3072997 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22676 : outer_code, opno, speed);
22677 3072997 : *total += rtx_cost (XEXP (x, 1), mode,
22678 : outer_code, opno, speed);
22679 3072997 : return true;
22680 : }
22681 : }
22682 : /* FALLTHRU */
22683 :
22684 1817104220 : case MINUS:
22685 : /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22686 1817104220 : if (GET_MODE_CLASS (mode) == MODE_INT
22687 515134833 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22688 233281601 : && GET_CODE (XEXP (x, 0)) == MINUS
22689 1817142361 : && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22690 14459 : || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22691 : {
22692 23682 : *total = cost->add;
22693 23682 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22694 : outer_code, opno, speed);
22695 23682 : *total += rtx_cost (XEXP (x, 1), mode,
22696 : outer_code, opno, speed);
22697 23682 : return true;
22698 : }
22699 :
22700 1817080538 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22701 2388753 : *total = cost->addss;
22702 1814691785 : else if (X87_FLOAT_MODE_P (mode))
22703 217772 : *total = cost->fadd;
22704 1814474013 : else if (FLOAT_MODE_P (mode))
22705 440822 : *total = ix86_vec_cost (mode, cost->addss);
22706 1814033191 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22707 1193600079 : *total = ix86_vec_cost (mode, cost->sse_op);
22708 1280154345 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22709 327411993 : *total = cost->add * 2;
22710 : else
22711 293021119 : *total = cost->add;
22712 : return false;
22713 :
22714 3983369 : case IOR:
22715 3983369 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22716 3737797 : || SSE_FLOAT_MODE_P (mode))
22717 : {
22718 : /* (ior (not ...) ...) can be a single insn in AVX512. */
22719 456 : if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22720 255190 : && (GET_MODE_SIZE (mode) == 64
22721 0 : || (TARGET_AVX512VL
22722 0 : && (GET_MODE_SIZE (mode) == 32
22723 0 : || GET_MODE_SIZE (mode) == 16))))
22724 : {
22725 0 : rtx right = GET_CODE (XEXP (x, 1)) != NOT
22726 0 : ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22727 :
22728 0 : *total = ix86_vec_cost (mode, cost->sse_op)
22729 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22730 : outer_code, opno, speed)
22731 0 : + rtx_cost (right, mode, outer_code, opno, speed);
22732 0 : return true;
22733 : }
22734 255190 : *total = ix86_vec_cost (mode, cost->sse_op);
22735 255190 : }
22736 3728179 : else if (TARGET_64BIT
22737 3439448 : && mode == TImode
22738 1692948 : && GET_CODE (XEXP (x, 0)) == ASHIFT
22739 254929 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22740 252937 : && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22741 252937 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22742 252937 : && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22743 252937 : && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22744 229424 : && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22745 : {
22746 : /* *concatditi3 is cheap. */
22747 229424 : rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22748 229424 : rtx op1 = XEXP (XEXP (x, 1), 0);
22749 1431 : *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22750 229424 : ? COSTS_N_INSNS (1) /* movq. */
22751 227993 : : set_src_cost (op0, DImode, speed);
22752 2336 : *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22753 229424 : ? COSTS_N_INSNS (1) /* movq. */
22754 227101 : : set_src_cost (op1, DImode, speed);
22755 229424 : return true;
22756 : }
22757 3498755 : else if (TARGET_64BIT
22758 3210024 : && mode == TImode
22759 1463524 : && GET_CODE (XEXP (x, 0)) == AND
22760 1402907 : && REG_P (XEXP (XEXP (x, 0), 0))
22761 1397647 : && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22762 1394984 : && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22763 1394984 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22764 910756 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22765 910756 : && GET_CODE (XEXP (x, 1)) == ASHIFT
22766 908629 : && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22767 908629 : && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22768 908629 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22769 4407384 : && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22770 : {
22771 : /* *insvti_highpart is cheap. */
22772 908629 : rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22773 908629 : *total = COSTS_N_INSNS (1) + 1;
22774 1436 : *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22775 908629 : ? COSTS_N_INSNS (1) /* movq. */
22776 907713 : : set_src_cost (op, DImode, speed);
22777 908629 : return true;
22778 : }
22779 5468983 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22780 748372 : *total = cost->add * 2;
22781 : else
22782 1841754 : *total = cost->add;
22783 : return false;
22784 :
22785 567965 : case XOR:
22786 567965 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22787 443259 : || SSE_FLOAT_MODE_P (mode))
22788 124706 : *total = ix86_vec_cost (mode, cost->sse_op);
22789 944603 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22790 16314 : *total = cost->add * 2;
22791 : else
22792 426945 : *total = cost->add;
22793 : return false;
22794 :
22795 7289662 : case AND:
22796 7289662 : if (address_no_seg_operand (x, mode))
22797 : {
22798 15582 : *total = cost->lea;
22799 15582 : return true;
22800 : }
22801 7274080 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22802 6879643 : || SSE_FLOAT_MODE_P (mode))
22803 : {
22804 : /* pandn is a single instruction. */
22805 428050 : if (GET_CODE (XEXP (x, 0)) == NOT)
22806 : {
22807 53737 : rtx right = XEXP (x, 1);
22808 :
22809 : /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22810 428 : if (GET_CODE (right) == NOT && TARGET_AVX512F
22811 53737 : && (GET_MODE_SIZE (mode) == 64
22812 0 : || (TARGET_AVX512VL
22813 0 : && (GET_MODE_SIZE (mode) == 32
22814 0 : || GET_MODE_SIZE (mode) == 16))))
22815 0 : right = XEXP (right, 0);
22816 :
22817 53737 : *total = ix86_vec_cost (mode, cost->sse_op)
22818 53737 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22819 : outer_code, opno, speed)
22820 53737 : + rtx_cost (right, mode, outer_code, opno, speed);
22821 53737 : return true;
22822 : }
22823 374313 : else if (GET_CODE (XEXP (x, 1)) == NOT)
22824 : {
22825 862 : *total = ix86_vec_cost (mode, cost->sse_op)
22826 862 : + rtx_cost (XEXP (x, 0), mode,
22827 : outer_code, opno, speed)
22828 862 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22829 : outer_code, opno, speed);
22830 862 : return true;
22831 : }
22832 373451 : *total = ix86_vec_cost (mode, cost->sse_op);
22833 373451 : }
22834 14378739 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22835 : {
22836 1138450 : if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22837 : {
22838 1670 : *total = cost->add * 2
22839 835 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22840 : outer_code, opno, speed)
22841 835 : + rtx_cost (XEXP (x, 1), mode,
22842 : outer_code, opno, speed);
22843 835 : return true;
22844 : }
22845 1137615 : else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
22846 : {
22847 0 : *total = cost->add * 2
22848 0 : + rtx_cost (XEXP (x, 0), mode,
22849 : outer_code, opno, speed)
22850 0 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22851 : outer_code, opno, speed);
22852 0 : return true;
22853 : }
22854 1137615 : *total = cost->add * 2;
22855 : }
22856 5707580 : else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22857 : {
22858 7578 : *total = cost->add
22859 3789 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22860 : outer_code, opno, speed)
22861 3789 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22862 3789 : return true;
22863 : }
22864 5703791 : else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
22865 : {
22866 112 : *total = cost->add
22867 56 : + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22868 56 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22869 : outer_code, opno, speed);
22870 56 : return true;
22871 : }
22872 : else
22873 5703735 : *total = cost->add;
22874 : return false;
22875 :
22876 498466 : case NOT:
22877 498466 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22878 : {
22879 : /* (not (xor ...)) can be a single insn in AVX512. */
22880 0 : if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
22881 6565 : && (GET_MODE_SIZE (mode) == 64
22882 0 : || (TARGET_AVX512VL
22883 0 : && (GET_MODE_SIZE (mode) == 32
22884 0 : || GET_MODE_SIZE (mode) == 16))))
22885 : {
22886 0 : *total = ix86_vec_cost (mode, cost->sse_op)
22887 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22888 : outer_code, opno, speed)
22889 0 : + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22890 : outer_code, opno, speed);
22891 0 : return true;
22892 : }
22893 :
22894 : // vnot is pxor -1.
22895 6565 : *total = ix86_vec_cost (mode, cost->sse_op) + 1;
22896 : }
22897 1128629 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22898 45213 : *total = cost->add * 2;
22899 : else
22900 446688 : *total = cost->add;
22901 : return false;
22902 :
22903 17973094 : case NEG:
22904 17973094 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22905 51412 : *total = cost->sse_op;
22906 17921682 : else if (X87_FLOAT_MODE_P (mode))
22907 15139 : *total = cost->fchs;
22908 17906543 : else if (FLOAT_MODE_P (mode))
22909 26855 : *total = ix86_vec_cost (mode, cost->sse_op);
22910 17879688 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22911 13245387 : *total = ix86_vec_cost (mode, cost->sse_op);
22912 9417441 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22913 1742761 : *total = cost->add * 3;
22914 : else
22915 2891540 : *total = cost->add;
22916 : return false;
22917 :
22918 54622446 : case COMPARE:
22919 54622446 : rtx op0, op1;
22920 54622446 : op0 = XEXP (x, 0);
22921 54622446 : op1 = XEXP (x, 1);
22922 54622446 : if (GET_CODE (op0) == ZERO_EXTRACT
22923 170323 : && XEXP (op0, 1) == const1_rtx
22924 152396 : && CONST_INT_P (XEXP (op0, 2))
22925 152360 : && op1 == const0_rtx)
22926 : {
22927 : /* This kind of construct is implemented using test[bwl].
22928 : Treat it as if we had an AND. */
22929 152360 : mode = GET_MODE (XEXP (op0, 0));
22930 304720 : *total = (cost->add
22931 152360 : + rtx_cost (XEXP (op0, 0), mode, outer_code,
22932 : opno, speed)
22933 152360 : + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22934 152360 : return true;
22935 : }
22936 :
22937 54470086 : if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22938 : {
22939 : /* This is an overflow detection, count it as a normal compare. */
22940 138691 : *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22941 138691 : return true;
22942 : }
22943 :
22944 54331395 : rtx geu;
22945 : /* Match x
22946 : (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22947 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22948 54331395 : if (mode == CCCmode
22949 347773 : && GET_CODE (op0) == NEG
22950 8016 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
22951 8013 : && REG_P (XEXP (geu, 0))
22952 8013 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
22953 759 : || GET_MODE (XEXP (geu, 0)) == CCmode)
22954 8013 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
22955 8013 : && XEXP (geu, 1) == const0_rtx
22956 8013 : && GET_CODE (op1) == LTU
22957 8013 : && REG_P (XEXP (op1, 0))
22958 8013 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22959 8013 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
22960 54339408 : && XEXP (op1, 1) == const0_rtx)
22961 : {
22962 : /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22963 8013 : *total = 0;
22964 8013 : return true;
22965 : }
22966 : /* Match x
22967 : (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22968 : (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22969 54323382 : if (mode == CCCmode
22970 339760 : && GET_CODE (op0) == NEG
22971 3 : && GET_CODE (XEXP (op0, 0)) == LTU
22972 3 : && REG_P (XEXP (XEXP (op0, 0), 0))
22973 3 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22974 3 : && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22975 3 : && XEXP (XEXP (op0, 0), 1) == const0_rtx
22976 3 : && GET_CODE (op1) == GEU
22977 3 : && REG_P (XEXP (op1, 0))
22978 3 : && GET_MODE (XEXP (op1, 0)) == CCCmode
22979 3 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
22980 54323385 : && XEXP (op1, 1) == const0_rtx)
22981 : {
22982 : /* This is *x86_cmc. */
22983 3 : if (!speed)
22984 0 : *total = COSTS_N_BYTES (1);
22985 3 : else if (TARGET_SLOW_STC)
22986 0 : *total = COSTS_N_INSNS (2);
22987 : else
22988 3 : *total = COSTS_N_INSNS (1);
22989 3 : return true;
22990 : }
22991 :
22992 54323379 : if (SCALAR_INT_MODE_P (GET_MODE (op0))
22993 113026524 : && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22994 : {
22995 788144 : if (op1 == const0_rtx)
22996 232380 : *total = cost->add
22997 116190 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22998 : else
22999 1343908 : *total = 3*cost->add
23000 671954 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
23001 671954 : + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
23002 788144 : return true;
23003 : }
23004 :
23005 : /* The embedded comparison operand is completely free. */
23006 53535235 : if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
23007 382033 : *total = 0;
23008 :
23009 : return false;
23010 :
23011 1370580 : case FLOAT_EXTEND:
23012 : /* x87 represents all values extended to 80bit. */
23013 1370580 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23014 668719 : *total = 0;
23015 : else
23016 1403722 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23017 : return false;
23018 :
23019 83575 : case FLOAT_TRUNCATE:
23020 83575 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23021 58201 : *total = cost->fadd;
23022 : else
23023 50748 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23024 : return false;
23025 681637 : case FLOAT:
23026 681637 : case UNSIGNED_FLOAT:
23027 681637 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23028 : /* TODO: We do not have cost tables for x87. */
23029 92823 : *total = cost->fadd;
23030 588814 : else if (VECTOR_MODE_P (mode))
23031 0 : *total = ix86_vec_cost (mode, cost->cvtpi2ps);
23032 : else
23033 588814 : *total = cost->cvtsi2ss;
23034 : return false;
23035 :
23036 282987 : case FIX:
23037 282987 : case UNSIGNED_FIX:
23038 282987 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23039 : /* TODO: We do not have cost tables for x87. */
23040 282987 : *total = cost->fadd;
23041 0 : else if (VECTOR_MODE_P (mode))
23042 0 : *total = ix86_vec_cost (mode, cost->cvtps2pi);
23043 : else
23044 0 : *total = cost->cvtss2si;
23045 : return false;
23046 :
23047 354266 : case ABS:
23048 : /* SSE requires memory load for the constant operand. It may make
23049 : sense to account for this. Of course the constant operand may or
23050 : may not be reused. */
23051 354266 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23052 249844 : *total = cost->sse_op;
23053 104422 : else if (X87_FLOAT_MODE_P (mode))
23054 33437 : *total = cost->fabs;
23055 70985 : else if (FLOAT_MODE_P (mode))
23056 29580 : *total = ix86_vec_cost (mode, cost->sse_op);
23057 41405 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23058 6331 : *total = cost->sse_op;
23059 : return false;
23060 :
23061 28497 : case SQRT:
23062 28497 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23063 18207 : *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
23064 10290 : else if (X87_FLOAT_MODE_P (mode))
23065 4267 : *total = cost->fsqrt;
23066 6023 : else if (FLOAT_MODE_P (mode))
23067 6023 : *total = ix86_vec_cost (mode,
23068 : mode == SFmode ? cost->sqrtss : cost->sqrtsd);
23069 : return false;
23070 :
23071 3950076 : case UNSPEC:
23072 3950076 : if (XINT (x, 1) == UNSPEC_TP)
23073 125988 : *total = 0;
23074 3824088 : else if (XINT (x, 1) == UNSPEC_VTERNLOG)
23075 : {
23076 5210 : *total = cost->sse_op;
23077 5210 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23078 5210 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23079 5210 : *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
23080 5210 : return true;
23081 : }
23082 3818878 : else if (XINT (x, 1) == UNSPEC_PTEST)
23083 : {
23084 98913 : *total = cost->sse_op;
23085 98913 : rtx test_op0 = XVECEXP (x, 0, 0);
23086 98913 : if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
23087 : return false;
23088 98236 : if (GET_CODE (test_op0) == AND)
23089 : {
23090 23 : rtx and_op0 = XEXP (test_op0, 0);
23091 23 : if (GET_CODE (and_op0) == NOT)
23092 0 : and_op0 = XEXP (and_op0, 0);
23093 23 : *total += rtx_cost (and_op0, GET_MODE (and_op0),
23094 : AND, 0, speed)
23095 23 : + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
23096 : AND, 1, speed);
23097 : }
23098 : else
23099 98213 : *total = rtx_cost (test_op0, GET_MODE (test_op0),
23100 : UNSPEC, 0, speed);
23101 98236 : return true;
23102 : }
23103 : return false;
23104 :
23105 2013663 : case VEC_CONCAT:
23106 : /* ??? Assume all of these vector manipulation patterns are
23107 : recognizable. In which case they all pretty much have the
23108 : same cost.
23109 : ??? We should still recruse when computing cost. */
23110 2013663 : *total = cost->sse_op;
23111 2013663 : return true;
23112 :
23113 2394146 : case VEC_SELECT:
23114 : /* Special case extracting lower part from the vector.
23115 : This by itself needs to code and most of SSE/AVX instructions have
23116 : packed and single forms where the single form may be represented
23117 : by such VEC_SELECT.
23118 :
23119 : Use cost 1 (despite the fact that functionally equivalent SUBREG has
23120 : cost 0). Making VEC_SELECT completely free, for example instructs CSE
23121 : to forward propagate VEC_SELECT into
23122 :
23123 : (set (reg eax) (reg src))
23124 :
23125 : which then prevents fwprop and combining. See i.e.
23126 : gcc.target/i386/pr91103-1.c.
23127 :
23128 : ??? rtvec_series_p test should be, for valid patterns, equivalent to
23129 : vec_series_lowpart_p but is not, since the latter calls
23130 : can_cange_mode_class on ALL_REGS and this return false since x87 does
23131 : not support subregs at all. */
23132 2394146 : if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
23133 748757 : *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
23134 748757 : outer_code, opno, speed) + 1;
23135 : else
23136 : /* ??? We should still recruse when computing cost. */
23137 1645389 : *total = cost->sse_op;
23138 : return true;
23139 :
23140 1216204 : case VEC_DUPLICATE:
23141 2432408 : *total = rtx_cost (XEXP (x, 0),
23142 1216204 : GET_MODE (XEXP (x, 0)),
23143 : VEC_DUPLICATE, 0, speed);
23144 : /* It's broadcast instruction, not embedded broadcasting. */
23145 1216204 : if (outer_code == SET)
23146 1171262 : *total += cost->sse_op;
23147 :
23148 : return true;
23149 :
23150 714571 : case VEC_MERGE:
23151 714571 : mask = XEXP (x, 2);
23152 : /* Scalar versions of SSE instructions may be represented as:
23153 :
23154 : (vec_merge (vec_duplicate (operation ....))
23155 : (register or memory)
23156 : (const_int 1))
23157 :
23158 : In this case vec_merge and vec_duplicate is for free.
23159 : Just recurse into operation and second operand. */
23160 714571 : if (mask == const1_rtx
23161 205626 : && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
23162 : {
23163 71832 : *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23164 : outer_code, opno, speed)
23165 71832 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23166 71832 : return true;
23167 : }
23168 : /* This is masked instruction, assume the same cost,
23169 : as nonmasked variant. */
23170 642739 : else if (TARGET_AVX512F
23171 642739 : && (register_operand (mask, GET_MODE (mask))
23172 : /* Redunduant clean up of high bits for kmask with VL=2/4
23173 : .i.e (vec_merge op0, op1, (and op3 15)). */
23174 120573 : || (GET_CODE (mask) == AND
23175 369 : && register_operand (XEXP (mask, 0), GET_MODE (mask))
23176 369 : && CONST_INT_P (XEXP (mask, 1))
23177 369 : && ((INTVAL (XEXP (mask, 1)) == 3
23178 131 : && GET_MODE_NUNITS (mode) == 2)
23179 238 : || (INTVAL (XEXP (mask, 1)) == 15
23180 238 : && GET_MODE_NUNITS (mode) == 4)))))
23181 : {
23182 373873 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23183 373873 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23184 373873 : return true;
23185 : }
23186 : /* Combination of the two above:
23187 :
23188 : (vec_merge (vec_merge (vec_duplicate (operation ...))
23189 : (register or memory)
23190 : (reg:QI mask))
23191 : (register or memory)
23192 : (const_int 1))
23193 :
23194 : i.e. avx512fp16_vcvtss2sh_mask. */
23195 268866 : else if (TARGET_AVX512F
23196 120204 : && mask == const1_rtx
23197 46161 : && GET_CODE (XEXP (x, 0)) == VEC_MERGE
23198 27110 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
23199 271128 : && register_operand (XEXP (XEXP (x, 0), 2),
23200 2262 : GET_MODE (XEXP (XEXP (x, 0), 2))))
23201 : {
23202 2250 : *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23203 : mode, outer_code, opno, speed)
23204 2250 : + rtx_cost (XEXP (XEXP (x, 0), 1),
23205 : mode, outer_code, opno, speed)
23206 2250 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23207 2250 : return true;
23208 : }
23209 : /* vcmp. */
23210 266616 : else if (unspec_pcmp_p (mask)
23211 266616 : || (GET_CODE (mask) == NOT
23212 0 : && unspec_pcmp_p (XEXP (mask, 0))))
23213 : {
23214 1951 : rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
23215 1951 : rtx unsop0 = XVECEXP (uns, 0, 0);
23216 : /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
23217 : cost the same as register.
23218 : This is used by avx_cmp<mode>3_ltint_not. */
23219 1951 : if (SUBREG_P (unsop0))
23220 417 : unsop0 = XEXP (unsop0, 0);
23221 1951 : if (GET_CODE (unsop0) == NOT)
23222 18 : unsop0 = XEXP (unsop0, 0);
23223 1951 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23224 1951 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
23225 1951 : + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
23226 1951 : + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
23227 1951 : + cost->sse_op;
23228 1951 : return true;
23229 : }
23230 : else
23231 264665 : *total = cost->sse_op;
23232 264665 : return false;
23233 :
23234 107093204 : case MEM:
23235 : /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
23236 : or variants in ix86_vector_duplicate_simode_const. */
23237 :
23238 107093204 : if (GET_MODE_SIZE (mode) >= 16
23239 18185462 : && VECTOR_MODE_P (mode)
23240 12097407 : && SYMBOL_REF_P (XEXP (x, 0))
23241 2217209 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23242 109082675 : && ix86_broadcast_from_constant (mode, x))
23243 : {
23244 491133 : *total = COSTS_N_INSNS (2) + speed;
23245 491133 : return true;
23246 : }
23247 :
23248 : /* An insn that accesses memory is slightly more expensive
23249 : than one that does not. */
23250 106602071 : if (speed)
23251 : {
23252 95313542 : *total += 1;
23253 95313542 : rtx addr = XEXP (x, 0);
23254 : /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
23255 : so for MEM (reg) and MEM (reg + 4), the former costs 5,
23256 : the latter costs 9, it is not accurate for x86. Ideally
23257 : address_cost should be used, but it reduce cost too much.
23258 : So current solution is make constant disp as cheap as possible. */
23259 95313542 : if (GET_CODE (addr) == PLUS
23260 77709626 : && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23261 : /* Only handle (reg + disp) since other forms of addr are mostly LEA,
23262 : there's no additional cost for the plus of disp. */
23263 167411409 : && register_operand (XEXP (addr, 0), Pmode))
23264 : {
23265 56044236 : *total += 1;
23266 68838486 : *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23267 56044236 : return true;
23268 : }
23269 : }
23270 :
23271 : return false;
23272 :
23273 53604 : case ZERO_EXTRACT:
23274 53604 : if (XEXP (x, 1) == const1_rtx
23275 11409 : && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23276 0 : && GET_MODE (XEXP (x, 2)) == SImode
23277 0 : && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23278 : {
23279 : /* Ignore cost of zero extension and masking of last argument. */
23280 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23281 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23282 0 : *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23283 0 : return true;
23284 : }
23285 : return false;
23286 :
23287 29462238 : case IF_THEN_ELSE:
23288 29462238 : if (TARGET_XOP
23289 25487 : && VECTOR_MODE_P (mode)
23290 29467853 : && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23291 : {
23292 : /* vpcmov. */
23293 5047 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23294 5047 : if (!REG_P (XEXP (x, 0)))
23295 4887 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23296 5047 : if (!REG_P (XEXP (x, 1)))
23297 4854 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23298 5047 : if (!REG_P (XEXP (x, 2)))
23299 4856 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23300 5047 : return true;
23301 : }
23302 0 : else if (TARGET_CMOVE
23303 29457191 : && SCALAR_INT_MODE_P (mode)
23304 31883682 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23305 : {
23306 : /* cmov. */
23307 2229394 : *total = COSTS_N_INSNS (1);
23308 2229394 : if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23309 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23310 2229394 : if (!REG_P (XEXP (x, 1)))
23311 124513 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23312 2229394 : if (!REG_P (XEXP (x, 2)))
23313 726534 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23314 2229394 : return true;
23315 : }
23316 : return false;
23317 :
23318 : default:
23319 : return false;
23320 : }
23321 : }
23322 :
23323 : #if TARGET_MACHO
23324 :
23325 : static int current_machopic_label_num;
23326 :
23327 : /* Given a symbol name and its associated stub, write out the
23328 : definition of the stub. */
23329 :
void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  /* Each stub gets a unique label number for its lazy pointer and
     (for PIC stubs) its PC-load label.  */
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Build the helper names from the stub/symbol names; +32 leaves room
     for the decoration the GEN_* macros append.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pick the stub section matching the flavor of stub we emit below.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* AT&T-style stub: dyld rewrites these hlt slots in place.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    /* Non-PIC stub: jump through the lazy pointer directly.  */
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  /* Emit the binder: called on first use, it pushes identifying data
     and tail-calls dyld's lazy binding helper.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* The lazy pointer initially holds the binder's address; dyld
     overwrites it with the real symbol address on first call.  */
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
23414 : #endif /* TARGET_MACHO */
23415 :
23416 : /* Order the registers for register allocator. */
23417 :
23418 : void
23419 214527 : x86_order_regs_for_local_alloc (void)
23420 : {
23421 214527 : int pos = 0;
23422 214527 : int i;
23423 :
23424 : /* First allocate the local general purpose registers. */
23425 19951011 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23426 26601348 : if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
23427 5587155 : reg_alloc_order [pos++] = i;
23428 :
23429 : /* Global general purpose registers. */
23430 19951011 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23431 22910505 : if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
23432 1277709 : reg_alloc_order [pos++] = i;
23433 :
23434 : /* x87 registers come first in case we are doing FP math
23435 : using them. */
23436 214527 : if (!TARGET_SSE_MATH)
23437 57537 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23438 51144 : reg_alloc_order [pos++] = i;
23439 :
23440 : /* SSE registers. */
23441 1930743 : for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23442 1716216 : reg_alloc_order [pos++] = i;
23443 1930743 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23444 1716216 : reg_alloc_order [pos++] = i;
23445 :
23446 : /* Extended REX SSE registers. */
23447 3646959 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23448 3432432 : reg_alloc_order [pos++] = i;
23449 :
23450 : /* Mask register. */
23451 1930743 : for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23452 1716216 : reg_alloc_order [pos++] = i;
23453 :
23454 : /* x87 registers. */
23455 214527 : if (TARGET_SSE_MATH)
23456 1873206 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23457 1665072 : reg_alloc_order [pos++] = i;
23458 :
23459 1930743 : for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23460 1716216 : reg_alloc_order [pos++] = i;
23461 :
23462 : /* Initialize the rest of array as we do not allocate some registers
23463 : at all. */
23464 1072635 : while (pos < FIRST_PSEUDO_REGISTER)
23465 858108 : reg_alloc_order [pos++] = 0;
23466 214527 : }
23467 :
23468 : static bool
23469 264627872 : ix86_ms_bitfield_layout_p (const_tree record_type)
23470 : {
23471 264627872 : return ((TARGET_MS_BITFIELD_LAYOUT
23472 215 : && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23473 264627872 : || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
23474 : }
23475 :
23476 : /* Returns an expression indicating where the this parameter is
23477 : located on entry to the FUNCTION. */
23478 :
static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  /* If the function returns an aggregate in memory, a hidden pointer to
     the return slot occupies the first argument position, pushing
     'this' one slot (or one register) further along.  */
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      /* Select the integer argument register table for the function's
	 ABI; 'this' is the first (or, with a hidden return pointer,
	 second) entry.  */
      if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
	parm_regs = x86_64_preserve_none_int_parameter_registers;
      else if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  /* 32-bit: 'this' may be in a register if a register-passing
     convention is in effect.  */
  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	/* fastcall: arguments go in ECX, EDX; the hidden return
	   pointer (if any) takes ECX first.  */
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* thiscall: 'this' is in ECX unless displaced to the stack by
	     the hidden return pointer.  */
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  /* Plain regparm: first argument in EAX, second in EDX.  */
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      /* With only one register available, 'this' overflows to
		 the stack.  */
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  /* Default 32-bit case: 'this' is on the stack, past the return
     address and (possibly) the hidden return pointer.  */
  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
23533 :
23534 : /* Determine whether x86_output_mi_thunk can succeed. */
23535 :
23536 : static bool
23537 4908 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23538 : const_tree function)
23539 : {
23540 : /* 64-bit can handle anything. */
23541 4908 : if (TARGET_64BIT)
23542 : return true;
23543 :
23544 : /* For 32-bit, everything's fine if we have one free register. */
23545 76 : if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23546 : return true;
23547 :
23548 : /* Need a free register for vcall_offset. */
23549 0 : if (vcall_offset)
23550 : return false;
23551 :
23552 : /* Need a free register for GOT references. */
23553 0 : if (flag_pic && !targetm.binds_local_p (function))
23554 : return false;
23555 :
23556 : /* Otherwise ok. */
23557 : return true;
23558 : }
23559 :
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  /* flag_force_indirect_call is temporarily cleared below for 32-bit
     PIC; remember the caller's setting so it can be restored.  */
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Pick a scratch register that FUNCTION's calling convention does
     not use for argument passing.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      if (flag_pic)
	flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      /* DELTA does not fit an instruction immediate; load it
		 into the scratch register first.  */
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* tmp = *this, i.e. the vtable pointer.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  /* VCALL_OFFSET does not fit an address displacement;
	     materialize it in %r11 and use a base+index address.  */
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      /* this += *(vtable + vcall_offset).  */
      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  /* Non-local PIC target: fetch the address from the GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit PIC: set up the GOT pointer in %ecx and load the
	     target address through the GOT.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  /* The address is not directly callable; move it into the
	     scratch register and call through that.  */
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
23761 :
23762 : static void
23763 270554 : x86_file_start (void)
23764 : {
23765 270554 : default_file_start ();
23766 270554 : if (TARGET_16BIT)
23767 6 : fputs ("\t.code16gcc\n", asm_out_file);
23768 : #if TARGET_MACHO
23769 : darwin_file_start ();
23770 : #endif
23771 270554 : if (X86_FILE_START_VERSION_DIRECTIVE)
23772 : fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23773 270554 : if (X86_FILE_START_FLTUSED)
23774 : fputs ("\t.global\t__fltused\n", asm_out_file);
23775 270554 : if (ix86_asm_dialect == ASM_INTEL)
23776 54 : fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23777 270554 : }
23778 :
/* Return the alignment (in bits) to use for a field of TYPE whose
   natural alignment is COMPUTED.  On 32-bit x86 without -malign-double,
   double / long long style fields are capped at 32-bit alignment for
   ABI compatibility; _Atomic fields keep their natural alignment.  */

int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  /* 64-bit and -malign-double keep the natural alignment.  */
  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  /* Intel MCU has its own alignment rules.  */
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  /* An array field's alignment is determined by its element type.  */
  type = strip_array_types (type);
  mode = TYPE_MODE (type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    {
      if (TYPE_ATOMIC (type) && computed > 32)
	{
	  /* _Atomic fields are NOT capped to 32 bits (ABI change in
	     GCC 11); warn once under -Wpsabi.  */
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      const char *url
		= CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";

	      warned = true;
	      inform (input_location, "the alignment of %<_Atomic %T%> "
		      "fields changed in %{GCC 11.1%}",
		      TYPE_MAIN_VARIANT (type), url);
	    }
	}
      else
	return MIN (32, computed);
    }
  return computed;
}
23814 :
23815 : /* Print call to TARGET to FILE. */
23816 :
23817 : static void
23818 296 : x86_print_call_or_nop (FILE *file, const char *target,
23819 : const char *label)
23820 : {
23821 296 : if (flag_nop_mcount || !strcmp (target, "nop"))
23822 : {
23823 9 : if (TARGET_16BIT)
23824 : /* 3 byte no-op: lea 0(%si), %si */
23825 1 : fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
23826 : else
23827 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
23828 8 : fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
23829 : label);
23830 : }
23831 287 : else if (!TARGET_PECOFF && flag_pic)
23832 : {
23833 8 : gcc_assert (flag_plt);
23834 :
23835 8 : fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
23836 : }
23837 : else
23838 279 : fprintf (file, "%s\tcall\t%s\n", label, target);
23839 296 : }
23840 :
23841 : static bool
23842 316 : current_fentry_name (const char **name)
23843 : {
23844 316 : tree attr = lookup_attribute ("fentry_name",
23845 316 : DECL_ATTRIBUTES (current_function_decl));
23846 316 : if (!attr)
23847 : return false;
23848 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23849 2 : return true;
23850 : }
23851 :
23852 : static bool
23853 16 : current_fentry_section (const char **name)
23854 : {
23855 16 : tree attr = lookup_attribute ("fentry_section",
23856 16 : DECL_ATTRIBUTES (current_function_decl));
23857 16 : if (!attr)
23858 : return false;
23859 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23860 2 : return true;
23861 : }
23862 :
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  R11_OK says whether %r11 may be used (it is only consulted
   when NO_PROFILE_COUNTERS is defined; otherwise %r11 is always
   excluded).  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
	&& i != R10_REG
#ifdef NO_PROFILE_COUNTERS
	&& (r11_ok || i != R11_REG)
#else
	&& i != R11_REG
#endif
	&& TEST_HARD_REG_BIT (accessible_reg_set, i)
	&& (ix86_save_reg (i, true, true)
	    || (call_used_regs[i]
		&& !fixed_regs[i]
		&& !REGNO_REG_SET_P (reg_live, i))))
      return i;

  /* No scratch register is available; diagnose rather than emit
     wrong code.  */
  sorry ("no register available for profiling %<-mcmodel=large%s%>",
	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");

  return R10_REG;
}
23904 :
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* Re-emit any instruction that was queued to appear at the very
     function entrance (ENDBR / patchable area) ahead of the profiler
     call.  */
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  /* Whether the call site must be recorded in a __mcount_loc style
     section (via -mrecord-mcount or a "fentry_section" attribute).  */
  bool fentry_section_p
    = (flag_record_mcount
       || lookup_attribute ("fentry_section",
			    DECL_ATTRIBUTES (current_function_decl)));

  /* When recording, a local label "1:" marks the call for the
     section entry emitted at the bottom.  */
  const char *label = fentry_section_p ? "1:" : "";

  /* Profiler symbol: "fentry_name" attribute, then -mfentry-name=,
     then the -mfentry default, in decreasing priority.  */
  if (current_fentry_name (&mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
      else
	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
#endif

      int scratch;
      const char *reg;
      char legacy_reg[4] = { 0 };

      if (!TARGET_PECOFF)
	{
	  switch (ix86_cmodel)
	    {
	    case CM_LARGE:
	      /* Large model: the symbol may be out of call range, go
		 through a 64-bit immediate in a scratch register.  */
	      scratch = x86_64_select_profile_regnum (true);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  /* Build the 64-bit name ("rax") from the legacy
		     two-letter name ("ax").  */
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
			 "\tcall\t%s\n", label, reg, mcount_name,
			 reg);
	      else
		fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
			 label, mcount_name, reg, reg);
	      break;
	    case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
	      /* Large PIC model: compute the GOT base into the scratch
		 register, then add the @PLTOFF offset of the profiler
		 and call indirectly.  %r11 is used as a helper.  */
	      scratch = x86_64_select_profile_regnum (false);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		{
		  fprintf (file, "1:movabs\tr11, "
			   "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
		  fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
			   mcount_name);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tcall\t%s\n", reg);
		  break;
		}
	      fprintf (file,
		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
	      fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tcall\t*%%%s\n", reg);
#else
	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
#endif
	      break;
	    case CM_SMALL_PIC:
	    case CM_MEDIUM_PIC:
	      /* Without PLT, call through the GOT entry directly.  */
	      if (!flag_plt)
		{
		  if (ASSEMBLER_DIALECT == ASM_INTEL)
		    fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
			     label, mcount_name);
		  else
		    fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
			     label, mcount_name);
		  break;
		}
	      /* fall through */
	    default:
	      x86_print_call_or_nop (file, mcount_name, label);
	      break;
	    }
	}
      else
	x86_print_call_or_nop (file, mcount_name, label);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
		 LPREFIX, labelno);
      else
	fprintf (file,
		 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      if (flag_plt)
	x86_print_call_or_nop (file, mcount_name, label);
      else if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
		 label, mcount_name);
      else
	fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
		 label, mcount_name);
    }
  else
    {
      /* 32-bit non-PIC: a plain direct call suffices.  */
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
		 LPREFIX, labelno);
      else
	fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name, label);
    }

  /* Record the address of the call (local label "1:") in the requested
     section: "fentry_section" attribute, then -mrecord-mcount's
     section, then the "__mcount_loc" default.  */
  if (fentry_section_p)
    {
      const char *sname = "__mcount_loc";

      if (current_fentry_section (&sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
24076 :
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Returns a lower bound (in bytes) on the encoded size
   of INSN.  */

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  /* Notes, deleted insns etc. emit no bytes.  */
  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  /* Inline asm may emit nothing; estimate it as 0 bytes.  */
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      /* For MULTI/OTHER/FCMP, fall back to a lower bound derived
	 from the address length; symbolic references need at least
	 a 4-byte displacement.  */
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* One opcode byte plus the address, or a 2-byte minimum (this is
     also the path taken for jumps).  */
  if (l)
    return 1+l;
  else
    return 2;
}
24135 :
24136 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24137 :
24138 : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24139 : window. */
24140 :
24141 : static void
24142 45424 : ix86_avoid_jump_mispredicts (void)
24143 : {
24144 45424 : rtx_insn *insn, *start = get_insns ();
24145 45424 : int nbytes = 0, njumps = 0;
24146 45424 : bool isjump = false;
24147 :
24148 : /* Look for all minimal intervals of instructions containing 4 jumps.
24149 : The intervals are bounded by START and INSN. NBYTES is the total
24150 : size of instructions in the interval including INSN and not including
24151 : START. When the NBYTES is smaller than 16 bytes, it is possible
24152 : that the end of START and INSN ends up in the same 16byte page.
24153 :
24154 : The smallest offset in the page INSN can start is the case where START
24155 : ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24156 : We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
24157 :
24158 : Don't consider asm goto as jump, while it can contain a jump, it doesn't
24159 : have to, control transfer to label(s) can be performed through other
24160 : means, and also we estimate minimum length of all asm stmts as 0. */
24161 700742 : for (insn = start; insn; insn = NEXT_INSN (insn))
24162 : {
24163 655318 : int min_size;
24164 :
24165 655318 : if (LABEL_P (insn))
24166 : {
24167 956 : align_flags alignment = label_to_alignment (insn);
24168 956 : int align = alignment.levels[0].log;
24169 956 : int max_skip = alignment.levels[0].maxskip;
24170 :
24171 956 : if (max_skip > 15)
24172 : max_skip = 15;
24173 : /* If align > 3, only up to 16 - max_skip - 1 bytes can be
24174 : already in the current 16 byte page, because otherwise
24175 : ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
24176 : bytes to reach 16 byte boundary. */
24177 956 : if (align <= 0
24178 328 : || (align <= 3 && max_skip != (1 << align) - 1))
24179 956 : max_skip = 0;
24180 956 : if (dump_file)
24181 0 : fprintf (dump_file, "Label %i with max_skip %i\n",
24182 0 : INSN_UID (insn), max_skip);
24183 956 : if (max_skip)
24184 : {
24185 6278 : while (nbytes + max_skip >= 16)
24186 : {
24187 5950 : start = NEXT_INSN (start);
24188 310 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24189 5967 : || CALL_P (start))
24190 350 : njumps--, isjump = true;
24191 : else
24192 : isjump = false;
24193 5950 : nbytes -= ix86_min_insn_size (start);
24194 : }
24195 : }
24196 956 : continue;
24197 956 : }
24198 :
24199 654362 : min_size = ix86_min_insn_size (insn);
24200 654362 : nbytes += min_size;
24201 654362 : if (dump_file)
24202 0 : fprintf (dump_file, "Insn %i estimated to %i bytes\n",
24203 0 : INSN_UID (insn), min_size);
24204 46582 : if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
24205 654382 : || CALL_P (insn))
24206 47597 : njumps++;
24207 : else
24208 606765 : continue;
24209 :
24210 55982 : while (njumps > 3)
24211 : {
24212 8385 : start = NEXT_INSN (start);
24213 545 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24214 8385 : || CALL_P (start))
24215 1247 : njumps--, isjump = true;
24216 : else
24217 : isjump = false;
24218 8385 : nbytes -= ix86_min_insn_size (start);
24219 : }
24220 47597 : gcc_assert (njumps >= 0);
24221 47597 : if (dump_file)
24222 0 : fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24223 0 : INSN_UID (start), INSN_UID (insn), nbytes);
24224 :
24225 47597 : if (njumps == 3 && isjump && nbytes < 16)
24226 : {
24227 40 : int padsize = 15 - nbytes + ix86_min_insn_size (insn);
24228 :
24229 40 : if (dump_file)
24230 0 : fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24231 0 : INSN_UID (insn), padsize);
24232 40 : emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
24233 : }
24234 : }
24235 45424 : }
24236 : #endif
24237 :
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Every predecessor of the exit block potentially ends in a
     return.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Skip blocks not ending in a return, and cold blocks.  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* The return is a jump target: replace it if any executed
	     predecessor reaches it by a non-fallthru edge.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* Also replace when the return directly follows a
	     conditional jump or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Substitute the padded (long) return form.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
24293 :
24294 : /* Count the minimum number of instructions in BB. Return 4 if the
24295 : number of instructions >= 4. */
24296 :
24297 : static int
24298 42 : ix86_count_insn_bb (basic_block bb)
24299 : {
24300 42 : rtx_insn *insn;
24301 42 : int insn_count = 0;
24302 :
24303 : /* Count number of instructions in this block. Return 4 if the number
24304 : of instructions >= 4. */
24305 297 : FOR_BB_INSNS (bb, insn)
24306 : {
24307 : /* Only happen in exit blocks. */
24308 291 : if (JUMP_P (insn)
24309 291 : && ANY_RETURN_P (PATTERN (insn)))
24310 : break;
24311 :
24312 267 : if (NONDEBUG_INSN_P (insn)
24313 102 : && GET_CODE (PATTERN (insn)) != USE
24314 351 : && GET_CODE (PATTERN (insn)) != CLOBBER)
24315 : {
24316 84 : insn_count++;
24317 84 : if (insn_count >= 4)
24318 : return insn_count;
24319 : }
24320 : }
24321 :
24322 : return insn_count;
24323 : }
24324 :
24325 :
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      /* BB itself hangs directly off the entry block: no predecessor
	 instructions to count.  */
      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  /* The predecessor is reached straight from entry; count its
	     insns (note: breaks only the inner loop, so the minimum
	     over all such predecessors is kept).  */
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
24370 :
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* If no epilogue note was found, pad right before the
		 return itself.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
24407 :
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* Insert the separating nop after the throwing insn (and its
	 debug notes).  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at any control transfer: beyond it the distance from the
	 function entry (where the argument stores happen) is unknown.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Replace the 128-bit load with a low-half load (high half
	 zeroed) followed by a high-half load.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Due to potential STLF stall, split instruction:\n",
		 dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs ("To:\n", dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Reuse INSN itself for the second (high-half) load and force
	 re-recognition.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
24504 :
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Windows SEH needs a nop between a throwing insn and the epilogue;
     see ix86_seh_fixup_eh_fallthru.  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  /* The remaining fixups are pure speed optimizations and are skipped
     when optimizing for size.  */
  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
24531 :
24532 : /* Return nonzero when QImode register that must be represented via REX prefix
24533 : is used. */
24534 : bool
24535 9173928 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24536 : {
24537 9173928 : int i;
24538 9173928 : extract_insn_cached (insn);
24539 34734796 : for (i = 0; i < recog_data.n_operands; i++)
24540 4784935 : if (GENERAL_REG_P (recog_data.operand[i])
24541 22822765 : && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24542 : return true;
24543 : return false;
24544 : }
24545 :
24546 : /* Return true when INSN mentions register that must be encoded using REX
24547 : prefix. */
24548 : bool
24549 195932865 : x86_extended_reg_mentioned_p (rtx insn)
24550 : {
24551 195932865 : subrtx_iterator::array_type array;
24552 1026070465 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24553 : {
24554 878814833 : const_rtx x = *iter;
24555 878814833 : if (REG_P (x)
24556 878814833 : && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
24557 252281852 : || REX2_INT_REGNO_P (REGNO (x))))
24558 48677233 : return true;
24559 : }
24560 147255632 : return false;
24561 195932865 : }
24562 :
24563 : /* Return true when INSN mentions register that must be encoded using REX2
24564 : prefix. */
24565 : bool
24566 2092856 : x86_extended_rex2reg_mentioned_p (rtx insn)
24567 : {
24568 2092856 : subrtx_iterator::array_type array;
24569 9742564 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24570 : {
24571 7650379 : const_rtx x = *iter;
24572 7650379 : if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
24573 671 : return true;
24574 : }
24575 2092185 : return false;
24576 2092856 : }
24577 :
24578 : /* Return true when rtx operands mentions register that must be encoded using
24579 : evex prefix. */
24580 : bool
24581 10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
24582 : {
24583 10 : int i;
24584 28 : for (i = 0; i < nops; i++)
24585 22 : if (EXT_REX_SSE_REG_P (operands[i])
24586 40 : || x86_extended_rex2reg_mentioned_p (operands[i]))
24587 4 : return true;
24588 : return false;
24589 : }
24590 :
24591 : /* If profitable, negate (without causing overflow) integer constant
24592 : of mode MODE at location LOC. Return true in this case. */
24593 : bool
24594 5880888 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
24595 : {
24596 5880888 : HOST_WIDE_INT val;
24597 :
24598 5880888 : if (!CONST_INT_P (*loc))
24599 : return false;
24600 :
24601 4963509 : switch (mode)
24602 : {
24603 2812825 : case E_DImode:
24604 : /* DImode x86_64 constants must fit in 32 bits. */
24605 2812825 : gcc_assert (x86_64_immediate_operand (*loc, mode));
24606 :
24607 : mode = SImode;
24608 : break;
24609 :
24610 : case E_SImode:
24611 : case E_HImode:
24612 : case E_QImode:
24613 : break;
24614 :
24615 0 : default:
24616 0 : gcc_unreachable ();
24617 : }
24618 :
24619 : /* Avoid overflows. */
24620 4963509 : if (mode_signbit_p (mode, *loc))
24621 : return false;
24622 :
24623 4962996 : val = INTVAL (*loc);
24624 :
24625 : /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
24626 : Exceptions: -128 encodes smaller than 128, so swap sign and op. */
24627 4962996 : if ((val < 0 && val != -128)
24628 3264452 : || val == 128)
24629 : {
24630 1709676 : *loc = GEN_INT (-val);
24631 1709676 : return true;
24632 : }
24633 :
24634 : return false;
24635 : }
24636 :
24637 : /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24638 : optabs would emit if we didn't have TFmode patterns. */
24639 :
24640 : void
24641 4497 : x86_emit_floatuns (rtx operands[2])
24642 : {
24643 4497 : rtx_code_label *neglab, *donelab;
24644 4497 : rtx i0, i1, f0, in, out;
24645 4497 : machine_mode mode, inmode;
24646 :
24647 4497 : inmode = GET_MODE (operands[1]);
24648 4497 : gcc_assert (inmode == SImode || inmode == DImode);
24649 :
24650 4497 : out = operands[0];
24651 4497 : in = force_reg (inmode, operands[1]);
24652 4497 : mode = GET_MODE (out);
24653 4497 : neglab = gen_label_rtx ();
24654 4497 : donelab = gen_label_rtx ();
24655 4497 : f0 = gen_reg_rtx (mode);
24656 :
24657 4497 : emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24658 :
24659 4497 : expand_float (out, in, 0);
24660 :
24661 4497 : emit_jump_insn (gen_jump (donelab));
24662 4497 : emit_barrier ();
24663 :
24664 4497 : emit_label (neglab);
24665 :
24666 4497 : i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24667 : 1, OPTAB_DIRECT);
24668 4497 : i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24669 : 1, OPTAB_DIRECT);
24670 4497 : i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24671 :
24672 4497 : expand_float (f0, i0, 0);
24673 :
24674 4497 : emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
24675 :
24676 4497 : emit_label (donelab);
24677 4497 : }
24678 :
24679 : /* Return the diagnostic message string if conversion from FROMTYPE to
24680 : TOTYPE is not allowed, NULL otherwise. */
24681 :
24682 : static const char *
24683 1198959623 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
24684 : {
24685 1198959623 : machine_mode from_mode = element_mode (fromtype);
24686 1198959623 : machine_mode to_mode = element_mode (totype);
24687 :
24688 1198959623 : if (!TARGET_SSE2 && from_mode != to_mode)
24689 : {
24690 : /* Do no allow conversions to/from BFmode/HFmode scalar types
24691 : when TARGET_SSE2 is not available. */
24692 466880 : if (from_mode == BFmode)
24693 : return N_("invalid conversion from type %<__bf16%> "
24694 : "without option %<-msse2%>");
24695 466879 : if (from_mode == HFmode)
24696 : return N_("invalid conversion from type %<_Float16%> "
24697 : "without option %<-msse2%>");
24698 466879 : if (to_mode == BFmode)
24699 : return N_("invalid conversion to type %<__bf16%> "
24700 : "without option %<-msse2%>");
24701 466879 : if (to_mode == HFmode)
24702 : return N_("invalid conversion to type %<_Float16%> "
24703 : "without option %<-msse2%>");
24704 : }
24705 :
24706 : /* Warn for silent implicit conversion between __bf16 and short,
24707 : since __bfloat16 is refined as real __bf16 instead of short
24708 : since GCC13. */
24709 1198959621 : if (element_mode (fromtype) != element_mode (totype)
24710 1198959621 : && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
24711 : {
24712 : /* Warn for silent implicit conversion where user may expect
24713 : a bitcast. */
24714 7716257 : if ((TYPE_MODE (fromtype) == BFmode
24715 279 : && TYPE_MODE (totype) == HImode)
24716 7716535 : || (TYPE_MODE (totype) == BFmode
24717 423 : && TYPE_MODE (fromtype) == HImode))
24718 1 : warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
24719 : "to real %<__bf16%> since GCC 13.1, be careful of "
24720 : "implicit conversion between %<__bf16%> and %<short%>; "
24721 : "an explicit bitcast may be needed here");
24722 : }
24723 :
24724 : /* Conversion allowed. */
24725 : return NULL;
24726 : }
24727 :
24728 : /* Return the diagnostic message string if the unary operation OP is
24729 : not permitted on TYPE, NULL otherwise. */
24730 :
24731 : static const char *
24732 94000765 : ix86_invalid_unary_op (int op, const_tree type)
24733 : {
24734 94000765 : machine_mode mmode = element_mode (type);
24735 : /* Reject all single-operand operations on BFmode/HFmode except for &
24736 : when TARGET_SSE2 is not available. */
24737 94000765 : if (!TARGET_SSE2 && op != ADDR_EXPR)
24738 : {
24739 110834 : if (mmode == BFmode)
24740 : return N_("operation not permitted on type %<__bf16%> "
24741 : "without option %<-msse2%>");
24742 110834 : if (mmode == HFmode)
24743 0 : return N_("operation not permitted on type %<_Float16%> "
24744 : "without option %<-msse2%>");
24745 : }
24746 :
24747 : /* Operation allowed. */
24748 : return NULL;
24749 : }
24750 :
24751 : /* Return the diagnostic message string if the binary operation OP is
24752 : not permitted on TYPE1 and TYPE2, NULL otherwise. */
24753 :
24754 : static const char *
24755 177355832 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
24756 : const_tree type2)
24757 : {
24758 177355832 : machine_mode type1_mode = element_mode (type1);
24759 177355832 : machine_mode type2_mode = element_mode (type2);
24760 : /* Reject all 2-operand operations on BFmode or HFmode
24761 : when TARGET_SSE2 is not available. */
24762 177355832 : if (!TARGET_SSE2)
24763 : {
24764 1006455 : if (type1_mode == BFmode || type2_mode == BFmode)
24765 : return N_("operation not permitted on type %<__bf16%> "
24766 : "without option %<-msse2%>");
24767 :
24768 1006455 : if (type1_mode == HFmode || type2_mode == HFmode)
24769 0 : return N_("operation not permitted on type %<_Float16%> "
24770 : "without option %<-msse2%>");
24771 : }
24772 :
24773 : /* Operation allowed. */
24774 : return NULL;
24775 : }
24776 :
24777 :
24778 : /* Target hook for scalar_mode_supported_p. */
24779 : static bool
24780 4481565 : ix86_scalar_mode_supported_p (scalar_mode mode)
24781 : {
24782 4481565 : if (DECIMAL_FLOAT_MODE_P (mode))
24783 625545 : return default_decimal_float_supported_p ();
24784 3856020 : else if (mode == TFmode)
24785 : return true;
24786 3536841 : else if (mode == HFmode || mode == BFmode)
24787 : return true;
24788 : else
24789 2900466 : return default_scalar_mode_supported_p (mode);
24790 : }
24791 :
24792 : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
24793 : if MODE is HFmode, and punt to the generic implementation otherwise. */
24794 :
24795 : static bool
24796 2191694 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24797 : {
24798 : /* NB: Always return TRUE for HFmode so that the _Float16 type will
24799 : be defined by the C front-end for AVX512FP16 intrinsics. We will
24800 : issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
24801 : enabled. */
24802 1873982 : return ((mode == HFmode || mode == BFmode)
24803 3747964 : ? true
24804 1556270 : : default_libgcc_floating_mode_supported_p (mode));
24805 : }
24806 :
24807 : /* Implements target hook vector_mode_supported_p. */
24808 : static bool
24809 1317468053 : ix86_vector_mode_supported_p (machine_mode mode)
24810 : {
24811 : /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
24812 : either. */
24813 1453057104 : if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
24814 : return false;
24815 1317467603 : if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24816 : return true;
24817 1109751324 : if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24818 : return true;
24819 495951132 : if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
24820 : return true;
24821 356907897 : if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
24822 : return true;
24823 222983928 : if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
24824 222927241 : && VALID_MMX_REG_MODE (mode))
24825 : return true;
24826 31660867 : if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
24827 31029603 : && VALID_MMX_REG_MODE_3DNOW (mode))
24828 : return true;
24829 22137744 : if (mode == V2QImode)
24830 22622 : return true;
24831 : return false;
24832 : }
24833 :
24834 : /* Target hook for c_mode_for_suffix. */
24835 : static machine_mode
24836 193204 : ix86_c_mode_for_suffix (char suffix)
24837 : {
24838 193204 : if (suffix == 'q')
24839 : return TFmode;
24840 37 : if (suffix == 'w')
24841 : return XFmode;
24842 :
24843 0 : return VOIDmode;
24844 : }
24845 :
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"

   CONSTRAINTS holds one constraint string per asm operand; each
   entry is rewritten in place (freshly allocated via xstrdup).  */

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints ("=@cc...") contain no GPR letters;
	 leave them untouched.  */
      if (startswith (cur, "=@cc"))
	continue;

      int len = strlen (cur);
      auto_vec<char> buf;

      /* Scan the constraint string character by character, building
	 the rewritten constraint in BUF.  */
      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no single j-prefixed counterpart; expand it to
		 "jrjmi" per the table above.  */
	      buf.safe_push ('j');
	      buf.safe_push ('r');
	      buf.safe_push ('j');
	      buf.safe_push ('m');
	      buf.safe_push ('i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* These map 1:1 to their j-prefixed forms.  */
	      buf.safe_push ('j');
	      buf.safe_push (cur[j]);
	      break;
	    case 'B':
	      if (cur[j + 1] == 'm')
		{
		  /* "Bm" maps to "ja" per the table above.  */
		  buf.safe_push ('j');
		  buf.safe_push ('a');
		  j++;
		}
	      else
		{
		  /* Other two-letter "B?" constraints pass through.  */
		  buf.safe_push (cur[j]);
		  buf.safe_push (cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-letter constraint prefixes: copy both characters
		 through unchanged and skip the second one.  */
	      buf.safe_push (cur[j]);
	      buf.safe_push (cur[j + 1]);
	      j++;
	      break;
	    case '{':
	      /* Copy a brace-enclosed "{...}" sequence verbatim up to
		 and including the closing brace.  */
	      do
		{
		  buf.safe_push (cur[j]);
		} while (cur[j++] != '}');
	      break;
	    default:
	      /* Anything else (modifiers, immediates, commas, ...)
		 passes through unchanged.  */
	      buf.safe_push (cur[j]);
	      break;
	    }
	}
      buf.safe_push ('\0');
      constraints[i] = xstrdup (buf.address ());
    }
}
24938 :
24939 : /* Worker function for TARGET_MD_ASM_ADJUST.
24940 :
24941 : We implement asm flag outputs, and maintain source compatibility
24942 : with the old cc0-based compiler. */
24943 :
24944 : static rtx_insn *
24945 107583 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
24946 : vec<machine_mode> & /*input_modes*/,
24947 : vec<const char *> &constraints, vec<rtx> &/*uses*/,
24948 : vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
24949 : location_t loc)
24950 : {
24951 107583 : bool saw_asm_flag = false;
24952 :
24953 107583 : start_sequence ();
24954 :
24955 107583 : if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
24956 43 : map_egpr_constraints (constraints);
24957 :
24958 289674 : for (unsigned i = 0, n = outputs.length (); i < n; ++i)
24959 : {
24960 75344 : const char *con = constraints[i];
24961 75344 : if (!startswith (con, "=@cc"))
24962 75256 : continue;
24963 88 : con += 4;
24964 88 : if (strchr (con, ',') != NULL)
24965 : {
24966 1 : error_at (loc, "alternatives not allowed in %<asm%> flag output");
24967 1 : continue;
24968 : }
24969 :
24970 87 : bool invert = false;
24971 87 : if (con[0] == 'n')
24972 19 : invert = true, con++;
24973 :
24974 87 : machine_mode mode = CCmode;
24975 87 : rtx_code code = UNKNOWN;
24976 :
24977 87 : switch (con[0])
24978 : {
24979 15 : case 'a':
24980 15 : if (con[1] == 0)
24981 : mode = CCAmode, code = EQ;
24982 4 : else if (con[1] == 'e' && con[2] == 0)
24983 : mode = CCCmode, code = NE;
24984 : break;
24985 11 : case 'b':
24986 11 : if (con[1] == 0)
24987 : mode = CCCmode, code = EQ;
24988 6 : else if (con[1] == 'e' && con[2] == 0)
24989 : mode = CCAmode, code = NE;
24990 : break;
24991 14 : case 'c':
24992 14 : if (con[1] == 0)
24993 : mode = CCCmode, code = EQ;
24994 : break;
24995 8 : case 'e':
24996 8 : if (con[1] == 0)
24997 : mode = CCZmode, code = EQ;
24998 : break;
24999 11 : case 'g':
25000 11 : if (con[1] == 0)
25001 : mode = CCGCmode, code = GT;
25002 5 : else if (con[1] == 'e' && con[2] == 0)
25003 : mode = CCGCmode, code = GE;
25004 : break;
25005 10 : case 'l':
25006 10 : if (con[1] == 0)
25007 : mode = CCGCmode, code = LT;
25008 5 : else if (con[1] == 'e' && con[2] == 0)
25009 : mode = CCGCmode, code = LE;
25010 : break;
25011 4 : case 'o':
25012 4 : if (con[1] == 0)
25013 : mode = CCOmode, code = EQ;
25014 : break;
25015 4 : case 'p':
25016 4 : if (con[1] == 0)
25017 : mode = CCPmode, code = EQ;
25018 : break;
25019 4 : case 's':
25020 4 : if (con[1] == 0)
25021 : mode = CCSmode, code = EQ;
25022 : break;
25023 6 : case 'z':
25024 6 : if (con[1] == 0)
25025 : mode = CCZmode, code = EQ;
25026 : break;
25027 : }
25028 1 : if (code == UNKNOWN)
25029 : {
25030 1 : error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
25031 1 : continue;
25032 : }
25033 86 : if (invert)
25034 19 : code = reverse_condition (code);
25035 :
25036 86 : rtx dest = outputs[i];
25037 86 : if (!saw_asm_flag)
25038 : {
25039 : /* This is the first asm flag output. Here we put the flags
25040 : register in as the real output and adjust the condition to
25041 : allow it. */
25042 75 : constraints[i] = "=Bf";
25043 75 : outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
25044 75 : saw_asm_flag = true;
25045 : }
25046 : else
25047 : {
25048 : /* We don't need the flags register as output twice. */
25049 11 : constraints[i] = "=X";
25050 11 : outputs[i] = gen_rtx_SCRATCH (SImode);
25051 : }
25052 :
25053 86 : rtx x = gen_rtx_REG (mode, FLAGS_REG);
25054 86 : x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
25055 :
25056 86 : machine_mode dest_mode = GET_MODE (dest);
25057 86 : if (!SCALAR_INT_MODE_P (dest_mode))
25058 : {
25059 3 : error_at (loc, "invalid type for %<asm%> flag output");
25060 3 : continue;
25061 : }
25062 :
25063 83 : if (dest_mode == QImode)
25064 73 : emit_insn (gen_rtx_SET (dest, x));
25065 : else
25066 : {
25067 10 : rtx reg = gen_reg_rtx (QImode);
25068 10 : emit_insn (gen_rtx_SET (reg, x));
25069 :
25070 10 : reg = convert_to_mode (dest_mode, reg, 1);
25071 10 : emit_move_insn (dest, reg);
25072 : }
25073 : }
25074 :
25075 107583 : rtx_insn *seq = end_sequence ();
25076 :
25077 107583 : if (saw_asm_flag)
25078 : return seq;
25079 : else
25080 : {
25081 : /* If we had no asm flag outputs, clobber the flags. */
25082 107508 : clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
25083 107508 : SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
25084 107508 : return NULL;
25085 : }
25086 : }
25087 :
25088 : /* Implements target vector targetm.asm.encode_section_info. */
25089 :
25090 : static void ATTRIBUTE_UNUSED
25091 9846721 : ix86_encode_section_info (tree decl, rtx rtl, int first)
25092 : {
25093 9846721 : default_encode_section_info (decl, rtl, first);
25094 :
25095 9846721 : if (ix86_in_large_data_p (decl))
25096 32 : SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
25097 9846721 : }
25098 :
25099 : /* Worker function for REVERSE_CONDITION. */
25100 :
25101 : enum rtx_code
25102 31890921 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
25103 : {
25104 31890921 : return (mode == CCFPmode
25105 31890921 : ? reverse_condition_maybe_unordered (code)
25106 27538384 : : reverse_condition (code));
25107 : }
25108 :
25109 : /* Output code to perform an x87 FP register move, from OPERANDS[1]
25110 : to OPERANDS[0]. */
25111 :
25112 : const char *
25113 651709 : output_387_reg_move (rtx_insn *insn, rtx *operands)
25114 : {
25115 651709 : if (REG_P (operands[0]))
25116 : {
25117 544769 : if (REG_P (operands[1])
25118 544769 : && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25119 : {
25120 296822 : if (REGNO (operands[0]) == FIRST_STACK_REG)
25121 276208 : return output_387_ffreep (operands, 0);
25122 : return "fstp\t%y0";
25123 : }
25124 247947 : if (STACK_TOP_P (operands[0]))
25125 247947 : return "fld%Z1\t%y1";
25126 : return "fst\t%y0";
25127 : }
25128 106940 : else if (MEM_P (operands[0]))
25129 : {
25130 106940 : gcc_assert (REG_P (operands[1]));
25131 106940 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25132 : return "fstp%Z0\t%y0";
25133 : else
25134 : {
25135 : /* There is no non-popping store to memory for XFmode.
25136 : So if we need one, follow the store with a load. */
25137 8381 : if (GET_MODE (operands[0]) == XFmode)
25138 : return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
25139 : else
25140 1873 : return "fst%Z0\t%y0";
25141 : }
25142 : }
25143 : else
25144 0 : gcc_unreachable();
25145 : }
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.

   NAME is the section name, FLAGS its SECTION_* flags, and DECL the
   declaration that caused the section switch (may be NULL_TREE —
   TODO confirm against callers).  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if !HAVE_GNU_AS
  /* The native Solaris assembler needs its own COMDAT syntax.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  /* Otherwise the generic ELF handling suffices.  */
  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
25185 : /* Return the mangling of TYPE if it is an extended fundamental type. */
25186 :
25187 : static const char *
25188 1142360869 : ix86_mangle_type (const_tree type)
25189 : {
25190 1142360869 : type = TYPE_MAIN_VARIANT (type);
25191 :
25192 1142360869 : if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25193 : && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25194 : return NULL;
25195 :
25196 614132071 : if (type == float128_type_node || type == float64x_type_node)
25197 : return NULL;
25198 :
25199 613468103 : switch (TYPE_MODE (type))
25200 : {
25201 : case E_BFmode:
25202 : return "DF16b";
25203 276749 : case E_HFmode:
25204 : /* _Float16 is "DF16_".
25205 : Align with clang's decision in https://reviews.llvm.org/D33719. */
25206 276749 : return "DF16_";
25207 1158433 : case E_TFmode:
25208 : /* __float128 is "g". */
25209 1158433 : return "g";
25210 8296450 : case E_XFmode:
25211 : /* "long double" or __float80 is "e". */
25212 8296450 : return "e";
25213 : default:
25214 : return NULL;
25215 : }
25216 : }
25217 :
25218 : /* Create C++ tinfo symbols for only conditionally available fundamental
25219 : types. */
25220 :
25221 : static void
25222 5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
25223 : {
25224 5 : extern tree ix86_float16_type_node;
25225 5 : extern tree ix86_bf16_type_node;
25226 :
25227 5 : if (!TARGET_SSE2)
25228 : {
25229 0 : if (!float16_type_node)
25230 0 : float16_type_node = ix86_float16_type_node;
25231 0 : if (!bfloat16_type_node)
25232 0 : bfloat16_type_node = ix86_bf16_type_node;
25233 0 : callback (float16_type_node);
25234 0 : callback (bfloat16_type_node);
25235 0 : float16_type_node = NULL_TREE;
25236 0 : bfloat16_type_node = NULL_TREE;
25237 : }
25238 5 : }
25239 :
/* Cached VAR_DECL for the user-named TLS stack-protector guard symbol,
   built lazily by ix86_stack_protect_guard.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Return a tree expression that evaluates to the stack-protector guard
   value (the canary).  */
static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      /* Build a pointer-sized unsigned type qualified with the guard
	 register's address space (e.g. the segment register).  */
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* The user supplied an explicit guard symbol name; reuse the
	     cached decl or create it on first use.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      /* An external, public, volatile variable with the
		 requested name.  */
	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* Default: a volatile MEM_REF at the configured offset within
	     the guard's address space.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
25295 :
25296 : static bool
25297 743 : ix86_stack_protect_runtime_enabled_p (void)
25298 : {
25299 : /* Naked functions should not enable stack protector. */
25300 743 : return !ix86_function_naked (current_function_decl);
25301 : }
25302 :
25303 : /* For 32-bit code we can save PIC register setup by using
25304 : __stack_chk_fail_local hidden function instead of calling
25305 : __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25306 : register, so it is better to call __stack_chk_fail directly. */
25307 :
25308 : static tree ATTRIBUTE_UNUSED
25309 264 : ix86_stack_protect_fail (void)
25310 : {
25311 264 : return TARGET_64BIT
25312 264 : ? default_external_stack_protect_fail ()
25313 1 : : default_hidden_stack_protect_fail ();
25314 : }
25315 :
25316 : /* Select a format to encode pointers in exception handling data. CODE
25317 : is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25318 : true if the symbol may be affected by dynamic relocations.
25319 :
25320 : ??? All x86 object file formats are capable of representing this.
25321 : After all, the relocation needed is the same as for the call insn.
25322 : Whether or not a particular assembler allows us to enter such, I
25323 : guess we'll have to see. */
25324 :
25325 : int
25326 780597 : asm_preferred_eh_data_format (int code, int global)
25327 : {
25328 : /* PE-COFF is effectively always -fPIC because of the .reloc section. */
25329 780597 : if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
25330 : {
25331 38401 : int type = DW_EH_PE_sdata8;
25332 38401 : if (ptr_mode == SImode
25333 24553 : || ix86_cmodel == CM_SMALL_PIC
25334 38485 : || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25335 : type = DW_EH_PE_sdata4;
25336 53789 : return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25337 : }
25338 :
25339 742196 : if (ix86_cmodel == CM_SMALL
25340 18676 : || (ix86_cmodel == CM_MEDIUM && code))
25341 723531 : return DW_EH_PE_udata4;
25342 :
25343 : return DW_EH_PE_absptr;
25344 : }
25345 :
/* Worker for ix86_builtin_vectorization_cost and the fallback calls
   from ix86_vector_costs::add_stmt_cost.

   TYPE_OF_COST is the vectorizer cost kind being queried and MODE the
   machine mode of the value involved; the per-CPU cost tables
   (ix86_cost) supply the raw numbers.  */
static int
ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
			  machine_mode mode)
{
  bool fp = FLOAT_MODE_P (mode);
  int index;
  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

    case scalar_load:
      /* load/store costs are relative to register move which is 2.  Recompute
	 it to COSTS_N_INSNS so everything have same base.  */
      return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			    : ix86_cost->int_load [2]) / 2;

    case scalar_store:
      return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			    : ix86_cost->int_store [2]) / 2;

    case vector_stmt:
      return ix86_vec_cost (mode,
			    fp ? ix86_cost->addss : ix86_cost->sse_op);

    case vector_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

    case vector_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

    case vec_to_scalar:
    case scalar_to_vec:
      /* Both directions are charged as a generic SSE operation.  */
      return ix86_vec_cost (mode, ix86_cost->sse_op);

    /* We should have separate costs for unaligned loads and gather/scatter.
       Do that incrementally.  */
    case unaligned_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

    case unaligned_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

    case vector_gather_load:
      /* Fixed setup cost plus a per-element charge.  */
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			       (ix86_cost->gather_static
				+ ix86_cost->gather_per_elt
				  * GET_MODE_NUNITS (mode)) / 2);

    case vector_scatter_store:
      /* Fixed setup cost plus a per-element charge.  */
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			       (ix86_cost->scatter_static
				+ ix86_cost->scatter_per_elt
				  * GET_MODE_NUNITS (mode)) / 2);

    case cond_branch_taken:
      return ix86_cost->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return ix86_cost->cond_not_taken_branch_cost;

    case vec_perm:
      return ix86_vec_cost (mode, ix86_cost->sse_op);

    case vec_promote_demote:
      if (fp)
	return vec_fp_conversion_cost (ix86_tune_cost, mode);
      return ix86_vec_cost (mode, ix86_cost->sse_op);

    case vec_construct:
      {
	int n = GET_MODE_NUNITS (mode);
	/* N - 1 element inserts into an SSE vector, the possible
	   GPR -> XMM move is accounted for in add_stmt_cost.  */
	if (GET_MODE_BITSIZE (mode) <= 128)
	  return (n - 1) * ix86_cost->sse_op;
	/* One vinserti128 for combining two SSE vectors for AVX256.  */
	else if (GET_MODE_BITSIZE (mode) == 256)
	  return ((n - 2) * ix86_cost->sse_op
		  + ix86_vec_cost (mode, ix86_cost->sse_op));
	/* One vinserti64x4 and two vinserti128 for combining SSE
	   and AVX256 vectors to AVX512.  */
	else if (GET_MODE_BITSIZE (mode) == 512)
	  {
	    machine_mode half_mode
	      = mode_for_vector (GET_MODE_INNER (mode),
				 GET_MODE_NUNITS (mode) / 2).require ();
	    return ((n - 4) * ix86_cost->sse_op
		    + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
		    + ix86_vec_cost (mode, ix86_cost->sse_op));
	  }
	gcc_unreachable ();
      }

    default:
      gcc_unreachable ();
    }
}
25464 :
25465 : /* Implement targetm.vectorize.builtin_vectorization_cost. */
25466 : static int
25467 9353319 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25468 : tree vectype, int)
25469 : {
25470 9353319 : machine_mode mode = TImode;
25471 9353319 : if (vectype != NULL)
25472 7720621 : mode = TYPE_MODE (vectype);
25473 9353319 : return ix86_default_vector_cost (type_of_cost, mode);
25474 : }
25475 :
25476 :
25477 : /* This function returns the calling abi specific va_list type node.
25478 : It returns the FNDECL specific va_list type. */
25479 :
25480 : static tree
25481 47580 : ix86_fn_abi_va_list (tree fndecl)
25482 : {
25483 47580 : if (!TARGET_64BIT)
25484 726 : return va_list_type_node;
25485 46854 : gcc_assert (fndecl != NULL_TREE);
25486 :
25487 46854 : if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
25488 12868 : return ms_va_list_type_node;
25489 : else
25490 33986 : return sysv_va_list_type_node;
25491 : }
25492 :
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it return NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      /* The MS-ABI va_list record is marked with the "ms_abi va_list"
	 type attribute.  */
      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
	return ms_va_list_type_node;

      /* The SysV va_list is a one-element array of a record (or, after
	 array-to-pointer decay, a pointer to that record); in either
	 case look at the element type's attributes.  */
      if ((TREE_CODE (type) == ARRAY_TYPE
	   && integer_zerop (array_type_nelts_minus_one (type)))
	  || POINTER_TYPE_P (type))
	{
	  tree elem_type = TREE_TYPE (type);
	  if (TREE_CODE (elem_type) == RECORD_TYPE
	      && lookup_attribute ("sysv_abi va_list",
				   TYPE_ATTRIBUTES (elem_type)))
	    return sysv_va_list_type_node;
	}

      /* Not a recognized 64-bit va_list type.  */
      return NULL_TREE;
    }

  /* 32-bit targets use the language-independent handling.  */
  return std_canonical_va_list_type (type);
}
25520 :
25521 : /* Iterate through the target-specific builtin types for va_list.
25522 : IDX denotes the iterator, *PTREE is set to the result type of
25523 : the va_list builtin, and *PNAME to its internal type.
25524 : Returns zero if there is no element for this index, otherwise
25525 : IDX should be increased upon the next call.
25526 : Note, do not iterate a base builtin's name like __builtin_va_list.
25527 : Used from c_common_nodes_and_builtins. */
25528 :
25529 : static int
25530 613743 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25531 : {
25532 613743 : if (TARGET_64BIT)
25533 : {
25534 608367 : switch (idx)
25535 : {
25536 : default:
25537 : break;
25538 :
25539 202789 : case 0:
25540 202789 : *ptree = ms_va_list_type_node;
25541 202789 : *pname = "__builtin_ms_va_list";
25542 202789 : return 1;
25543 :
25544 202789 : case 1:
25545 202789 : *ptree = sysv_va_list_type_node;
25546 202789 : *pname = "__builtin_sysv_va_list";
25547 202789 : return 1;
25548 : }
25549 : }
25550 :
25551 : return 0;
25552 : }
25553 :
/* Register the x86 implementations of the scheduler target hooks.
   The ix86_* functions referenced here are presumably defined elsewhere
   in this file (outside this window).  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
25567 :
25568 :
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      /* Width of 1 means no parallelism is profitable; stop early.  */
      if (width == 1)
	return 1;

      /* Znver1-4 Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
	   || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	return 1;
      /* Znver5 can do 2 integer multiplications per cycle with latency
	 of 3.  */
      if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	width = 6;

      /* Account for targets that splits wide vectors into multiple parts.  */
      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
	div = GET_MODE_BITSIZE (mode) / 256;
      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      /* Each split part occupies an issue slot, so scale the width down,
	 rounding up.  */
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}
25622 :
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Implement targetm.vectorize.preferred_simd_mode: return the vector
   mode to prefer for autovectorizing scalars of MODE.  Each case picks
   the widest vector the enabled ISA supports unless a
   TARGET_PREFER_AVX{128,256} tuning caps the width; word_mode means
   "do not vectorize this element type".  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      /* Byte elements need AVX512BW for 512-bit vectors.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      /* Likewise, word elements need AVX512BW for 512-bit vectors.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_HFmode:
      /* _Float16 vectorization requires AVX512FP16; the narrower
	 preferences additionally require AVX512VL.  */
      if (TARGET_AVX512FP16)
	{
	  if (TARGET_AVX512VL)
	    {
	      if (TARGET_PREFER_AVX128)
		return V8HFmode;
	      else if (TARGET_PREFER_AVX256)
		return V16HFmode;
	    }
	  return V32HFmode;
	}
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16BFmode;
      else
	return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
25709 :
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  /* The push order expresses preference: earlier modes are tried first.
     V*QImode stands for the whole vector size; the vectorizer derives
     other element types from it.  */
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      /* When tuning prefers 256 bits but ALL modes are requested, still
	 offer 512-bit vectors, just with the lowest priority.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      /* Likewise for a 128-bit preference with ALL set.  */
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* 64-bit partial vectors when MMX registers are backed by SSE.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  /* 32-bit partial vectors.  */
  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
}
25750 :
/* Implementation of targetm.vectorize.get_mask_mode.  Return the mode
   to use for a vector comparison mask on DATA_MODE: a scalar integer
   (kmask) mode when AVX512-style masking applies, otherwise a vector
   of integer elements the same size as DATA_MODE's elements.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
	 to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
	  && GET_MODE_INNER (data_mode) == E_HFmode)
      || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    {
      /* 32/64-bit elements are maskable with base AVX512F; 8/16-bit
	 elements additionally need AVX512BW.  One mask bit per lane.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (nunits).require ();
    }

  /* Vector mask case: one integer element per data lane, each as wide
     as the data element.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
25782 :
25783 :
25784 :
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* The "0 &&" deliberately disables the optimization while keeping the
     intended condition readable; see the PRs cited above before
     re-enabling.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
25803 :
25804 : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
25805 : but returns a lower bound. */
25806 :
25807 : static unsigned int
25808 1878067 : ix86_max_noce_ifcvt_seq_cost (edge e)
25809 : {
25810 1878067 : bool predictable_p = predictable_edge_p (e);
25811 1878067 : if (predictable_p)
25812 : {
25813 145215 : if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
25814 8 : return param_max_rtl_if_conversion_predictable_cost;
25815 : }
25816 : else
25817 : {
25818 1732852 : if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
25819 73 : return param_max_rtl_if_conversion_unpredictable_cost;
25820 : }
25821 :
25822 : /* For modern machines with deeper pipeline, the penalty for branch
25823 : misprediction could be higher than before to reset the pipeline
25824 : slots. Add parameter br_mispredict_scale as a factor to describe
25825 : the impact of reseting the pipeline. */
25826 :
25827 1877986 : return BRANCH_COST (true, predictable_p)
25828 1877986 : * ix86_tune_cost->br_mispredict_scale;
25829 : }
25830 :
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  /* Only IF_THEN_ELSE sources can become (F)CMOV.  */
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  /* Both arms must be register or memory operands for the
	     conditional-move patterns.  */
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }

  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
     for movdfcc/movsfcc, and could possibly fail cost comparison.
     Increase branch cost will hurt performance for other modes, so
     specially add some preference for floating point ifcvt.  */
  if (!TARGET_SSE4_1 && if_info->x
      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
      && if_info->speed_p)
    {
      unsigned cost = seq_cost (seq, true);

      if (cost <= if_info->original_cost)
	return true;

      /* Allow a small slack of two insns over the generic limit.  */
      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    }

  /* Otherwise fall back to the generic cost comparison.  */
  return default_noce_conversion_profitable_p (seq, if_info);
}
25883 :
/* x86-specific vector costs.  Accumulates per-statement costs during
   vectorization and records statistics used to tweak the final
   cost/unrolling decisions in finish_cost.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Account COUNT copies of a statement of kind KIND; overrides the
     generic hook to use the x86 cost tables.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  /* Finalize the accumulated costs.  */
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
25913 :
/* Construct a cost accumulator for VINFO.  COSTING_FOR_SCALAR is true
   when this instance costs the scalar rather than the vectorized code.
   All statistic counters start zeroed; unrolling is preferred until
   add_stmt_cost finds a reason to avoid it.  */
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar),
    m_num_gpr_needed (),
    m_num_sse_needed (),
    m_num_avx256_vec_perm (),
    m_num_reduc (),
    m_prefer_unroll (true)
{}
25922 :
/* Implement targetm.vectorize.create_costs.  Return a newly allocated
   ix86_vector_costs; ownership passes to the caller.  */

static vector_costs *
ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new ix86_vector_costs (vinfo, costing_for_scalar);
}
25930 :
25931 : unsigned
25932 6656078 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
25933 : stmt_vec_info stmt_info, slp_tree node,
25934 : tree vectype, int,
25935 : vect_cost_model_location where)
25936 : {
25937 6656078 : unsigned retval = 0;
25938 6656078 : bool scalar_p
25939 : = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
25940 6656078 : int stmt_cost = - 1;
25941 :
25942 6656078 : bool fp = false;
25943 6656078 : machine_mode mode = scalar_p ? SImode : TImode;
25944 :
25945 6656078 : if (vectype != NULL)
25946 : {
25947 2977691 : fp = FLOAT_TYPE_P (vectype);
25948 2977691 : mode = TYPE_MODE (vectype);
25949 2977691 : if (scalar_p)
25950 242927 : mode = TYPE_MODE (TREE_TYPE (vectype));
25951 : }
25952 : /* When we are costing a scalar stmt use the scalar stmt to get at the
25953 : type of the operation. */
25954 3678387 : else if (scalar_p && stmt_info)
25955 3618067 : if (tree lhs = gimple_get_lhs (stmt_info->stmt))
25956 : {
25957 3445962 : fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
25958 3445962 : mode = TYPE_MODE (TREE_TYPE (lhs));
25959 : }
25960 :
25961 6656078 : if ((kind == vector_stmt || kind == scalar_stmt)
25962 1615817 : && stmt_info
25963 8265929 : && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
25964 : {
25965 1250771 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
25966 : /*machine_mode inner_mode = mode;
25967 : if (VECTOR_MODE_P (mode))
25968 : inner_mode = GET_MODE_INNER (mode);*/
25969 :
25970 1250771 : switch (subcode)
25971 : {
25972 502468 : case PLUS_EXPR:
25973 502468 : case POINTER_PLUS_EXPR:
25974 502468 : case MINUS_EXPR:
25975 502468 : if (kind == scalar_stmt)
25976 : {
25977 326475 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25978 69497 : stmt_cost = ix86_cost->addss;
25979 256978 : else if (X87_FLOAT_MODE_P (mode))
25980 128 : stmt_cost = ix86_cost->fadd;
25981 : else
25982 256850 : stmt_cost = ix86_cost->add;
25983 : }
25984 : else
25985 175993 : stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
25986 : : ix86_cost->sse_op);
25987 : break;
25988 :
25989 179475 : case MULT_EXPR:
25990 : /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
25991 : take it as MULT_EXPR. */
25992 179475 : case MULT_HIGHPART_EXPR:
25993 179475 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
25994 179475 : break;
25995 : /* There's no direct instruction for WIDEN_MULT_EXPR,
25996 : take emulation into account. */
25997 1018 : case WIDEN_MULT_EXPR:
25998 2036 : stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
25999 1018 : TYPE_UNSIGNED (vectype));
26000 1018 : break;
26001 :
26002 5990 : case NEGATE_EXPR:
26003 5990 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26004 1700 : stmt_cost = ix86_cost->sse_op;
26005 4290 : else if (X87_FLOAT_MODE_P (mode))
26006 0 : stmt_cost = ix86_cost->fchs;
26007 4290 : else if (VECTOR_MODE_P (mode))
26008 1836 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26009 : else
26010 2454 : stmt_cost = ix86_cost->add;
26011 : break;
26012 12377 : case TRUNC_DIV_EXPR:
26013 12377 : case CEIL_DIV_EXPR:
26014 12377 : case FLOOR_DIV_EXPR:
26015 12377 : case ROUND_DIV_EXPR:
26016 12377 : case TRUNC_MOD_EXPR:
26017 12377 : case CEIL_MOD_EXPR:
26018 12377 : case FLOOR_MOD_EXPR:
26019 12377 : case RDIV_EXPR:
26020 12377 : case ROUND_MOD_EXPR:
26021 12377 : case EXACT_DIV_EXPR:
26022 12377 : stmt_cost = ix86_division_cost (ix86_cost, mode);
26023 12377 : break;
26024 :
26025 54787 : case RSHIFT_EXPR:
26026 54787 : case LSHIFT_EXPR:
26027 54787 : case LROTATE_EXPR:
26028 54787 : case RROTATE_EXPR:
26029 54787 : {
26030 54787 : tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
26031 54787 : tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
26032 54787 : stmt_cost = ix86_shift_rotate_cost
26033 54787 : (ix86_cost,
26034 : (subcode == RSHIFT_EXPR
26035 31569 : && !TYPE_UNSIGNED (TREE_TYPE (op1)))
26036 : ? ASHIFTRT : LSHIFTRT, mode,
26037 54787 : TREE_CODE (op2) == INTEGER_CST,
26038 54787 : cst_and_fits_in_hwi (op2)
26039 32470 : ? int_cst_value (op2) : -1,
26040 : false, false, NULL, NULL);
26041 : }
26042 54787 : break;
26043 83216 : case NOP_EXPR:
26044 : /* Only sign-conversions are free. */
26045 83216 : if (tree_nop_conversion_p
26046 83216 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
26047 83216 : TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
26048 : stmt_cost = 0;
26049 83216 : else if (fp)
26050 6894 : stmt_cost = vec_fp_conversion_cost
26051 6894 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26052 : break;
26053 :
26054 13427 : case FLOAT_EXPR:
26055 13427 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26056 10334 : stmt_cost = ix86_cost->cvtsi2ss;
26057 3093 : else if (X87_FLOAT_MODE_P (mode))
26058 : /* TODO: We do not have cost tables for x87. */
26059 50 : stmt_cost = ix86_cost->fadd;
26060 : else
26061 3043 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26062 : break;
26063 :
26064 1706 : case FIX_TRUNC_EXPR:
26065 1706 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26066 0 : stmt_cost = ix86_cost->cvtss2si;
26067 1706 : else if (X87_FLOAT_MODE_P (mode))
26068 : /* TODO: We do not have cost tables for x87. */
26069 0 : stmt_cost = ix86_cost->fadd;
26070 : else
26071 1706 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26072 : break;
26073 :
26074 38525 : case COND_EXPR:
26075 38525 : {
26076 : /* SSE2 conditinal move sequence is:
26077 : pcmpgtd %xmm5, %xmm0 (accounted separately)
26078 : pand %xmm0, %xmm2
26079 : pandn %xmm1, %xmm0
26080 : por %xmm2, %xmm0
26081 : while SSE4 uses cmp + blend
26082 : and AVX512 masked moves.
26083 :
26084 : The condition is accounted separately since we usually have
26085 : p = a < b
26086 : c = p ? x : y
26087 : and we will account first statement as setcc. Exception is when
26088 : p is loaded from memory as bool and then we will not acocunt
26089 : the compare, but there is no way to check for this. */
26090 :
26091 38525 : int ninsns = TARGET_SSE4_1 ? 1 : 3;
26092 :
26093 : /* If one of parameters is 0 or -1 the sequence will be simplified:
26094 : (if_true & mask) | (if_false & ~mask) -> if_true & mask */
26095 19913 : if (ninsns > 1
26096 19913 : && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26097 19587 : || zerop (gimple_assign_rhs3 (stmt_info->stmt))
26098 11513 : || integer_minus_onep
26099 11513 : (gimple_assign_rhs2 (stmt_info->stmt))
26100 11087 : || integer_minus_onep
26101 11087 : (gimple_assign_rhs3 (stmt_info->stmt))))
26102 : ninsns = 1;
26103 :
26104 38525 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26105 2794 : stmt_cost = ninsns * ix86_cost->sse_op;
26106 35731 : else if (X87_FLOAT_MODE_P (mode))
26107 : /* x87 requires conditional branch. We don't have cost for
26108 : that. */
26109 : ;
26110 35722 : else if (VECTOR_MODE_P (mode))
26111 14725 : stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
26112 : else
26113 : /* compare (accounted separately) + cmov. */
26114 20997 : stmt_cost = ix86_cost->add;
26115 : }
26116 : break;
26117 :
26118 22115 : case MIN_EXPR:
26119 22115 : case MAX_EXPR:
26120 22115 : if (fp)
26121 : {
26122 1008 : if (X87_FLOAT_MODE_P (mode)
26123 384 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26124 : /* x87 requires conditional branch. We don't have cost for
26125 : that. */
26126 : ;
26127 : else
26128 : /* minss */
26129 1008 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26130 : }
26131 : else
26132 : {
26133 21107 : if (VECTOR_MODE_P (mode))
26134 : {
26135 4069 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26136 : /* vpmin was introduced in SSE3.
26137 : SSE2 needs pcmpgtd + pand + pandn + pxor.
26138 : If one of parameters is 0 or -1 the sequence is simplified
26139 : to pcmpgtd + pand. */
26140 4069 : if (!TARGET_SSSE3)
26141 : {
26142 3100 : if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26143 4434 : || integer_minus_onep
26144 1334 : (gimple_assign_rhs2 (stmt_info->stmt)))
26145 1766 : stmt_cost *= 2;
26146 : else
26147 1334 : stmt_cost *= 4;
26148 : }
26149 : }
26150 : else
26151 : /* cmp + cmov. */
26152 17038 : stmt_cost = ix86_cost->add * 2;
26153 : }
26154 : break;
26155 :
26156 940 : case ABS_EXPR:
26157 940 : case ABSU_EXPR:
26158 940 : if (fp)
26159 : {
26160 374 : if (X87_FLOAT_MODE_P (mode)
26161 150 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26162 : /* fabs. */
26163 0 : stmt_cost = ix86_cost->fabs;
26164 : else
26165 : /* andss of sign bit. */
26166 374 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26167 : }
26168 : else
26169 : {
26170 566 : if (VECTOR_MODE_P (mode))
26171 : {
26172 99 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26173 : /* vabs was introduced in SSE3.
26174 : SSE3 uses psrat + pxor + psub. */
26175 99 : if (!TARGET_SSSE3)
26176 75 : stmt_cost *= 3;
26177 : }
26178 : else
26179 : /* neg + cmov. */
26180 467 : stmt_cost = ix86_cost->add * 2;
26181 : }
26182 : break;
26183 :
26184 107774 : case BIT_IOR_EXPR:
26185 107774 : case BIT_XOR_EXPR:
26186 107774 : case BIT_AND_EXPR:
26187 107774 : case BIT_NOT_EXPR:
26188 107774 : gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
26189 : && !X87_FLOAT_MODE_P (mode));
26190 107774 : if (VECTOR_MODE_P (mode))
26191 35338 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26192 : else
26193 72436 : stmt_cost = ix86_cost->add;
26194 : break;
26195 :
26196 226953 : default:
26197 226953 : if (truth_value_p (subcode))
26198 : {
26199 73405 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26200 : /* CMPccS? insructions are cheap, so use sse_op. While they
26201 : produce a mask which may need to be turned to 0/1 by and,
26202 : expect that this will be optimized away in a common case. */
26203 0 : stmt_cost = ix86_cost->sse_op;
26204 73405 : else if (X87_FLOAT_MODE_P (mode))
26205 : /* fcmp + setcc. */
26206 0 : stmt_cost = ix86_cost->fadd + ix86_cost->add;
26207 73405 : else if (VECTOR_MODE_P (mode))
26208 14871 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26209 : else
26210 : /* setcc. */
26211 58534 : stmt_cost = ix86_cost->add;
26212 : break;
26213 : }
26214 : break;
26215 : }
26216 : }
26217 :
26218 : /* Record number of load/store/gather/scatter in vectorized body. */
26219 6656078 : if (where == vect_body && !m_costing_for_scalar)
26220 : {
26221 1703034 : int scale = 1;
26222 1703034 : if (vectype
26223 3397499 : && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
26224 59575 : && TARGET_AVX512_SPLIT_REGS)
26225 3388806 : || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26226 93449 : && TARGET_AVX256_SPLIT_REGS)))
26227 : scale = 2;
26228 :
26229 1703034 : switch (kind)
26230 : {
26231 : /* Emulated gather/scatter or any scalarization. */
26232 114230 : case scalar_load:
26233 114230 : case scalar_stmt:
26234 114230 : case scalar_store:
26235 114230 : case vector_gather_load:
26236 114230 : case vector_scatter_store:
26237 114230 : m_prefer_unroll = false;
26238 114230 : break;
26239 :
26240 474357 : case vector_stmt:
26241 474357 : case vec_to_scalar:
26242 : /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
26243 : unroll in the vectorizer will enable partial sum. */
26244 474357 : if (stmt_info
26245 474335 : && vect_is_reduction (stmt_info)
26246 521950 : && stmt_info->stmt)
26247 : {
26248 : /* Handle __builtin_fma. */
26249 47593 : if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
26250 : {
26251 6 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26252 6 : break;
26253 : }
26254 :
26255 47587 : if (!is_gimple_assign (stmt_info->stmt))
26256 : break;
26257 :
26258 45209 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26259 45209 : machine_mode inner_mode = GET_MODE_INNER (mode);
26260 45209 : tree rhs1, rhs2;
26261 45209 : bool native_vnni_p = true;
26262 45209 : gimple* def;
26263 45209 : machine_mode mode_rhs;
26264 45209 : switch (subcode)
26265 : {
26266 35233 : case PLUS_EXPR:
26267 35233 : case MINUS_EXPR:
26268 35233 : if (!fp || !flag_associative_math
26269 15940 : || flag_fp_contract_mode != FP_CONTRACT_FAST)
26270 : break;
26271 :
26272 : /* FMA condition for different modes. */
26273 15940 : if (((inner_mode == DFmode || inner_mode == SFmode)
26274 15928 : && !TARGET_FMA && !TARGET_AVX512VL)
26275 5776 : || (inner_mode == HFmode && !TARGET_AVX512FP16)
26276 5776 : || (inner_mode == BFmode && !TARGET_AVX10_2))
26277 : break;
26278 :
26279 : /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
26280 : to FMA/FNMA after vectorization. */
26281 5776 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26282 5776 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26283 5776 : if (subcode == PLUS_EXPR
26284 4538 : && TREE_CODE (rhs1) == SSA_NAME
26285 4538 : && (def = SSA_NAME_DEF_STMT (rhs1), true)
26286 4538 : && is_gimple_assign (def)
26287 8106 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26288 1402 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26289 4374 : else if (TREE_CODE (rhs2) == SSA_NAME
26290 4374 : && (def = SSA_NAME_DEF_STMT (rhs2), true)
26291 4374 : && is_gimple_assign (def)
26292 8716 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26293 4338 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26294 : break;
26295 :
26296 : /* Vectorizer lane_reducing_op_p supports DOT_PROX_EXPR,
26297 : WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
26298 : SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */
26299 374 : case DOT_PROD_EXPR:
26300 374 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26301 374 : mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
26302 374 : if (mode_rhs == QImode)
26303 : {
26304 211 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26305 211 : signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
26306 211 : signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
26307 :
26308 : /* vpdpbusd. */
26309 211 : if (signop1_p != signop2_p)
26310 53 : native_vnni_p
26311 53 : = (GET_MODE_SIZE (mode) == 64
26312 53 : ? TARGET_AVX512VNNI
26313 10 : : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
26314 53 : || TARGET_AVXVNNI));
26315 : else
26316 : /* vpdpbssd. */
26317 158 : native_vnni_p
26318 174 : = (GET_MODE_SIZE (mode) == 64
26319 158 : ? TARGET_AVX10_2
26320 142 : : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
26321 : }
26322 374 : m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
26323 :
26324 : /* Dislike to do unroll and partial sum for
26325 : emulated DOT_PROD_EXPR. */
26326 374 : if (!native_vnni_p)
26327 128 : m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
26328 : break;
26329 :
26330 80 : case SAD_EXPR:
26331 80 : m_num_reduc[X86_REDUC_SAD] += count * scale;
26332 80 : break;
26333 :
26334 : default:
26335 : break;
26336 : }
26337 : }
26338 :
26339 : default:
26340 : break;
26341 : }
26342 : }
26343 :
26344 :
26345 6656078 : combined_fn cfn;
26346 6656078 : if ((kind == vector_stmt || kind == scalar_stmt)
26347 1615817 : && stmt_info
26348 1609851 : && stmt_info->stmt
26349 8265929 : && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
26350 17524 : switch (cfn)
26351 : {
26352 63 : case CFN_FMA:
26353 63 : stmt_cost = ix86_vec_cost (mode,
26354 63 : mode == SFmode ? ix86_cost->fmass
26355 : : ix86_cost->fmasd);
26356 63 : break;
26357 24 : case CFN_MULH:
26358 24 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26359 24 : break;
26360 : default:
26361 : break;
26362 : }
26363 :
26364 6656078 : if (kind == vec_promote_demote)
26365 : {
26366 45080 : int outer_size
26367 : = tree_to_uhwi
26368 45080 : (TYPE_SIZE
26369 45080 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
26370 45080 : int inner_size
26371 : = tree_to_uhwi
26372 45080 : (TYPE_SIZE
26373 45080 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
26374 45080 : bool inner_fp = FLOAT_TYPE_P
26375 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
26376 :
26377 3831 : if (fp && inner_fp)
26378 3431 : stmt_cost = vec_fp_conversion_cost
26379 3431 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26380 41649 : else if (fp && !inner_fp)
26381 4106 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26382 37543 : else if (!fp && inner_fp)
26383 400 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26384 : else
26385 37143 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26386 : /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
26387 : greater than inner size we will end up doing two conversions and
26388 : packing them. We always pack pairs; if the size difference is greater
26389 : it is split into multiple demote operations. */
26390 45080 : if (inner_size > outer_size)
26391 17470 : stmt_cost = stmt_cost * 2
26392 17470 : + ix86_vec_cost (mode, ix86_cost->sse_op);
26393 : }
26394 :
26395 : /* If we do elementwise loads into a vector then we are bound by
26396 : latency and execution resources for the many scalar loads
26397 : (AGU and load ports). Try to account for this by scaling the
26398 : construction cost by the number of elements involved. */
26399 6656078 : if ((kind == vec_construct || kind == vec_to_scalar)
26400 6656078 : && ((node
26401 424883 : && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
26402 436480 : || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
26403 36111 : && SLP_TREE_LANES (node) == 1))
26404 40984 : && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
26405 : (SLP_TREE_REPRESENTATIVE (node))))
26406 : != INTEGER_CST))
26407 69296 : || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
26408 : {
26409 30692 : stmt_cost = ix86_default_vector_cost (kind, mode);
26410 30692 : stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
26411 : }
26412 6625386 : else if ((kind == vec_construct || kind == scalar_to_vec)
26413 445424 : && node
26414 415204 : && SLP_TREE_DEF_TYPE (node) == vect_external_def)
26415 : {
26416 303058 : stmt_cost = ix86_default_vector_cost (kind, mode);
26417 303058 : unsigned i;
26418 303058 : tree op;
26419 1295105 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26420 688989 : if (TREE_CODE (op) == SSA_NAME)
26421 466416 : TREE_VISITED (op) = 0;
26422 992047 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26423 : {
26424 688989 : if (TREE_CODE (op) != SSA_NAME
26425 466416 : || TREE_VISITED (op))
26426 255956 : continue;
26427 433033 : TREE_VISITED (op) = 1;
26428 433033 : gimple *def = SSA_NAME_DEF_STMT (op);
26429 433033 : tree tem;
26430 : /* Look through a conversion. */
26431 433033 : if (is_gimple_assign (def)
26432 246627 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
26433 26995 : && ((tem = gimple_assign_rhs1 (def)), true)
26434 460028 : && TREE_CODE (tem) == SSA_NAME)
26435 26784 : def = SSA_NAME_DEF_STMT (tem);
26436 : /* When the component is loaded from memory without sign-
26437 : or zero-extension we can move it to a vector register and/or
26438 : insert it via vpinsr with a memory operand. */
26439 433033 : if (gimple_assign_load_p (def)
26440 130193 : && tree_nop_conversion_p (TREE_TYPE (op),
26441 130193 : TREE_TYPE (gimple_assign_lhs (def)))
26442 687409 : && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
26443 5162 : || TARGET_SSE4_1))
26444 : ;
26445 : /* When the component is extracted from a vector it is already
26446 : in a vector register. */
26447 310010 : else if (is_gimple_assign (def)
26448 119435 : && gimple_assign_rhs_code (def) == BIT_FIELD_REF
26449 312744 : && VECTOR_TYPE_P (TREE_TYPE
26450 : (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
26451 : ;
26452 : else
26453 : {
26454 307691 : if (fp)
26455 : {
26456 : /* Scalar FP values residing in x87 registers need to be
26457 : spilled and reloaded. */
26458 13430 : auto mode2 = TYPE_MODE (TREE_TYPE (op));
26459 13430 : if (IS_STACK_MODE (mode2))
26460 : {
26461 971 : int cost
26462 : = (ix86_cost->hard_register.fp_store[mode2 == SFmode
26463 971 : ? 0 : 1]
26464 971 : + ix86_cost->sse_load[sse_store_index (mode2)]);
26465 971 : stmt_cost += COSTS_N_INSNS (cost) / 2;
26466 : }
26467 13430 : m_num_sse_needed[where]++;
26468 : }
26469 : else
26470 : {
26471 294261 : m_num_gpr_needed[where]++;
26472 :
26473 294261 : stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
26474 : }
26475 : }
26476 : }
26477 992047 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26478 688989 : if (TREE_CODE (op) == SSA_NAME)
26479 466416 : TREE_VISITED (op) = 0;
26480 : }
26481 6656078 : if (stmt_cost == -1)
26482 5256269 : stmt_cost = ix86_default_vector_cost (kind, mode);
26483 :
26484 6656078 : if (kind == vec_perm && vectype
26485 177956 : && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26486 : /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body. */
26487 6659551 : && count != 0)
26488 : {
26489 3473 : bool real_perm = true;
26490 3473 : unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
26491 :
26492 3473 : if (node
26493 3470 : && SLP_TREE_LOAD_PERMUTATION (node).exists ()
26494 : /* Loop vectorization will have 4 times vec_perm
26495 : with index as {0, 0, 0, 0}.
26496 : But it actually generates
26497 : vec_perm_expr <vect, vect, 0, 0, 0, 0>
26498 : vec_perm_expr <vect, vect, 1, 1, 1, 1>
26499 : vec_perm_expr <vect, vect, 2, 2, 2, 2>
26500 : Need to be handled separately. */
26501 6298 : && is_a <bb_vec_info> (m_vinfo))
26502 : {
26503 39 : unsigned half = nunits / 2;
26504 39 : unsigned i = 0;
26505 39 : bool allsame = true;
26506 39 : unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
26507 39 : bool cross_lane_p = false;
26508 198 : for (i = 0 ; i != SLP_TREE_LANES (node); i++)
26509 : {
26510 197 : unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
26511 : /* allsame is just a broadcast. */
26512 197 : if (tmp != first)
26513 92 : allsame = false;
26514 :
26515 : /* 4 times vec_perm with number of lanes multiple of nunits. */
26516 197 : tmp = tmp & (nunits - 1);
26517 197 : unsigned index = i & (nunits - 1);
26518 197 : if ((index < half && tmp >= half)
26519 197 : || (index >= half && tmp < half))
26520 65 : cross_lane_p = true;
26521 :
26522 197 : if (!allsame && cross_lane_p)
26523 : break;
26524 : }
26525 :
26526 39 : if (i == SLP_TREE_LANES (node))
26527 : real_perm = false;
26528 : }
26529 :
26530 : if (real_perm)
26531 : {
26532 3472 : m_num_avx256_vec_perm[where] += count;
26533 3472 : if (dump_file && (dump_flags & TDF_DETAILS))
26534 : {
26535 228 : fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
26536 228 : if (stmt_info)
26537 225 : print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
26538 228 : fprintf (dump_file, " \n");
26539 : }
26540 : }
26541 : }
26542 :
26543 : /* Penalize DFmode vector operations for Bonnell. */
26544 6656078 : if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
26545 6656140 : && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
26546 12 : stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
26547 :
26548 : /* Statements in an inner loop relative to the loop being
26549 : vectorized are weighted more heavily. The value here is
26550 : arbitrary and could potentially be improved with analysis. */
26551 6656078 : retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
26552 :
26553 : /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
26554 : for Silvermont as it has out of order integer pipeline and can execute
26555 : 2 scalar instruction per tick, but has in order SIMD pipeline. */
26556 6656078 : if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
26557 6656078 : || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
26558 1811 : && stmt_info && stmt_info->stmt)
26559 : {
26560 1595 : tree lhs_op = gimple_get_lhs (stmt_info->stmt);
26561 1595 : if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
26562 1198 : retval = (retval * 17) / 10;
26563 : }
26564 :
26565 6656078 : m_costs[where] += retval;
26566 :
26567 6656078 : return retval;
26568 : }
26569 :
26570 : void
26571 1690129 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26572 : {
26573 1690129 : unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26574 1690129 : unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26575 :
26576 :   /* Is there a better way to query the number of FP registers available on the target?  Currently use SSE_REGS.  */
26577 1690129 : unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26578 6760516 : for (unsigned i = 0; i != 3; i++)
26579 : {
26580 5070387 : if (m_num_gpr_needed[i] > target_avail_regs)
26581 691 : m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26582 : /* Only measure sse registers pressure. */
26583 5070387 : if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26584 92 : m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26585 : }
26586 1690129 : }
26587 :
26588 : void
26589 1690129 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
26590 : {
26591 1690129 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
26592 379885 : if (loop_vinfo && !m_costing_for_scalar)
26593 : {
26594 : /* We are currently not asking the vectorizer to compare costs
26595 : between different vector mode sizes. When using predication
26596 :          that will end up always choosing the preferred mode size even
26597 : if there's a smaller mode covering all lanes. Test for this
26598 : situation and artificially reject the larger mode attempt.
26599 : ??? We currently lack masked ops for sub-SSE sized modes,
26600 : so we could restrict this rejection to AVX and AVX512 modes
26601 : but error on the safe side for now. */
26602 82416 : if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
26603 22 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26604 15 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26605 82426 : && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
26606 20 : > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
26607 8 : m_costs[vect_body] = INT_MAX;
26608 :
26609 : /* We'd like to avoid using masking if there's an in-order reduction
26610 : to vectorize because that will also perform in-order adds of
26611 : masked elements (as neutral value, of course) here, but there
26612 : is currently no way to indicate to try un-masked with the same
26613 : mode. */
26614 :
26615 82416 : bool any_reduc_p = false;
26616 327520 : for (int i = 0; i != X86_REDUC_LAST; i++)
26617 245881 : if (m_num_reduc[i])
26618 : {
26619 : any_reduc_p = true;
26620 : break;
26621 : }
26622 :
26623 82416 : if (any_reduc_p
26624 : /* Not much gain for loop with gather and scatter. */
26625 777 : && m_prefer_unroll
26626 627 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
26627 : {
26628 956 : unsigned unroll_factor
26629 478 : = OPTION_SET_P (ix86_vect_unroll_limit)
26630 478 : ? ix86_vect_unroll_limit
26631 478 : : ix86_cost->vect_unroll_limit;
26632 :
26633 478 : if (unroll_factor > 1)
26634 : {
26635 1912 : for (int i = 0 ; i != X86_REDUC_LAST; i++)
26636 : {
26637 1434 : if (m_num_reduc[i])
26638 : {
26639 478 : unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
26640 : m_num_reduc[i]);
26641 1434 : unroll_factor = MIN (unroll_factor, tmp);
26642 : }
26643 : }
26644 :
26645 956 : m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
26646 : }
26647 : }
26648 :
26649 : }
26650 :
26651 1690129 : ix86_vect_estimate_reg_pressure ();
26652 :
26653 6760516 : for (int i = 0; i != 3; i++)
26654 5070387 : if (m_num_avx256_vec_perm[i]
26655 444 : && TARGET_AVX256_AVOID_VEC_PERM)
26656 7 : m_costs[i] = INT_MAX;
26657 :
26658 : /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
26659 : a AVX2 and a SSE epilogue for AVX512 vectorized loops. */
26660 1690129 : if (loop_vinfo
26661 379885 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26662 46878 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
26663 1690953 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26664 14 : m_suggested_epilogue_mode = V16QImode;
26665 : /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
26666 : enable a 64bit SSE epilogue. */
26667 1690129 : if (loop_vinfo
26668 379885 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26669 46878 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
26670 1692603 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
26671 91 : m_suggested_epilogue_mode = V8QImode;
26672 :
26673 : /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
26674 : a masked epilogue if that doesn't seem detrimental. */
26675 1690129 : if (loop_vinfo
26676 379885 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26677 356446 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
26678 : /* Avoid a masked epilog if cascaded epilogues eventually get us
26679 : to one with VF 1 as that means no scalar epilog at all. */
26680 52197 : && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
26681 52197 : / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
26682 35 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26683 52196 : && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
26684 1690219 : && !OPTION_SET_P (param_vect_partial_vector_usage))
26685 : {
26686 84 : bool avoid = false;
26687 84 : if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26688 68 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
26689 : {
26690 68 : unsigned int peel_niter
26691 : = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
26692 68 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
26693 0 : peel_niter += 1;
26694 : /* When we know the number of scalar iterations of the epilogue,
26695 : avoid masking when a single vector epilog iteration handles
26696 : it in full. */
26697 68 : if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
26698 68 : % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
26699 : avoid = true;
26700 : }
26701 83 : if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
26702 7 : for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
26703 : {
26704 2 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
26705 : ;
26706 2 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
26707 : ;
26708 : else
26709 : {
26710 1 : int loop_depth
26711 2 : = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
26712 1 : DDR_LOOP_NEST (ddr));
26713 2 : if (DDR_NUM_DIST_VECTS (ddr) == 1
26714 1 : && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
26715 : {
26716 : /* Avoid the case when there's an outer loop that might
26717 : traverse a multi-dimensional array with the inner
26718 : loop just executing the masked epilogue with a
26719 : read-write where the next outer iteration might
26720 : read from the masked part of the previous write,
26721 : 'n' filling half a vector.
26722 : for (j = 0; j < m; ++j)
26723 : for (i = 0; i < n; ++i)
26724 : a[j][i] = c * a[j][i]; */
26725 : avoid = true;
26726 : break;
26727 : }
26728 : }
26729 : }
26730 : /* Avoid using masking if there's an in-order reduction
26731 : to vectorize because that will also perform in-order adds of
26732 : masked elements (as neutral value, of course). */
26733 84 : if (!avoid)
26734 : {
26735 331 : for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
26736 86 : if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
26737 86 : && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
26738 : == FOLD_LEFT_REDUCTION))
26739 : {
26740 : avoid = true;
26741 : break;
26742 : }
26743 : }
26744 82 : if (!avoid)
26745 : {
26746 81 : m_suggested_epilogue_mode = loop_vinfo->vector_mode;
26747 81 : m_masked_epilogue = 1;
26748 : }
26749 : }
26750 :
26751 1690129 : vector_costs::finish_cost (scalar_costs);
26752 1690129 : }
26753 :
26754 : /* Validate target specific memory model bits in VAL. */
26755 :
26756 : static unsigned HOST_WIDE_INT
26757 407670 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
26758 : {
26759 407670 : enum memmodel model = memmodel_from_int (val);
26760 407670 : bool strong;
26761 :
26762 407670 : if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
26763 : |MEMMODEL_MASK)
26764 407666 : || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
26765 : {
26766 4 : warning (OPT_Winvalid_memory_model,
26767 : "unknown architecture specific memory model");
26768 4 : return MEMMODEL_SEQ_CST;
26769 : }
26770 407666 : strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
26771 407666 : if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
26772 : {
26773 0 : warning (OPT_Winvalid_memory_model,
26774 : "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
26775 : "memory model");
26776 0 : return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
26777 : }
26778 407666 : if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
26779 : {
26780 0 : warning (OPT_Winvalid_memory_model,
26781 : "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
26782 : "memory model");
26783 0 : return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
26784 : }
26785 : return val;
26786 : }
26787 :
26788 : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
26789 : CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
26790 : CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
26791 : or number of vecsize_mangle variants that should be emitted. */
26792 :
26793 : static int
26794 7589 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
26795 : struct cgraph_simd_clone *clonei,
26796 : tree base_type, int num,
26797 : bool explicit_p)
26798 : {
26799 7589 : int ret = 1;
26800 :
26801 7589 : if (clonei->simdlen
26802 7589 : && (clonei->simdlen < 2
26803 1321 : || clonei->simdlen > 1024
26804 1321 : || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
26805 : {
26806 0 : if (explicit_p)
26807 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26808 : "unsupported simdlen %wd", clonei->simdlen.to_constant ());
26809 0 : return 0;
26810 : }
26811 :
26812 7589 : tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
26813 7589 : if (TREE_CODE (ret_type) != VOID_TYPE)
26814 6797 : switch (TYPE_MODE (ret_type))
26815 : {
26816 6797 : case E_QImode:
26817 6797 : case E_HImode:
26818 6797 : case E_SImode:
26819 6797 : case E_DImode:
26820 6797 : case E_SFmode:
26821 6797 : case E_DFmode:
26822 : /* case E_SCmode: */
26823 : /* case E_DCmode: */
26824 6797 : if (!AGGREGATE_TYPE_P (ret_type))
26825 : break;
26826 : /* FALLTHRU */
26827 2 : default:
26828 2 : if (explicit_p)
26829 2 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26830 : "unsupported return type %qT for simd", ret_type);
26831 2 : return 0;
26832 : }
26833 :
26834 7587 : tree t;
26835 7587 : int i;
26836 7587 : tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
26837 7587 : bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
26838 :
26839 7587 : for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
26840 20430 : t && t != void_list_node; t = TREE_CHAIN (t), i++)
26841 : {
26842 16670 : tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
26843 12848 : switch (TYPE_MODE (arg_type))
26844 : {
26845 12829 : case E_QImode:
26846 12829 : case E_HImode:
26847 12829 : case E_SImode:
26848 12829 : case E_DImode:
26849 12829 : case E_SFmode:
26850 12829 : case E_DFmode:
26851 : /* case E_SCmode: */
26852 : /* case E_DCmode: */
26853 12829 : if (!AGGREGATE_TYPE_P (arg_type))
26854 : break;
26855 : /* FALLTHRU */
26856 41 : default:
26857 41 : if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
26858 : break;
26859 5 : if (explicit_p)
26860 5 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26861 : "unsupported argument type %qT for simd", arg_type);
26862 : return 0;
26863 : }
26864 : }
26865 :
26866 7582 : if (!TREE_PUBLIC (node->decl) || !explicit_p)
26867 : {
26868 : /* If the function isn't exported, we can pick up just one ISA
26869 : for the clones. */
26870 114 : if (TARGET_AVX512F)
26871 0 : clonei->vecsize_mangle = 'e';
26872 114 : else if (TARGET_AVX2)
26873 1 : clonei->vecsize_mangle = 'd';
26874 113 : else if (TARGET_AVX)
26875 88 : clonei->vecsize_mangle = 'c';
26876 : else
26877 25 : clonei->vecsize_mangle = 'b';
26878 : ret = 1;
26879 : }
26880 : else
26881 : {
26882 7468 : clonei->vecsize_mangle = "bcde"[num];
26883 7468 : ret = 4;
26884 : }
26885 7582 : clonei->mask_mode = VOIDmode;
26886 7582 : switch (clonei->vecsize_mangle)
26887 : {
26888 1892 : case 'b':
26889 1892 : clonei->vecsize_int = 128;
26890 1892 : clonei->vecsize_float = 128;
26891 1892 : break;
26892 1955 : case 'c':
26893 1955 : clonei->vecsize_int = 128;
26894 1955 : clonei->vecsize_float = 256;
26895 1955 : break;
26896 1868 : case 'd':
26897 1868 : clonei->vecsize_int = 256;
26898 1868 : clonei->vecsize_float = 256;
26899 1868 : break;
26900 1867 : case 'e':
26901 1867 : clonei->vecsize_int = 512;
26902 1867 : clonei->vecsize_float = 512;
26903 1867 : if (TYPE_MODE (base_type) == QImode)
26904 19 : clonei->mask_mode = DImode;
26905 : else
26906 1848 : clonei->mask_mode = SImode;
26907 : break;
26908 : }
26909 7582 : if (clonei->simdlen == 0)
26910 : {
26911 6261 : if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
26912 3297 : clonei->simdlen = clonei->vecsize_int;
26913 : else
26914 2964 : clonei->simdlen = clonei->vecsize_float;
26915 6261 : clonei->simdlen = clonei->simdlen
26916 12522 : / GET_MODE_BITSIZE (TYPE_MODE (base_type));
26917 : }
26918 1321 : else if (clonei->simdlen > 16)
26919 : {
26920 : /* For compatibility with ICC, use the same upper bounds
26921 : for simdlen. In particular, for CTYPE below, use the return type,
26922 : unless the function returns void, in that case use the characteristic
26923 : type. If it is possible for given SIMDLEN to pass CTYPE value
26924 : in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
26925 : for 64-bit code), accept that SIMDLEN, otherwise warn and don't
26926 : emit corresponding clone. */
26927 12 : tree ctype = ret_type;
26928 12 : if (VOID_TYPE_P (ret_type))
26929 0 : ctype = base_type;
26930 24 : int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
26931 12 : if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
26932 8 : cnt /= clonei->vecsize_int;
26933 : else
26934 4 : cnt /= clonei->vecsize_float;
26935 12 : if (cnt > (TARGET_64BIT ? 16 : 8))
26936 : {
26937 0 : if (explicit_p)
26938 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26939 : "unsupported simdlen %wd",
26940 : clonei->simdlen.to_constant ());
26941 0 : return 0;
26942 : }
26943 : }
26944 : return ret;
26945 : }
26946 :
26947 : /* If SIMD clone NODE can't be used in a vectorized loop
26948 : in current function, return -1, otherwise return a badness of using it
26949 : (0 if it is most desirable from vecsize_mangle point of view, 1
26950 : slightly less desirable, etc.). */
26951 :
26952 : static int
26953 1768 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
26954 : {
26955 1768 : switch (node->simdclone->vecsize_mangle)
26956 : {
26957 621 : case 'b':
26958 621 : if (!TARGET_SSE2)
26959 : return -1;
26960 621 : if (!TARGET_AVX)
26961 : return 0;
26962 520 : return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
26963 627 : case 'c':
26964 627 : if (!TARGET_AVX)
26965 : return -1;
26966 582 : return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
26967 332 : case 'd':
26968 332 : if (!TARGET_AVX2)
26969 : return -1;
26970 139 : return TARGET_AVX512F ? 1 : 0;
26971 188 : case 'e':
26972 188 : if (!TARGET_AVX512F)
26973 130 : return -1;
26974 : return 0;
26975 0 : default:
26976 0 : gcc_unreachable ();
26977 : }
26978 : }
26979 :
26980 : /* This function adjusts the unroll factor based on
26981 : the hardware capabilities. For ex, bdver3 has
26982 : a loop buffer which makes unrolling of smaller
26983 : loops less important. This function decides the
26984 : unroll factor using number of memory references
26985 : (value 32 is used) as a heuristic. */
26986 :
26987 : static unsigned
26988 812975 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
26989 : {
26990 812975 : basic_block *bbs;
26991 812975 : rtx_insn *insn;
26992 812975 : unsigned i;
26993 812975 : unsigned mem_count = 0;
26994 :
26995 : /* Unroll small size loop when unroll factor is not explicitly
26996 : specified. */
26997 812975 : if (ix86_unroll_only_small_loops && !loop->unroll)
26998 : {
26999 769757 : if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
27000 70153 : return MIN (nunroll, ix86_cost->small_unroll_factor);
27001 : else
27002 : return 1;
27003 : }
27004 :
27005 43218 : if (!TARGET_ADJUST_UNROLL)
27006 : return nunroll;
27007 :
27008 : /* Count the number of memory references within the loop body.
27009 : This value determines the unrolling factor for bdver3 and bdver4
27010 : architectures. */
27011 7 : subrtx_iterator::array_type array;
27012 7 : bbs = get_loop_body (loop);
27013 21 : for (i = 0; i < loop->num_nodes; i++)
27014 102 : FOR_BB_INSNS (bbs[i], insn)
27015 88 : if (NONDEBUG_INSN_P (insn))
27016 464 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
27017 404 : if (const_rtx x = *iter)
27018 404 : if (MEM_P (x))
27019 : {
27020 25 : machine_mode mode = GET_MODE (x);
27021 50 : unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27022 25 : if (n_words > 4)
27023 0 : mem_count += 2;
27024 : else
27025 25 : mem_count += 1;
27026 : }
27027 7 : free (bbs);
27028 :
27029 7 : if (mem_count && mem_count <=32)
27030 7 : return MIN (nunroll, 32 / mem_count);
27031 :
27032 : return nunroll;
27033 7 : }
27034 :
27035 :
27036 : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
27037 :
27038 : static bool
27039 415274 : ix86_float_exceptions_rounding_supported_p (void)
27040 : {
27041 : /* For x87 floating point with standard excess precision handling,
27042 : there is no adddf3 pattern (since x87 floating point only has
27043 : XFmode operations) so the default hook implementation gets this
27044 : wrong. */
27045 415274 : return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
27046 : }
27047 :
27048 : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
27049 :
27050 : static void
27051 7054 : ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27052 : {
27053 7054 : if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
27054 : return;
27055 7054 : tree exceptions_var = create_tmp_var_raw (integer_type_node);
27056 7054 : if (TARGET_80387)
27057 : {
27058 7054 : tree fenv_index_type = build_index_type (size_int (6));
27059 7054 : tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
27060 7054 : tree fenv_var = create_tmp_var_raw (fenv_type);
27061 7054 : TREE_ADDRESSABLE (fenv_var) = 1;
27062 7054 : tree fenv_ptr = build_pointer_type (fenv_type);
27063 7054 : tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
27064 7054 : fenv_addr = fold_convert (ptr_type_node, fenv_addr);
27065 7054 : tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
27066 7054 : tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
27067 7054 : tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
27068 7054 : tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
27069 7054 : tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
27070 7054 : tree hold_fnclex = build_call_expr (fnclex, 0);
27071 7054 : fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
27072 : NULL_TREE, NULL_TREE);
27073 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
27074 : hold_fnclex);
27075 7054 : *clear = build_call_expr (fnclex, 0);
27076 7054 : tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
27077 7054 : tree fnstsw_call = build_call_expr (fnstsw, 0);
27078 7054 : tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
27079 : fnstsw_call, NULL_TREE, NULL_TREE);
27080 7054 : tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
27081 7054 : tree update_mod = build4 (TARGET_EXPR, integer_type_node,
27082 : exceptions_var, exceptions_x87,
27083 : NULL_TREE, NULL_TREE);
27084 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node,
27085 : sw_mod, update_mod);
27086 7054 : tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
27087 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
27088 : }
27089 7054 : if (TARGET_SSE && TARGET_SSE_MATH)
27090 : {
27091 7054 : tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
27092 7054 : tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
27093 7054 : tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
27094 7054 : tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
27095 7054 : tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
27096 7054 : tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
27097 : mxcsr_orig_var, stmxcsr_hold_call,
27098 : NULL_TREE, NULL_TREE);
27099 7054 : tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
27100 : mxcsr_orig_var,
27101 : build_int_cst (unsigned_type_node, 0x1f80));
27102 7054 : hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
27103 : build_int_cst (unsigned_type_node, 0xffffffc0));
27104 7054 : tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
27105 : mxcsr_mod_var, hold_mod_val,
27106 : NULL_TREE, NULL_TREE);
27107 7054 : tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27108 7054 : tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
27109 : hold_assign_orig, hold_assign_mod);
27110 7054 : hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
27111 : ldmxcsr_hold_call);
27112 7054 : if (*hold)
27113 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
27114 : else
27115 0 : *hold = hold_all;
27116 7054 : tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27117 7054 : if (*clear)
27118 7054 : *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
27119 : ldmxcsr_clear_call);
27120 : else
27121 0 : *clear = ldmxcsr_clear_call;
27122 7054 : tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
27123 7054 : tree exceptions_sse = fold_convert (integer_type_node,
27124 : stxmcsr_update_call);
27125 7054 : if (*update)
27126 : {
27127 7054 : tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
27128 : exceptions_var, exceptions_sse);
27129 7054 : tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
27130 : exceptions_var, exceptions_mod);
27131 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
27132 : exceptions_assign);
27133 : }
27134 : else
27135 0 : *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
27136 : exceptions_sse, NULL_TREE, NULL_TREE);
27137 7054 : tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
27138 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update,
27139 : ldmxcsr_update_call);
27140 : }
27141 7054 : tree atomic_feraiseexcept
27142 7054 : = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
27143 7054 : tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
27144 : 1, exceptions_var);
27145 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update,
27146 : atomic_feraiseexcept_call);
27147 : }
27148 :
27149 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries.  For
   x86-64, common symbol is local only for non-PIE binaries or linker
   supports copy reloc in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  /* Direct access to an external symbol is allowed unless either
     -mno-direct-extern-access is in effect or the decl itself carries
     the "nodirect_extern_access" attribute.  */
  bool direct_extern_access
    = (ix86_direct_extern_access
       && !(VAR_OR_FUNCTION_DECL_P (exp)
	    && lookup_attribute ("nodirect_extern_access",
				 DECL_ATTRIBUTES (exp))));
  /* Record that at least one symbol in this unit requires indirect
     access.  */
  if (!direct_extern_access)
    ix86_has_no_direct_extern_access = true;
  /* Last argument: an extern-protected symbol may still bind locally
     only when direct access is allowed and either we are non-PIC or
     (64-bit) the linker supports copy relocations in PIEs.  */
  return default_binds_local_p_3 (exp, flag_shlib != 0, true,
				  direct_extern_access,
				  (direct_extern_access
				   && (!flag_pic
				       || (TARGET_64BIT
					   && HAVE_LD_PIE_COPYRELOC != 0))));
}
27171 :
27172 : /* If flag_pic or ix86_direct_extern_access is false, then neither
27173 : local nor global relocs should be placed in readonly memory. */
27174 :
27175 : static int
27176 5129143 : ix86_reloc_rw_mask (void)
27177 : {
27178 5129143 : return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
27179 : }
27180 : #endif
27181 :
27182 : /* Return true iff ADDR can be used as a symbolic base address. */
27183 :
27184 : static bool
27185 3162 : symbolic_base_address_p (rtx addr)
27186 : {
27187 0 : if (SYMBOL_REF_P (addr))
27188 : return true;
27189 :
27190 3138 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
27191 0 : return true;
27192 :
27193 : return false;
27194 : }
27195 :
27196 : /* Return true iff ADDR can be used as a base address. */
27197 :
27198 : static bool
27199 4734 : base_address_p (rtx addr)
27200 : {
27201 0 : if (REG_P (addr))
27202 : return true;
27203 :
27204 2944 : if (symbolic_base_address_p (addr))
27205 0 : return true;
27206 :
27207 : return false;
27208 : }
27209 :
/* If MEM is in the form of [(base+symbase)+offset], extract the three
   parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip a wrapping CONST so the patterns below see the bare PLUS.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Case 1: a bare base (register or symbol), no addend.  */
  if (base_address_p (addr))
    {
      *base = addr;
      *symbase = const0_rtx;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && base_address_p (XEXP (addr, 0)))
    {
      rtx addend = XEXP (addr, 1);

      if (GET_CODE (addend) == CONST)
	addend = XEXP (addend, 0);

      /* Case 2: base + constant offset.  */
      if (CONST_INT_P (addend))
	{
	  *base = XEXP (addr, 0);
	  *symbase = const0_rtx;
	  *offset = addend;
	  return true;
	}

      /* Also accept REG + symbolic ref, with or without a CONST_INT
	 offset.  */
      if (REG_P (XEXP (addr, 0)))
	{
	  /* Case 3: reg + symbol, no constant part.  */
	  if (symbolic_base_address_p (addend))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = addend;
	      *offset = const0_rtx;
	      return true;
	    }

	  /* Case 4: reg + (symbol + constant).  */
	  if (GET_CODE (addend) == PLUS
	      && symbolic_base_address_p (XEXP (addend, 0))
	      && CONST_INT_P (XEXP (addend, 1)))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = XEXP (addend, 0);
	      *offset = XEXP (addend, 1);
	      return true;
	    }
	}
    }

  return false;
}
27276 :
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
				    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
    symbase_1, symbase_2, offset_1, offset_2;

  /* For loads the registers are operands 0/2 and the memories 1/3;
     for stores it is the other way round.  */
  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  /* Both moves must involve the same register number.  */
  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
27328 :
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  Decide whether OP is
   open-coded for MODE1 under optimization mode OPT_TYPE, or left to a
   library call.  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
			optimization_type opt_type)
{
  switch (op)
    {
    /* These expansions are only worthwhile when optimizing for speed.  */
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
    case lround_optab:
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      /* SSE math without SSE4.1 (and without trapping math) only gets
	 the expansion when optimizing for speed; otherwise allow it.  */
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && !TARGET_SSE4_1
	  && mode1 != HFmode)
	return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      /* With SSE4.1 rounding insns (or for HFmode) these are always
	 supported; otherwise only when optimizing for speed.  */
      if ((SSE_FLOAT_MODE_P (mode1)
	   && TARGET_SSE_MATH
	   && TARGET_SSE4_1)
	  || mode1 == HFmode)
	return true;
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);

    default:
      return true;
    }
}
27376 :
27377 : /* Address space support.
27378 :
27379 : This is not "far pointers" in the 16-bit sense, but an easy way
27380 : to use %fs and %gs segment prefixes. Therefore:
27381 :
27382 : (a) All address spaces have the same modes,
   (b) All address spaces have the same address forms,
27384 : (c) While %fs and %gs are technically subsets of the generic
27385 : address space, they are probably not subsets of each other.
27386 : (d) Since we have no access to the segment base register values
27387 : without resorting to a system call, we cannot convert a
27388 : non-default address space to a default address space.
27389 : Therefore we do not claim %fs or %gs are subsets of generic.
27390 :
27391 : Therefore we can (mostly) use the default hooks. */
27392 :
27393 : /* All use of segmentation is assumed to make address 0 valid. */
27394 :
27395 : static bool
27396 67636174 : ix86_addr_space_zero_address_valid (addr_space_t as)
27397 : {
27398 67636174 : return as != ADDR_SPACE_GENERIC;
27399 : }
27400 :
27401 : static void
27402 774345 : ix86_init_libfuncs (void)
27403 : {
27404 774345 : if (TARGET_64BIT)
27405 : {
27406 759391 : set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
27407 759391 : set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
27408 : }
27409 : else
27410 : {
27411 14954 : set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
27412 14954 : set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
27413 : }
27414 :
27415 : #if TARGET_MACHO
27416 : darwin_rename_builtins ();
27417 : #endif
27418 774345 : }
27419 :
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      /* -fexcess-precision=16 requires SSE math (or no x87 at all).  */
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  /* Not reached.  */
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
27475 :
27476 : /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
27477 : bool
27478 347268 : ix86_bitint_type_info (int n, struct bitint_info *info)
27479 : {
27480 347268 : if (n <= 8)
27481 5332 : info->limb_mode = QImode;
27482 341936 : else if (n <= 16)
27483 1893 : info->limb_mode = HImode;
27484 340043 : else if (n <= 32 || (!TARGET_64BIT && n > 64))
27485 45556 : info->limb_mode = SImode;
27486 : else
27487 294487 : info->limb_mode = DImode;
27488 347268 : info->abi_limb_mode = info->limb_mode;
27489 347268 : info->big_endian = false;
27490 347268 : info->extended = false;
27491 347268 : return true;
27492 : }
27493 :
27494 : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
27495 : or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
27496 : based on long double bits, go with the default one for the others. */
27497 :
27498 : static machine_mode
27499 3618778 : ix86_c_mode_for_floating_type (enum tree_index ti)
27500 : {
27501 3618778 : if (ti == TI_LONG_DOUBLE_TYPE)
27502 604023 : return (TARGET_LONG_DOUBLE_64 ? DFmode
27503 603991 : : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
27504 3014755 : return default_mode_for_floating_type (ti);
27505 : }
27506 :
/* Returns modified FUNCTION_TYPE for cdtor callabi.  */
tree
ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
{
  /* Only the 32-bit MS ABI without -mrtd needs an adjustment.  */
  if (TARGET_64BIT
      || TARGET_RTD
      || ix86_function_type_abi (fntype) != MS_ABI)
    return fntype;
  /* For 32-bit MS ABI add thiscall attribute.  */
  tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
			    TYPE_ATTRIBUTES (fntype));
  return build_type_attribute_variant (fntype, attribs);
}
27520 :
/* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   decrements by exactly 2 no matter what the position was, there is no pushb.

   But as CIE data alignment factor on this arch is -4 for 32bit targets
   and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  /* Round the pushed size up to a full word (4 or 8 bytes).  */
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
27533 :
/* Use 8 bits of metadata starting at bit 48 for LAM_U48,
   6 bits of metadata starting at bit 57 for LAM_U57.  */
27536 : #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
27537 : ? 48 \
27538 : : (ix86_lam_type == lam_u57 ? 57 : 0))
27539 : #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
27540 : ? 8 \
27541 : : (ix86_lam_type == lam_u57 ? 6 : 0))
27542 :
27543 : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
27544 : bool
27545 6195836 : ix86_memtag_can_tag_addresses ()
27546 : {
27547 6195836 : return ix86_lam_type != lam_none && TARGET_LP64;
27548 : }
27549 :
/* Implement TARGET_MEMTAG_TAG_BITSIZE.  */
unsigned char
ix86_memtag_tag_bitsize ()
{
  /* 8 tag bits for LAM_U48, 6 for LAM_U57, 0 otherwise.  */
  return IX86_HWASAN_TAG_SIZE;
}
27556 :
/* Implement TARGET_MEMTAG_SET_TAG.  Emit code that inserts TAG into the
   metadata bits of UNTAGGED, preferably into TARGET, and return the
   tagged pointer rtx.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Shift the tag up into the metadata bits, then OR it into the
     untagged address.  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
27577 :
/* Implement TARGET_MEMTAG_EXTRACT_TAG.  Emit code extracting the tag of
   TAGGED_POINTER and return it as a QImode rtx.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  /* Shift the metadata bits down to the bottom of the word.  */
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
				 GEN_INT (IX86_HWASAN_SHIFT), target,
				 /* unsignedp = */0,
				 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
			     gen_int_mode (and_imm, QImode)));
    }
  else
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
27599 :
/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER.
   Emit code clearing the tag bits of TAGGED_POINTER and return the
   untagged address.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone.  */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
				+ (HOST_WIDE_INT_1U << 63) - 1),
			       Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
					   tag_mask, target, true,
					   OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
27614 :
/* Implement TARGET_MEMTAG_ADD_TAG.  Emit code computing BASE + OFFSET
   with BASE's tag incremented by TAG_OFFSET and return the result.  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  /* NOTE(review): mask is derived from IX86_HWASAN_SHIFT rather than
     IX86_HWASAN_TAG_SIZE; only its low byte survives gen_int_mode in
     QImode below — confirm that is intended.  */
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     need to mask the most significant bit off.  */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
  emit_move_insn (base_addr,
		  ix86_memtag_untagged_pointer (base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  emit_move_insn (tagged_addr,
		  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
27638 :
27639 : /* Implement TARGET_HAVE_CCMP. */
27640 : static bool
27641 8094506 : ix86_have_ccmp ()
27642 : {
27643 8094506 : return (bool) TARGET_APX_CCMP;
27644 : }
27645 :
/* Implement TARGET_MODE_CAN_TRANSFER_BITS.  Return true if a value can
   be copied bit-exactly through MODE.  */
static bool
ix86_mode_can_transfer_bits (machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    switch (GET_MODE_INNER (mode))
      {
      case E_SFmode:
      case E_DFmode:
	/* These suffer from normalization upon load when not using SSE.  */
	return !(ix86_fpmath & FPMATH_387);
      default:
	return true;
      }

  /* Non-float modes (and wider float modes) transfer bits exactly.  */
  return true;
}
27664 :
/* Implement TARGET_REDZONE_CLOBBER.  Return a BLKmode MEM covering the
   red zone below the stack pointer, or NULL_RTX when no red zone is in
   use.  */
static rtx
ix86_redzone_clobber ()
{
  /* Note that an asm clobbering the red zone has been seen in this
     function.  */
  cfun->machine->asm_redzone_clobber_seen = true;
  if (ix86_using_red_zone ())
    {
      rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
      rtx mem = gen_rtx_MEM (BLKmode, base);
      set_mem_size (mem, RED_ZONE_SIZE);
      return mem;
    }
  return NULL_RTX;
}
27679 :
27680 : /* Target-specific selftests. */
27681 :
27682 : #if CHECKING_P
27683 :
namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       " (unspec:BLK [\n"
       " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE(-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest
27939 :
27940 : #endif /* CHECKING_P */
27941 :
/* Table of machine-attribute tables for this target; currently just the
   shared GNU attribute table.  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
27946 :
27947 : /* Initialize the GCC target structure. */
27948 : #undef TARGET_RETURN_IN_MEMORY
27949 : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
27950 :
27951 : #undef TARGET_LEGITIMIZE_ADDRESS
27952 : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
27953 :
27954 : #undef TARGET_ATTRIBUTE_TABLE
27955 : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
27956 : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
27957 : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
27958 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27959 : # undef TARGET_MERGE_DECL_ATTRIBUTES
27960 : # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
27961 : #endif
27962 :
27963 : #undef TARGET_INVALID_CONVERSION
27964 : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
27965 :
27966 : #undef TARGET_INVALID_UNARY_OP
27967 : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
27968 :
27969 : #undef TARGET_INVALID_BINARY_OP
27970 : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
27971 :
27972 : #undef TARGET_COMP_TYPE_ATTRIBUTES
27973 : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
27974 :
27975 : #undef TARGET_INIT_BUILTINS
27976 : #define TARGET_INIT_BUILTINS ix86_init_builtins
27977 : #undef TARGET_BUILTIN_DECL
27978 : #define TARGET_BUILTIN_DECL ix86_builtin_decl
27979 : #undef TARGET_EXPAND_BUILTIN
27980 : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
27981 :
27982 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
27983 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
27984 : ix86_builtin_vectorized_function
27985 :
27986 : #undef TARGET_VECTORIZE_BUILTIN_GATHER
27987 : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
27988 :
27989 : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
27990 : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
27991 :
27992 : #undef TARGET_BUILTIN_RECIPROCAL
27993 : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
27994 :
27995 : #undef TARGET_ASM_FUNCTION_EPILOGUE
27996 : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
27997 :
27998 : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
27999 : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
28000 : ix86_print_patchable_function_entry
28001 :
28002 : #undef TARGET_ENCODE_SECTION_INFO
28003 : #ifndef SUBTARGET_ENCODE_SECTION_INFO
28004 : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28005 : #else
28006 : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28007 : #endif
28008 :
28009 : #undef TARGET_ASM_OPEN_PAREN
28010 : #define TARGET_ASM_OPEN_PAREN ""
28011 : #undef TARGET_ASM_CLOSE_PAREN
28012 : #define TARGET_ASM_CLOSE_PAREN ""
28013 :
28014 : #undef TARGET_ASM_BYTE_OP
28015 : #define TARGET_ASM_BYTE_OP ASM_BYTE
28016 :
28017 : #undef TARGET_ASM_ALIGNED_HI_OP
28018 : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28019 : #undef TARGET_ASM_ALIGNED_SI_OP
28020 : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28021 : #ifdef ASM_QUAD
28022 : #undef TARGET_ASM_ALIGNED_DI_OP
28023 : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28024 : #endif
28025 :
28026 : #undef TARGET_PROFILE_BEFORE_PROLOGUE
28027 : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
28028 :
28029 : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
28030 : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
28031 :
28032 : #undef TARGET_ASM_UNALIGNED_HI_OP
28033 : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28034 : #undef TARGET_ASM_UNALIGNED_SI_OP
28035 : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28036 : #undef TARGET_ASM_UNALIGNED_DI_OP
28037 : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28038 :
28039 : #undef TARGET_PRINT_OPERAND
28040 : #define TARGET_PRINT_OPERAND ix86_print_operand
28041 : #undef TARGET_PRINT_OPERAND_ADDRESS
28042 : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
28043 : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
28044 : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
28045 : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
28046 : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
28047 :
28048 : #undef TARGET_SCHED_INIT_GLOBAL
28049 : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
28050 : #undef TARGET_SCHED_ADJUST_COST
28051 : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28052 : #undef TARGET_SCHED_ISSUE_RATE
28053 : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28054 : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28055 : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28056 : ia32_multipass_dfa_lookahead
28057 : #undef TARGET_SCHED_MACRO_FUSION_P
28058 : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
28059 : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
28060 : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
28061 :
28062 : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28063 : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28064 :
28065 : #undef TARGET_MEMMODEL_CHECK
28066 : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
28067 :
28068 : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
28069 : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
28070 :
28071 : #ifdef HAVE_AS_TLS
28072 : #undef TARGET_HAVE_TLS
28073 : #define TARGET_HAVE_TLS true
28074 : #endif
28075 : #undef TARGET_CANNOT_FORCE_CONST_MEM
28076 : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28077 : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28078 : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28079 :
28080 : #undef TARGET_DELEGITIMIZE_ADDRESS
28081 : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28082 :
28083 : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
28084 : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
28085 :
28086 : #undef TARGET_MS_BITFIELD_LAYOUT_P
28087 : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28088 :
28089 : #if TARGET_MACHO
28090 : #undef TARGET_BINDS_LOCAL_P
28091 : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28092 : #else
28093 : #undef TARGET_BINDS_LOCAL_P
28094 : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
28095 : #endif
28096 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28097 : #undef TARGET_BINDS_LOCAL_P
28098 : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28099 : #endif
28100 :
28101 : #undef TARGET_ASM_OUTPUT_MI_THUNK
28102 : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28103 : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28104 : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28105 :
28106 : #undef TARGET_ASM_FILE_START
28107 : #define TARGET_ASM_FILE_START x86_file_start
28108 :
28109 : #undef TARGET_OPTION_OVERRIDE
28110 : #define TARGET_OPTION_OVERRIDE ix86_option_override
28111 :
28112 : #undef TARGET_REGISTER_MOVE_COST
28113 : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
28114 : #undef TARGET_MEMORY_MOVE_COST
28115 : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
28116 : #undef TARGET_RTX_COSTS
28117 : #define TARGET_RTX_COSTS ix86_rtx_costs
28118 : #undef TARGET_INSN_COST
28119 : #define TARGET_INSN_COST ix86_insn_cost
28120 : #undef TARGET_ADDRESS_COST
28121 : #define TARGET_ADDRESS_COST ix86_address_cost
28122 :
28123 : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
28124 : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
28125 : ix86_use_by_pieces_infrastructure_p
28126 :
28127 : #undef TARGET_OVERLAP_OP_BY_PIECES_P
28128 : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
28129 :
28130 : #undef TARGET_FLAGS_REGNUM
28131 : #define TARGET_FLAGS_REGNUM FLAGS_REG
28132 : #undef TARGET_FIXED_CONDITION_CODE_REGS
28133 : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28134 : #undef TARGET_CC_MODES_COMPATIBLE
28135 : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28136 :
28137 : #undef TARGET_MACHINE_DEPENDENT_REORG
28138 : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28139 :
28140 : #undef TARGET_BUILD_BUILTIN_VA_LIST
28141 : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28142 :
28143 : #undef TARGET_FOLD_BUILTIN
28144 : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
28145 :
28146 : #undef TARGET_GIMPLE_FOLD_BUILTIN
28147 : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
28148 :
28149 : #undef TARGET_COMPARE_VERSION_PRIORITY
28150 : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
28151 :
28152 : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
28153 : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
28154 : ix86_generate_version_dispatcher_body
28155 :
28156 : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
28157 : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
28158 : ix86_get_function_versions_dispatcher
28159 :
28160 : #undef TARGET_ENUM_VA_LIST_P
28161 : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
28162 :
28163 : #undef TARGET_FN_ABI_VA_LIST
28164 : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28165 :
28166 : #undef TARGET_CANONICAL_VA_LIST_TYPE
28167 : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28168 :
28169 : #undef TARGET_EXPAND_BUILTIN_VA_START
28170 : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28171 :
28172 : #undef TARGET_MD_ASM_ADJUST
28173 : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
28174 :
28175 : #undef TARGET_C_EXCESS_PRECISION
28176 : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
28177 : #undef TARGET_C_BITINT_TYPE_INFO
28178 : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
28179 : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
28180 : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
28181 : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
28182 : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
28183 : #undef TARGET_PROMOTE_PROTOTYPES
28184 : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28185 : #undef TARGET_PUSH_ARGUMENT
28186 : #define TARGET_PUSH_ARGUMENT ix86_push_argument
28187 : #undef TARGET_SETUP_INCOMING_VARARGS
28188 : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28189 : #undef TARGET_MUST_PASS_IN_STACK
28190 : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28191 : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
28192 : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
28193 : #undef TARGET_FUNCTION_ARG_ADVANCE
28194 : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
28195 : #undef TARGET_FUNCTION_ARG
28196 : #define TARGET_FUNCTION_ARG ix86_function_arg
28197 : #undef TARGET_INIT_PIC_REG
28198 : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
28199 : #undef TARGET_USE_PSEUDO_PIC_REG
28200 : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
28201 : #undef TARGET_FUNCTION_ARG_BOUNDARY
28202 : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
28203 : #undef TARGET_PASS_BY_REFERENCE
28204 : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28205 : #undef TARGET_INTERNAL_ARG_POINTER
28206 : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28207 : #undef TARGET_UPDATE_STACK_BOUNDARY
28208 : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28209 : #undef TARGET_GET_DRAP_RTX
28210 : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28211 : #undef TARGET_STRICT_ARGUMENT_NAMING
28212 : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28213 : #undef TARGET_STATIC_CHAIN
28214 : #define TARGET_STATIC_CHAIN ix86_static_chain
28215 : #undef TARGET_TRAMPOLINE_INIT
28216 : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
28217 : #undef TARGET_RETURN_POPS_ARGS
28218 : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
28219 :
28220 : #undef TARGET_WARN_FUNC_RETURN
28221 : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
28222 :
28223 : #undef TARGET_LEGITIMATE_COMBINED_INSN
28224 : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
28225 :
28226 : #undef TARGET_ASAN_SHADOW_OFFSET
28227 : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
28228 :
28229 : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
28230 : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28231 :
28232 : #undef TARGET_SCALAR_MODE_SUPPORTED_P
28233 : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28234 :
28235 : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
28236 : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
28237 : ix86_libgcc_floating_mode_supported_p
28238 :
28239 : #undef TARGET_VECTOR_MODE_SUPPORTED_P
28240 : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28241 :
28242 : #undef TARGET_C_MODE_FOR_SUFFIX
28243 : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
28244 :
28245 : #ifdef HAVE_AS_TLS
28246 : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
28247 : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
28248 : #endif
28249 :
28250 : #ifdef SUBTARGET_INSERT_ATTRIBUTES
28251 : #undef TARGET_INSERT_ATTRIBUTES
28252 : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
28253 : #endif
28254 :
28255 : #undef TARGET_MANGLE_TYPE
28256 : #define TARGET_MANGLE_TYPE ix86_mangle_type
28257 :
28258 : #undef TARGET_EMIT_SUPPORT_TINFOS
28259 : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
28260 :
28261 : #undef TARGET_STACK_PROTECT_GUARD
28262 : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
28263 :
28264 : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
28265 : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
28266 : ix86_stack_protect_runtime_enabled_p
28267 :
28268 : #if !TARGET_MACHO
28269 : #undef TARGET_STACK_PROTECT_FAIL
28270 : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
28271 : #endif
28272 :
28273 : #undef TARGET_FUNCTION_VALUE
28274 : #define TARGET_FUNCTION_VALUE ix86_function_value
28275 :
28276 : #undef TARGET_FUNCTION_VALUE_REGNO_P
28277 : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
28278 :
28279 : #undef TARGET_ZERO_CALL_USED_REGS
28280 : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
28281 :
28282 : #undef TARGET_PROMOTE_FUNCTION_MODE
28283 : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
28284 :
28285 : #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
28286 : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
28287 :
28288 : #undef TARGET_MEMBER_TYPE_FORCES_BLK
28289 : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
28290 :
28291 : #undef TARGET_INSTANTIATE_DECLS
28292 : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
28293 :
28294 : #undef TARGET_SECONDARY_RELOAD
28295 : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
28296 : #undef TARGET_SECONDARY_MEMORY_NEEDED
28297 : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
28298 : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
28299 : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
28300 :
28301 : #undef TARGET_CLASS_MAX_NREGS
28302 : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
28303 :
28304 : #undef TARGET_PREFERRED_RELOAD_CLASS
28305 : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
28306 : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
28307 : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
28308 : /* When this hook returns true for MODE, the compiler allows
28309 : registers explicitly used in the rtl to be used as spill registers
28310 : but prevents the compiler from extending the lifetime of these
28311 : registers. */
28312 : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
28313 : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
28314 : #undef TARGET_CLASS_LIKELY_SPILLED_P
28315 : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
28316 : #undef TARGET_CALLEE_SAVE_COST
28317 : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
28318 :
28319 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
28320 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
28321 : ix86_builtin_vectorization_cost
28322 : #undef TARGET_VECTORIZE_VEC_PERM_CONST
28323 : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
28324 : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
28325 : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
28326 : ix86_preferred_simd_mode
28327 : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
28328 : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
28329 : ix86_split_reduction
28330 : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
28331 : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
28332 : ix86_autovectorize_vector_modes
28333 : #undef TARGET_VECTORIZE_GET_MASK_MODE
28334 : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
28335 : #undef TARGET_VECTORIZE_CREATE_COSTS
28336 : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
28337 :
28338 : #undef TARGET_SET_CURRENT_FUNCTION
28339 : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
28340 :
28341 : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
28342 : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
28343 :
28344 : #undef TARGET_OPTION_SAVE
28345 : #define TARGET_OPTION_SAVE ix86_function_specific_save
28346 :
28347 : #undef TARGET_OPTION_RESTORE
28348 : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
28349 :
28350 : #undef TARGET_OPTION_POST_STREAM_IN
28351 : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
28352 :
28353 : #undef TARGET_OPTION_PRINT
28354 : #define TARGET_OPTION_PRINT ix86_function_specific_print
28355 :
28356 : #undef TARGET_CAN_INLINE_P
28357 : #define TARGET_CAN_INLINE_P ix86_can_inline_p
28358 :
28359 : #undef TARGET_LEGITIMATE_ADDRESS_P
28360 : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
28361 :
28362 : #undef TARGET_REGISTER_PRIORITY
28363 : #define TARGET_REGISTER_PRIORITY ix86_register_priority
28364 :
28365 : #undef TARGET_REGISTER_USAGE_LEVELING_P
28366 : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
28367 :
28368 : #undef TARGET_LEGITIMATE_CONSTANT_P
28369 : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
28370 :
28371 : #undef TARGET_COMPUTE_FRAME_LAYOUT
28372 : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
28373 :
28374 : #undef TARGET_FRAME_POINTER_REQUIRED
28375 : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
28376 :
28377 : #undef TARGET_CAN_ELIMINATE
28378 : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
28379 :
28380 : #undef TARGET_EXTRA_LIVE_ON_ENTRY
28381 : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
28382 :
28383 : #undef TARGET_ASM_CODE_END
28384 : #define TARGET_ASM_CODE_END ix86_code_end
28385 :
28386 : #undef TARGET_CONDITIONAL_REGISTER_USAGE
28387 : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
28388 :
28389 : #undef TARGET_CANONICALIZE_COMPARISON
28390 : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
28391 :
28392 : #undef TARGET_LOOP_UNROLL_ADJUST
28393 : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
28394 :
28395 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
28396 : #undef TARGET_SPILL_CLASS
28397 : #define TARGET_SPILL_CLASS ix86_spill_class
28398 :
28399 : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
28400 : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
28401 : ix86_simd_clone_compute_vecsize_and_simdlen
28402 :
28403 : #undef TARGET_SIMD_CLONE_ADJUST
28404 : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
28405 :
28406 : #undef TARGET_SIMD_CLONE_USABLE
28407 : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
28408 :
28409 : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
28410 : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
28411 :
28412 : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
28413 : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
28414 : ix86_float_exceptions_rounding_supported_p
28415 :
28416 : #undef TARGET_MODE_EMIT
28417 : #define TARGET_MODE_EMIT ix86_emit_mode_set
28418 :
28419 : #undef TARGET_MODE_NEEDED
28420 : #define TARGET_MODE_NEEDED ix86_mode_needed
28421 :
28422 : #undef TARGET_MODE_AFTER
28423 : #define TARGET_MODE_AFTER ix86_mode_after
28424 :
28425 : #undef TARGET_MODE_ENTRY
28426 : #define TARGET_MODE_ENTRY ix86_mode_entry
28427 :
28428 : #undef TARGET_MODE_EXIT
28429 : #define TARGET_MODE_EXIT ix86_mode_exit
28430 :
28431 : #undef TARGET_MODE_PRIORITY
28432 : #define TARGET_MODE_PRIORITY ix86_mode_priority
28433 :
28434 : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
28435 : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
28436 :
28437 : #undef TARGET_OFFLOAD_OPTIONS
28438 : #define TARGET_OFFLOAD_OPTIONS \
28439 : ix86_offload_options
28440 :
28441 : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
28442 : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
28443 :
28444 : #undef TARGET_OPTAB_SUPPORTED_P
28445 : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
28446 :
28447 : #undef TARGET_HARD_REGNO_SCRATCH_OK
28448 : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
28449 :
28450 : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
28451 : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
28452 :
28453 : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
28454 : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
28455 :
28456 : #undef TARGET_INIT_LIBFUNCS
28457 : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
28458 :
28459 : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
28460 : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
28461 :
28462 : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
28463 : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
28464 :
28465 : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
28466 : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
28467 :
28468 : #undef TARGET_HARD_REGNO_NREGS
28469 : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
28470 : #undef TARGET_HARD_REGNO_MODE_OK
28471 : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
28472 :
28473 : #undef TARGET_MODES_TIEABLE_P
28474 : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
28475 :
28476 : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
28477 : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
28478 : ix86_hard_regno_call_part_clobbered
28479 :
28480 : #undef TARGET_INSN_CALLEE_ABI
28481 : #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
28482 :
28483 : #undef TARGET_CAN_CHANGE_MODE_CLASS
28484 : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
28485 :
28486 : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
28487 : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
28488 :
28489 : #undef TARGET_STATIC_RTX_ALIGNMENT
28490 : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
28491 : #undef TARGET_CONSTANT_ALIGNMENT
28492 : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
28493 :
28494 : #undef TARGET_EMPTY_RECORD_P
28495 : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
28496 :
28497 : #undef TARGET_WARN_PARAMETER_PASSING_ABI
28498 : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
28499 :
28500 : #undef TARGET_GET_MULTILIB_ABI_NAME
28501 : #define TARGET_GET_MULTILIB_ABI_NAME \
28502 : ix86_get_multilib_abi_name
28503 :
28504 : #undef TARGET_IFUNC_REF_LOCAL_OK
28505 : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
28506 :
28507 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
28508 : # undef TARGET_ASM_RELOC_RW_MASK
28509 : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
28510 : #endif
28511 :
28512 : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
28513 : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
28514 :
28515 : #undef TARGET_MEMTAG_ADD_TAG
28516 : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
28517 :
28518 : #undef TARGET_MEMTAG_SET_TAG
28519 : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
28520 :
28521 : #undef TARGET_MEMTAG_EXTRACT_TAG
28522 : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
28523 :
28524 : #undef TARGET_MEMTAG_UNTAGGED_POINTER
28525 : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
28526 :
28527 : #undef TARGET_MEMTAG_TAG_BITSIZE
28528 : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
28529 :
28530 : #undef TARGET_GEN_CCMP_FIRST
28531 : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
28532 :
28533 : #undef TARGET_GEN_CCMP_NEXT
28534 : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
28535 :
28536 : #undef TARGET_HAVE_CCMP
28537 : #define TARGET_HAVE_CCMP ix86_have_ccmp
28538 :
28539 : #undef TARGET_MODE_CAN_TRANSFER_BITS
28540 : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
28541 :
28542 : #undef TARGET_REDZONE_CLOBBER
28543 : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
28544 :
/* Implement TARGET_LIBC_HAS_FAST_FUNCTION.  Return true when the C
   library is known to provide a fast implementation of built-in
   function FCODE.  Only glibc's mempcpy qualifies here; on non-glibc
   targets (or when OPTION_GLIBC is not defined) always return false.  */
28545 : static bool
28546 94254 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
28547 : {
28548 : #ifdef OPTION_GLIBC
28549 94254 : if (OPTION_GLIBC)
28550 94254 : return (built_in_function)fcode == BUILT_IN_MEMPCPY;
28551 : else
28552 : return false;
28553 : #else
28554 : return false;
28555 : #endif
28556 : }
28557 :
28558 : #undef TARGET_LIBC_HAS_FAST_FUNCTION
28559 : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
28560 :
/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  Return the maximum error
   in ulps of the libm implementation of combined function CFN for
   MODE; BOUNDARY_P requests the error near rounding-mode boundaries.
   ~0U means the error is unbounded.  For glibc the generic Linux/glibc
   table is combined with the (larger) libmvec error, since with fast
   math the vectorizer may select a libmvec routine instead.  */
28561 : static unsigned
28562 78746 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
28563 : bool boundary_p)
28564 : {
28565 : #ifdef OPTION_GLIBC
28566 78746 : bool glibc_p = OPTION_GLIBC;
28567 : #else
28568 : bool glibc_p = false;
28569 : #endif
28570 78746 : if (glibc_p)
28571 : {
28572 : /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
28573 78746 : unsigned int libmvec_ret = 0;
/* These flags together correspond to -ffast-math, i.e. the conditions
   under which calls may be vectorized to libmvec entry points.  */
28574 78746 : if (!flag_trapping_math
28575 8296 : && flag_unsafe_math_optimizations
28576 3374 : && flag_finite_math_only
28577 3348 : && !flag_signed_zeros
28578 3348 : && !flag_errno_math)
28579 3348 : switch (cfn)
28580 : {
28581 1396 : CASE_CFN_COS:
28582 1396 : CASE_CFN_COS_FN:
28583 1396 : CASE_CFN_SIN:
28584 1396 : CASE_CFN_SIN_FN:
28585 1396 : if (!boundary_p)
28586 : {
28587 : /* With non-default rounding modes, libmvec provides
28588 : complete garbage in results. E.g.
28589 : _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
28590 : returns 0.00333309174f rather than 1.40129846e-45f. */
28591 587 : if (flag_rounding_math)
28592 : return ~0U;
28593 : /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
28594 : claims libmvec maximum error is 4ulps.
28595 : My own random testing indicates 2ulps for SFmode and
28596 : 0.5ulps for DFmode, but let's go with the 4ulps. */
28597 : libmvec_ret = 4;
28598 : }
28599 : break;
28600 : default:
28601 : break;
28602 : }
28603 78746 : unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
28604 : boundary_p);
28605 78746 : return MAX (ret, libmvec_ret);
28606 : }
28607 0 : return default_libm_function_max_error (cfn, mode, boundary_p);
28608 : }
28609 :
28610 : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
28611 : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
28612 :
28613 : #if TARGET_MACHO
/* Implement TARGET_CANNOT_COPY_INSN_P (Darwin only).  Reject copying of
   the 32-bit PIC base set-up insn (an UNSPEC_SET_GOT SET_SRC);
   presumably duplicating it would duplicate the PIC-base label the
   sequence emits -- NOTE(review): confirm against the insn pattern.
   64-bit code has no such insn, so everything is copyable there.  */
28614 : static bool
28615 : ix86_cannot_copy_insn_p (rtx_insn *insn)
28616 : {
28617 : if (TARGET_64BIT)
28618 : return false;
28619 :
28620 : rtx set = single_set (insn);
28621 : if (set)
28622 : {
28623 : rtx src = SET_SRC (set);
28624 : if (GET_CODE (src) == UNSPEC
28625 : && XINT (src, 1) == UNSPEC_SET_GOT)
28626 : return true;
28627 : }
28628 : return false;
28629 : }
28630 :
28631 : #undef TARGET_CANNOT_COPY_INSN_P
28632 : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
28633 :
28634 : #endif
28635 :
28636 : #if CHECKING_P
28637 : #undef TARGET_RUN_TARGET_SELFTESTS
28638 : #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
28639 : #endif /* #if CHECKING_P */
28640 :
28641 : #undef TARGET_DOCUMENTATION_NAME
28642 : #define TARGET_DOCUMENTATION_NAME "x86"
28643 :
28644 : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28645 : sbitmap
28646 735781 : ix86_get_separate_components (void)
28647 : {
28648 735781 : HOST_WIDE_INT offset, to_allocate;
28649 735781 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28650 735781 : bitmap_clear (components);
28651 735781 : struct machine_function *m = cfun->machine;
28652 :
/* OFFSET walks the GPR save slots below; TO_ALLOCATE is the frame
   span below the SSE save area, fed to save_regs_using_push_pop to
   decide whether PUSH/POP (incompatible with separate wrapping,
   which uses MOV) would be chosen -- see check further down.  */
28653 735781 : offset = m->frame.stack_pointer_offset;
28654 735781 : to_allocate = offset - m->frame.sse_reg_save_offset;
28655 :
28656 : /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
28657 : Experiments show that APX PPX can speed up the prologue. If the function
28658 : does not exit early during actual execution, then using APX PPX is faster.
28659 : If the function always exits early during actual execution, then shrink
28660 : wrap separate reduces the number of MOV (PUSH/POP) instructions actually
28661 : executed, thus speeding up execution.
28662 : foo:
28663 : movl $1, %eax
28664 : testq %rdi, %rdi
28665 : jne .L60
28666 : ret ---> early return.
28667 : .L60:
28668 : subq $88, %rsp ---> belong to prologue.
28669 : xorl %eax, %eax
28670 : movq %rbx, 40(%rsp) ---> belong to prologue.
28671 : movq 8(%rdi), %rbx
28672 : movq %rbp, 48(%rsp) ---> belong to prologue.
28673 : movq %rdi, %rbp
28674 : testq %rbx, %rbx
28675 : jne .L61
28676 : movq 40(%rsp), %rbx
28677 : movq 48(%rsp), %rbp
28678 : addq $88, %rsp
28679 : ret
28680 : .L61:
28681 : movq %r12, 56(%rsp) ---> belong to prologue.
28682 : movq %r13, 64(%rsp) ---> belong to prologue.
28683 : movq %r14, 72(%rsp) ---> belong to prologue.
28684 : ... ...
28685 :
28686 : Disable shrink wrap separate when PPX is enabled. */
28687 735781 : if ((TARGET_APX_PPX && !crtl->calls_eh_return)
28688 735316 : || cfun->machine->func_type != TYPE_NORMAL
28689 : || TARGET_SEH
28690 735218 : || crtl->stack_realign_needed
28691 725596 : || m->call_ms2sysv)
28692 : return components;
28693 :
28694 : /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
28695 : Disable shrink wrap separate when MOV is prohibited. */
28696 723674 : if (save_regs_using_push_pop (to_allocate))
28697 : return components;
28698 :
/* Each call-saved GPR is a candidate component; its save slot is
   UNITS_PER_WORD past the previous one.  */
28699 32609055 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28700 32258420 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
28701 : {
28702 : /* Skip registers with large offsets, where a pseudo may be needed. */
28703 605503 : if (IN_RANGE (offset, -0x8000, 0x7fff))
28704 604430 : bitmap_set_bit (components, regno);
28705 650810 : offset += UNITS_PER_WORD;
28706 : }
28707 :
28708 : /* Don't mess with the following registers. */
28709 350635 : if (frame_pointer_needed)
28710 6306 : bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
28711 :
28712 350635 : if (crtl->drap_reg)
28713 128 : bitmap_clear_bit (components, REGNO (crtl->drap_reg));
28714 :
28715 350635 : if (pic_offset_table_rtx)
28716 29534 : bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
28717 :
28718 : return components;
28719 : }
28720 :
28721 : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
28722 : sbitmap
28723 9439949 : ix86_components_for_bb (basic_block bb)
28724 : {
28725 9439949 : bitmap in = DF_LIVE_IN (bb);
28726 9439949 : bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
28727 9439949 : bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
28728 :
28729 9439949 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28730 9439949 : bitmap_clear (components);
28731 :
/* Aggregate the ABIs of every call in BB; registers the current
   function must save on the callees' behalf (extra_caller_saves)
   count as used in this block even if liveness does not show them.  */
28732 9439949 : function_abi_aggregator callee_abis;
28733 9439949 : rtx_insn *insn;
28734 110356657 : FOR_BB_INSNS (bb, insn)
28735 100916708 : if (CALL_P (insn))
28736 3054056 : callee_abis.note_callee_abi (insn_callee_abi (insn));
28737 9439949 : HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
28738 :
28739 : /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
28740 877915257 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28741 868475308 : if (!fixed_regs[regno]
28742 868475308 : && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
28743 441582573 : || bitmap_bit_p (in, regno)
28744 415733232 : || bitmap_bit_p (gen, regno)
28745 403127763 : || bitmap_bit_p (kill, regno)))
28746 38719748 : bitmap_set_bit (components, regno);
28747 :
28748 9439949 : return components;
28749 : }
28750 :
28751 : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
/* x86 never has a target-specific reason to disqualify a component on
   an edge, so this hook is an intentional no-op; the parameters are
   unnamed because none are used.  */
28752 : void
28753 488211 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
28754 : {
28755 : /* Nothing to do for x86. */
28756 488211 : }
28757 :
28758 : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
28759 : void
28760 175865 : ix86_emit_prologue_components (sbitmap components)
28761 : {
28762 175865 : HOST_WIDE_INT cfa_offset;
28763 175865 : struct machine_function *m = cfun->machine;
28764 :
/* CFA_OFFSET starts at the first GPR save slot relative to the current
   stack pointer and is stepped by UNITS_PER_WORD per saved GPR, walking
   registers in the same order as ix86_get_separate_components so the
   slot assignment matches.  */
28765 175865 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
28766 175865 : - m->frame.stack_pointer_offset;
28767 16355445 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28768 16179580 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
28769 : {
/* Only the separately-wrapped registers are stored here, but the
   offset advances past every saved GPR's slot regardless.  */
28770 796062 : if (bitmap_bit_p (components, regno))
28771 202417 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
28772 846032 : cfa_offset -= UNITS_PER_WORD;
28773 : }
28774 175865 : }
28775 :
28776 : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
/* Reload each separately-wrapped GPR from its frame slot, walking the
   saved registers with the same offset computation as the prologue
   counterpart so loads match the earlier stores.  */
28777 : void
28778 156089 : ix86_emit_epilogue_components (sbitmap components)
28779 : {
28780 156089 : HOST_WIDE_INT cfa_offset;
28781 156089 : struct machine_function *m = cfun->machine;
28782 156089 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
28783 156089 : - m->frame.stack_pointer_offset;
28784 :
28785 14516277 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28786 14360188 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
28787 : {
28788 709608 : if (bitmap_bit_p (components, regno))
28789 : {
28790 273612 : rtx reg = gen_rtx_REG (word_mode, regno);
28791 273612 : rtx mem;
28792 273612 : rtx_insn *insn;
28793 :
28794 273612 : mem = choose_baseaddr (cfa_offset, NULL);
28795 273612 : mem = gen_frame_mem (word_mode, mem);
28796 273612 : insn = emit_move_insn (reg, mem);
28797 :
/* Annotate for CFI: past this insn the register no longer lives
   in its stack save slot.  */
28798 273612 : RTX_FRAME_RELATED_P (insn) = 1;
28799 273612 : add_reg_note (insn, REG_CFA_RESTORE, reg);
28800 : }
28801 766197 : cfa_offset -= UNITS_PER_WORD;
28802 : }
28803 156089 : }
28804 :
28805 : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
28806 : void
28807 45765 : ix86_set_handled_components (sbitmap components)
28808 : {
28809 4256145 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28810 4210380 : if (bitmap_bit_p (components, regno))
28811 : {
/* Mark the register as saved/restored per-component so the normal
   prologue/epilogue skips it, and force MOV-based register saves
   (separate wrapping is incompatible with PUSH/POP).  */
28812 107683 : cfun->machine->reg_is_wrapped_separately[regno] = true;
28813 107683 : cfun->machine->use_fast_prologue_epilogue = true;
28814 107683 : cfun->machine->frame.save_regs_using_mov = true;
28815 : }
28816 45765 : }
28817 :
28818 : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
28819 : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
28820 : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
28821 : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
28822 : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
28823 : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
28824 : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
28825 : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
28826 : ix86_emit_prologue_components
28827 : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
28828 : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
28829 : ix86_emit_epilogue_components
28830 : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
28831 : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
28832 :
/* The x86 target hook vector, populated from the TARGET_* macro
   overrides accumulated above.  */
28833 : struct gcc_target targetm = TARGET_INITIALIZER;
28834 :
28835 : #include "gt-i386.h"
|