Line data Source code
1 : /* Subroutines used for code generation on IA-32.
2 : Copyright (C) 1988-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #define INCLUDE_STRING
21 : #define IN_TARGET_CODE 1
22 :
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "memmodel.h"
30 : #include "gimple.h"
31 : #include "cfghooks.h"
32 : #include "cfgloop.h"
33 : #include "df.h"
34 : #include "tm_p.h"
35 : #include "stringpool.h"
36 : #include "expmed.h"
37 : #include "optabs.h"
38 : #include "regs.h"
39 : #include "emit-rtl.h"
40 : #include "recog.h"
41 : #include "cgraph.h"
42 : #include "diagnostic.h"
43 : #include "cfgbuild.h"
44 : #include "alias.h"
45 : #include "fold-const.h"
46 : #include "attribs.h"
47 : #include "calls.h"
48 : #include "stor-layout.h"
49 : #include "varasm.h"
50 : #include "output.h"
51 : #include "insn-attr.h"
52 : #include "flags.h"
53 : #include "except.h"
54 : #include "explow.h"
55 : #include "expr.h"
56 : #include "cfgrtl.h"
57 : #include "common/common-target.h"
58 : #include "langhooks.h"
59 : #include "reload.h"
60 : #include "gimplify.h"
61 : #include "dwarf2.h"
62 : #include "tm-constrs.h"
63 : #include "cselib.h"
64 : #include "sched-int.h"
65 : #include "opts.h"
66 : #include "tree-pass.h"
67 : #include "context.h"
68 : #include "pass_manager.h"
69 : #include "target-globals.h"
70 : #include "gimple-iterator.h"
71 : #include "gimple-fold.h"
72 : #include "tree-vectorizer.h"
73 : #include "shrink-wrap.h"
74 : #include "builtins.h"
75 : #include "rtl-iter.h"
76 : #include "tree-iterator.h"
77 : #include "dbgcnt.h"
78 : #include "case-cfn-macros.h"
79 : #include "dojump.h"
80 : #include "fold-const-call.h"
81 : #include "tree-vrp.h"
82 : #include "tree-ssanames.h"
83 : #include "selftest.h"
84 : #include "selftest-rtl.h"
85 : #include "print-rtl.h"
86 : #include "intl.h"
87 : #include "ifcvt.h"
88 : #include "symbol-summary.h"
89 : #include "sreal.h"
90 : #include "ipa-cp.h"
91 : #include "ipa-prop.h"
92 : #include "ipa-fnsummary.h"
93 : #include "wide-int-bitmask.h"
94 : #include "tree-vector-builder.h"
95 : #include "debug.h"
96 : #include "dwarf2out.h"
97 : #include "i386-options.h"
98 : #include "i386-builtins.h"
99 : #include "i386-expand.h"
100 : #include "i386-features.h"
101 : #include "function-abi.h"
102 : #include "rtl-error.h"
103 : #include "gimple-pretty-print.h"
104 :
105 : /* This file should be included last. */
106 : #include "target-def.h"
107 :
108 : static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
109 : static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
110 :
111 :
112 : #ifndef CHECK_STACK_LIMIT
113 : #define CHECK_STACK_LIMIT (-1)
114 : #endif
115 :
116 : /* Return index of given mode in mult and division cost tables. */
117 : #define MODE_INDEX(mode) \
118 : ((mode) == QImode ? 0 \
119 : : (mode) == HImode ? 1 \
120 : : (mode) == SImode ? 2 \
121 : : (mode) == DImode ? 3 \
122 : : 4)
123 :
124 :
125 : /* Set by -mtune. */
126 : const struct processor_costs *ix86_tune_cost = NULL;
127 :
128 : /* Set by -mtune or -Os. */
129 : const struct processor_costs *ix86_cost = NULL;
130 :
131 : /* In case the average insn count for single function invocation is
132 : lower than this constant, emit fast (but longer) prologue and
133 : epilogue code. */
134 : #define FAST_PROLOGUE_INSN_COUNT 20
135 :
136 : /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
137 : static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
138 : static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
139 : static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
140 :
141 : /* Array of the smallest class containing reg number REGNO, indexed by
142 : REGNO. Used by REGNO_REG_CLASS in i386.h. */
143 :
144 : enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
145 : {
146 : /* ax, dx, cx, bx */
147 : AREG, DREG, CREG, BREG,
148 : /* si, di, bp, sp */
149 : SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
150 : /* FP registers */
151 : FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
152 : FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
153 : /* arg pointer, flags, fpsr, frame */
154 : NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
155 : /* SSE registers */
156 : SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
157 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
158 : /* MMX registers */
159 : MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
160 : MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 : /* REX registers */
162 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 : /* SSE REX registers */
165 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
166 : SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 : /* AVX-512 SSE registers */
168 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 : ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 : /* Mask registers. */
173 : ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
174 : MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 : /* REX2 registers */
176 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 : GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
180 : };
181 :
182 : /* The "default" register map used in 32bit mode. */
183 :
184 : unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
185 : {
186 : /* general regs */
187 : 0, 2, 1, 3, 6, 7, 4, 5,
188 : /* fp regs */
189 : 12, 13, 14, 15, 16, 17, 18, 19,
190 : /* arg, flags, fpsr, frame */
191 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
192 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 : /* SSE */
194 : 21, 22, 23, 24, 25, 26, 27, 28,
195 : /* MMX */
196 : 29, 30, 31, 32, 33, 34, 35, 36,
197 : /* extended integer registers */
198 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 : /* extended sse registers */
201 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 : /* AVX-512 registers 16-23 */
204 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
205 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 : /* AVX-512 registers 24-31 */
207 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
208 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 : /* Mask registers. NOTE(review): the initializer stops here, while regclass_map and debugger64_register_map list 16 further REX2 integer registers for the same array bound; those trailing slots are therefore zero-initialized (DWARF number 0) rather than INVALID_REGNUM. Presumably harmless because the REX2 registers are not accessible in 32-bit mode -- confirm this is intended. */
210 : 93, 94, 95, 96, 97, 98, 99, 100
211 : };
212 :
213 : /* The "default" register map used in 64bit mode. */
214 :
215 : unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
216 : {
217 : /* general regs */
218 : 0, 1, 2, 3, 4, 5, 6, 7,
219 : /* fp regs */
220 : 33, 34, 35, 36, 37, 38, 39, 40,
221 : /* arg, flags, fpsr, frame */
222 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
223 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 : /* SSE */
225 : 17, 18, 19, 20, 21, 22, 23, 24,
226 : /* MMX */
227 : 41, 42, 43, 44, 45, 46, 47, 48,
228 : /* extended integer registers */
229 : 8, 9, 10, 11, 12, 13, 14, 15,
230 : /* extended SSE registers */
231 : 25, 26, 27, 28, 29, 30, 31, 32,
232 : /* AVX-512 registers 16-23 */
233 : 67, 68, 69, 70, 71, 72, 73, 74,
234 : /* AVX-512 registers 24-31 */
235 : 75, 76, 77, 78, 79, 80, 81, 82,
236 : /* Mask registers */
237 : 118, 119, 120, 121, 122, 123, 124, 125,
238 : /* REX2 extended integer registers */
239 : 130, 131, 132, 133, 134, 135, 136, 137,
240 : 138, 139, 140, 141, 142, 143, 144, 145
241 : };
242 :
243 : /* Define the register numbers to be used in Dwarf debugging information.
244 : The SVR4 reference port C compiler uses the following register numbers
245 : in its Dwarf output code:
246 : 0 for %eax (gcc regno = 0)
247 : 1 for %ecx (gcc regno = 2)
248 : 2 for %edx (gcc regno = 1)
249 : 3 for %ebx (gcc regno = 3)
250 : 4 for %esp (gcc regno = 7)
251 : 5 for %ebp (gcc regno = 6)
252 : 6 for %esi (gcc regno = 4)
253 : 7 for %edi (gcc regno = 5)
254 : The following three DWARF register numbers are never generated by
255 : the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 : believed these numbers have these meanings.
257 : 8 for %eip (no gcc equivalent)
258 : 9 for %eflags (gcc regno = 17)
259 : 10 for %trapno (no gcc equivalent)
260 : It is not at all clear how we should number the FP stack registers
261 : for the x86 architecture. If the version of SDB on x86/svr4 were
262 : a bit less brain dead with respect to floating-point then we would
263 : have a precedent to follow with respect to DWARF register numbers
264 : for x86 FP registers, but the SDB on x86/svr4 was so completely
265 : broken with respect to FP registers that it is hardly worth thinking
266 : of it as something to strive for compatibility with.
267 : The version of x86/svr4 SDB I had does (partially)
268 : seem to believe that DWARF register number 11 is associated with
269 : the x86 register %st(0), but that's about all. Higher DWARF
270 : register numbers don't seem to be associated with anything in
271 : particular, and even for DWARF regno 11, SDB only seemed to under-
272 : stand that it should say that a variable lives in %st(0) (when
273 : asked via an `=' command) if we said it was in DWARF regno 11,
274 : but SDB still printed garbage when asked for the value of the
275 : variable in question (via a `/' command).
276 : (Also note that the labels SDB printed for various FP stack regs
277 : when doing an `x' command were all wrong.)
278 : Note that these problems generally don't affect the native SVR4
279 : C compiler because it doesn't allow the use of -O with -g and
280 : because when it is *not* optimizing, it allocates a memory
281 : location for each floating-point variable, and the memory
282 : location is what gets described in the DWARF AT_location
283 : attribute for the variable in question.
284 : Regardless of the severe mental illness of the x86/svr4 SDB, we
285 : do something sensible here and we use the following DWARF
286 : register numbers. Note that these are all stack-top-relative
287 : numbers.
288 : 11 for %st(0) (gcc regno = 8)
289 : 12 for %st(1) (gcc regno = 9)
290 : 13 for %st(2) (gcc regno = 10)
291 : 14 for %st(3) (gcc regno = 11)
292 : 15 for %st(4) (gcc regno = 12)
293 : 16 for %st(5) (gcc regno = 13)
294 : 17 for %st(6) (gcc regno = 14)
295 : 18 for %st(7) (gcc regno = 15)
296 : */
297 : unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
298 : {
299 : /* general regs */
300 : 0, 2, 1, 3, 6, 7, 5, 4,
301 : /* fp regs */
302 : 11, 12, 13, 14, 15, 16, 17, 18,
303 : /* arg, flags, fpsr, frame */
304 : IGNORED_DWARF_REGNUM, 9,
305 : IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
306 : /* SSE registers */
307 : 21, 22, 23, 24, 25, 26, 27, 28,
308 : /* MMX registers */
309 : 29, 30, 31, 32, 33, 34, 35, 36,
310 : /* extended integer registers */
311 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 : /* extended sse registers */
314 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
315 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 : /* AVX-512 registers 16-23 */
317 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
318 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 : /* AVX-512 registers 24-31 */
320 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
321 : INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 : /* Mask registers. NOTE(review): the initializer stops here, while regclass_map and debugger64_register_map list 16 further REX2 integer registers for the same array bound; those trailing slots are therefore zero-initialized (DWARF number 0) rather than INVALID_REGNUM. Presumably harmless because the REX2 registers are not accessible in 32-bit mode -- confirm this is intended. */
323 : 93, 94, 95, 96, 97, 98, 99, 100
324 : };
325 :
326 : /* Define parameter passing and return registers. */
327 :
328 : static int const x86_64_int_parameter_registers[6] =
329 : {
330 : DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
331 : };
332 :
333 : static int const x86_64_ms_abi_int_parameter_registers[4] =
334 : {
335 : CX_REG, DX_REG, R8_REG, R9_REG
336 : };
337 :
338 : /* Similar to Clang's preserve_none function parameter passing.
339 : NB: Use DI_REG and SI_REG, see ix86_function_arg_regno_p. */
340 :
341 : static int const x86_64_preserve_none_int_parameter_registers[6] =
342 : {
343 : R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
344 : };
345 :
346 : static int const x86_64_int_return_registers[2] =
347 : {
348 : AX_REG, DX_REG
349 : };
350 :
351 : /* Define the structure for the machine field in struct function. */
352 :
353 : struct GTY(()) stack_local_entry {
354 : unsigned short mode;
355 : unsigned short n;
356 : rtx rtl;
357 : struct stack_local_entry *next;
358 : };
359 :
360 : /* Which cpu are we scheduling for. */
361 : enum attr_cpu ix86_schedule;
362 :
363 : /* Which cpu are we optimizing for. */
364 : enum processor_type ix86_tune;
365 :
366 : /* Which instruction set architecture to use. */
367 : enum processor_type ix86_arch;
368 :
369 : /* True if processor has SSE prefetch instruction. */
370 : unsigned char ix86_prefetch_sse;
371 :
372 : /* Preferred alignment for stack boundary in bits. */
373 : unsigned int ix86_preferred_stack_boundary;
374 :
375 : /* Alignment for incoming stack boundary in bits specified at
376 : command line. */
377 : unsigned int ix86_user_incoming_stack_boundary;
378 :
379 : /* Default alignment for incoming stack boundary in bits. */
380 : unsigned int ix86_default_incoming_stack_boundary;
381 :
382 : /* Alignment for incoming stack boundary in bits. */
383 : unsigned int ix86_incoming_stack_boundary;
384 :
385 : /* True if there is no direct access to extern symbols. */
386 : bool ix86_has_no_direct_extern_access;
387 :
388 : /* Calling abi specific va_list type nodes. */
389 : tree sysv_va_list_type_node;
390 : tree ms_va_list_type_node;
391 :
392 : /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
393 : char internal_label_prefix[16];
394 : int internal_label_prefix_len;
395 :
396 : /* Fence to use after loop using movnt. */
397 : tree x86_mfence;
398 :
399 : /* Register class used for passing given 64bit part of the argument.
400 : These represent classes as documented by the PS ABI, with the exception
401 : of SSESF, SSEDF classes, that are basically SSE class, just gcc will
402 : use SF or DFmode move instead of DImode to avoid reformatting penalties.
403 :
404 : Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
405 : whenever possible (upper half does contain padding). */
406 : enum x86_64_reg_class
407 : {
408 : X86_64_NO_CLASS,
409 : X86_64_INTEGER_CLASS,
410 : X86_64_INTEGERSI_CLASS,
411 : X86_64_SSE_CLASS,
412 : X86_64_SSEHF_CLASS,
413 : X86_64_SSESF_CLASS,
414 : X86_64_SSEDF_CLASS,
415 : X86_64_SSEUP_CLASS,
416 : X86_64_X87_CLASS,
417 : X86_64_X87UP_CLASS,
418 : X86_64_COMPLEX_X87_CLASS,
419 : X86_64_MEMORY_CLASS
420 : };
421 :
422 : #define MAX_CLASSES 8
423 :
424 : /* Table of constants used by fldpi, fldln2, etc.... */
425 : static REAL_VALUE_TYPE ext_80387_constants_table [5];
426 : static bool ext_80387_constants_init;
427 :
428 :
429 : static rtx ix86_function_value (const_tree, const_tree, bool);
430 : static bool ix86_function_value_regno_p (const unsigned int);
431 : static unsigned int ix86_function_arg_boundary (machine_mode,
432 : const_tree);
433 : static bool ix86_overaligned_stack_slot_required (void);
434 : static rtx ix86_static_chain (const_tree, bool);
435 : static int ix86_function_regparm (const_tree, const_tree);
436 : static void ix86_compute_frame_layout (void);
437 : static tree ix86_canonical_va_list_type (tree);
438 : static unsigned int split_stack_prologue_scratch_regno (void);
439 : static bool i386_asm_output_addr_const_extra (FILE *, rtx);
440 :
441 : static bool ix86_can_inline_p (tree, tree);
442 : static unsigned int ix86_minimum_incoming_stack_boundary (bool);
443 :
444 : typedef enum ix86_flags_cc
445 : { /* Sixteen condition codes, numbered consecutively from X86_CCO = 0. */
446 : X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
447 : X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
448 : X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
449 : X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
450 : } ix86_cc;
451 :
452 : static const char *ix86_ccmp_dfv_mapping[] =
453 : { /* Sixteen dfv operand strings laid out four per row like ix86_cc; NOTE(review): presumably indexed by an ix86_cc value -- confirm against the users of this table. */
454 : "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
455 : "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
456 : "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
457 : "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
458 : };
459 :
460 :
461 : /* Whether -mtune= or -march= were specified */
462 : int ix86_tune_defaulted;
463 : int ix86_arch_specified;
464 :
465 : /* Return true if a red-zone is in use. We can't use red-zone when
466 : there are local indirect jumps, like "indirect_jump" or "tablejump",
467 : which jump to another place in the function, since "call" in the
468 : indirect thunk pushes the return address onto stack, destroying
469 : red-zone. Local indirect jumps are tolerated only when the indirect branch type is indirect_branch_keep. The red-zone is likewise not used when TARGET_RED_ZONE is off or for the 64-bit MS ABI.
470 :
471 : NB: Don't use red-zone for functions with no_caller_saved_registers
472 : and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
473 : for 31 GPRs or 15 GPRs + 16 XMM registers.
474 :
475 : TODO: If we can reserve the first 2 WORDs, one for PUSH and another
476 : for CALL, in the red-zone, we can allow local indirect jumps with
477 : indirect thunk. */
478 :
479 : bool
480 9898619 : ix86_using_red_zone (void)
481 : {
482 9898619 : return (TARGET_RED_ZONE
483 8956449 : && !TARGET_64BIT_MS_ABI
484 8653740 : && ((!TARGET_APX_EGPR && !TARGET_SSE)
485 8630735 : || (cfun->machine->call_saved_registers
486 8630735 : != TYPE_NO_CALLER_SAVED_REGISTERS))
487 18552298 : && (!cfun->machine->has_local_indirect_jump
488 47510 : || cfun->machine->indirect_branch_type == indirect_branch_keep));
489 : }
490 :
491 : /* Return true if profiling code should be emitted before the
492 : prologue, i.e. when flag_fentry is nonzero. Otherwise return false.
493 : Note: For x86 with "hotfix" it is sorried. */
494 : static bool
495 4496363 : ix86_profile_before_prologue (void)
496 : {
497 4496363 : return flag_fentry != 0;
498 : }
499 :
500 : /* Update register usage after having seen the compiler flags: adjust call_used_regs, rebuild the CLOBBERED_REGS class and remove from accessible_reg_set the registers the selected target cannot use. */
501 :
502 : static void
503 829760 : ix86_conditional_register_usage (void)
504 : {
505 829760 : int i, c_mask;
506 :
507 : /* If there are no caller-saved registers, preserve all registers
508 : except fixed_regs and registers used for function return value
509 : since aggregate_value_p checks call_used_regs[regno] on return
510 : value. */
511 829760 : if (cfun
512 67800 : && (cfun->machine->call_saved_registers
513 67800 : == TYPE_NO_CALLER_SAVED_REGISTERS))
514 462489 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
515 457516 : if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
516 422283 : call_used_regs[i] = 0;
517 :
518 : /* For 32-bit targets, disable the REX integer, REX SSE and extended REX SSE registers. */
519 829760 : if (! TARGET_64BIT)
520 : {
521 134550 : for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
522 119600 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
523 134550 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
524 119600 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
525 254150 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
526 239200 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
527 : }
528 :
529 : /* See the definition of CALL_USED_REGISTERS in i386.h. */
530 829760 : c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
531 :
532 829760 : CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
533 :
534 77167680 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
535 : {
536 : /* Set/reset conditionally defined registers from
537 : CALL_USED_REGISTERS initializer. Values above 1 are treated as a bit mask that is tested against c_mask. */
538 76337920 : if (call_used_regs[i] > 1)
539 13196537 : call_used_regs[i] = !!(call_used_regs[i] & c_mask);
540 :
541 : /* Calculate registers of CLOBBERED_REGS register set
542 : as call used registers from GENERAL_REGS register set. */
543 76337920 : if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
544 76337920 : && call_used_regs[i])
545 23107545 : SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
546 : }
547 :
548 : /* If MMX is disabled, disable the MMX registers. */
549 829760 : if (! TARGET_MMX)
550 400526 : accessible_reg_set &= ~reg_class_contents[MMX_REGS];
551 :
552 : /* If SSE is disabled, disable all SSE registers. */
553 829760 : if (! TARGET_SSE)
554 394552 : accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
555 :
556 : /* If the FPU is disabled, disable the x87 registers. */
557 829760 : if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
558 395772 : accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
559 :
560 : /* If AVX512F is disabled, disable the extended REX SSE registers and the mask registers. */
561 829760 : if (! TARGET_AVX512F)
562 : {
563 10014020 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
564 9424960 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
565 :
566 1178120 : accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
567 : }
568 :
569 : /* If APX EGPR is disabled or the target is not 64-bit, disable the REX2 integer registers. */
570 829760 : if (! (TARGET_APX_EGPR && TARGET_64BIT))
571 : {
572 14094530 : for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
573 13265440 : CLEAR_HARD_REG_BIT (accessible_reg_set, i);
574 : }
575 829760 : }
576 :
577 : /* Canonicalize a comparison from one we don't have to one we do have. *CODE, *OP0 and *OP1 are rewritten in place when one of the patterns below matches; nothing is changed when OP0_PRESERVE_VALUE is true, since every rewrite modifies *OP0. */
578 :
579 : static void
580 24135710 : ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
581 : bool op0_preserve_value)
582 : {
583 : /* The order of operands in x87 ficom compare is forced by combine in
584 : simplify_comparison () function. Float operator is treated as RTX_OBJ
585 : with a precedence over other operators and is always put in the first
586 : place. Swap condition and operands to match ficom instruction. */
587 24135710 : if (!op0_preserve_value
588 23331142 : && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
589 : {
590 14 : enum rtx_code scode = swap_condition ((enum rtx_code) *code);
591 :
592 : /* We are called only for compares that are split to SAHF instruction.
593 : Ensure that we have setcc/jcc insn for the swapped condition. If not, fall through to the remaining checks with the operands untouched. */
594 14 : if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
595 : {
596 6 : std::swap (*op0, *op1);
597 6 : *code = (int) scode;
598 6 : return;
599 : }
600 : }
601 :
602 : /* SUB (a, b) underflows precisely when a < b. Convert
603 : (compare (minus (a b)) a) to (compare (a b))
604 : to match *sub<mode>_3 pattern. Only GTU and LEU are handled, and the condition is swapped along with the operands. */
605 23331136 : if (!op0_preserve_value
606 23331136 : && (*code == GTU || *code == LEU)
607 1809527 : && GET_CODE (*op0) == MINUS
608 80494 : && rtx_equal_p (XEXP (*op0, 0), *op1))
609 : {
610 488 : *op1 = XEXP (*op0, 1);
611 488 : *op0 = XEXP (*op0, 0);
612 488 : *code = (int) swap_condition ((enum rtx_code) *code);
613 488 : return;
614 : }
615 :
616 : /* Swap operands of GTU comparison to canonicalize
617 : addcarry/subborrow comparison. */
618 24135216 : if (!op0_preserve_value
619 23330648 : && *code == GTU
620 825511 : && GET_CODE (*op0) == PLUS
621 321189 : && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
622 44232 : && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
623 24175346 : && GET_CODE (*op1) == ZERO_EXTEND)
624 : {
625 36826 : std::swap (*op0, *op1);
626 36826 : *code = (int) swap_condition ((enum rtx_code) *code);
627 36826 : return;
628 : }
629 : }
630 :
631 : /* Hook to determine if one function can safely inline another. Return true when the target options of CALLEE are compatible with those of CALLER, so that CALLEE may be inlined into CALLER. A function without function-specific options uses target_option_default_node. */
632 :
633 : static bool
634 9873628 : ix86_can_inline_p (tree caller, tree callee)
635 : {
636 9873628 : tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
637 9873628 : tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
638 :
639 : /* Changes of these flags can be tolerated for always_inline callees. Let's hope
640 : the user knows what they are doing. */
641 9873628 : unsigned HOST_WIDE_INT always_inline_safe_mask
642 : = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
643 : | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
644 : | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
645 : | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
646 : | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
647 : | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
648 : | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
649 :
650 :
651 9873628 : if (!callee_tree)
652 9276553 : callee_tree = target_option_default_node;
653 9873628 : if (!caller_tree)
654 9276618 : caller_tree = target_option_default_node;
655 9873628 : if (callee_tree == caller_tree)
656 : return true;
657 :
658 5292 : struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
659 5292 : struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
660 5292 : bool ret = false;
661 5292 : bool always_inline
662 5292 : = (DECL_DISREGARD_INLINE_LIMITS (callee)
663 9939 : && lookup_attribute ("always_inline",
664 4647 : DECL_ATTRIBUTES (callee)));
665 :
666 : /* If callee only uses GPRs, ignore MASK_80387. */
667 5292 : if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
668 1030 : always_inline_safe_mask |= MASK_80387;
669 :
670 5292 : cgraph_node *callee_node = cgraph_node::get (callee);
671 : /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
672 : function can inline a SSE2 function but a SSE2 function can't inline
673 : a SSE4 function. */
674 5292 : if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
675 : != callee_opts->x_ix86_isa_flags)
676 5056 : || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
677 : != callee_opts->x_ix86_isa_flags2))
678 : ret = false;
679 :
680 : /* See if we have the same non-isa options. For always_inline callees only the flags outside always_inline_safe_mask have to match. */
681 5019 : else if ((!always_inline
682 388 : && caller_opts->x_target_flags != callee_opts->x_target_flags)
683 4975 : || (caller_opts->x_target_flags & ~always_inline_safe_mask)
684 4975 : != (callee_opts->x_target_flags & ~always_inline_safe_mask))
685 : ret = false;
686 :
687 4975 : else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
688 : /* If the callee doesn't use FP expressions, differences in
689 : ix86_fpmath can be ignored. We are called from FEs
690 : for multi-versioning call optimization, so beware of
691 : ipa_fn_summaries not being available. */
692 1247 : && (! ipa_fn_summaries
693 1247 : || ipa_fn_summaries->get (callee_node) == NULL
694 1247 : || ipa_fn_summaries->get (callee_node)->fp_expressions))
695 : ret = false;
696 :
697 : /* At this point we cannot identify whether arch or tune setting
698 : comes from target attribute or not. So the most conservative way
699 : is to allow the callee that uses default arch and tune string to
700 : be inlined. */
701 4701 : else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
702 1430 : && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
703 : ret = true;
704 :
705 : /* See if arch, tune, etc. are the same. As the ISA flags check above already
706 : ensures that the callee's ISA is a subset of the caller's, do not block
707 : an always_inline callee even if it has a different arch. */
708 3279 : else if (!always_inline && caller_opts->arch != callee_opts->arch)
709 : ret = false;
710 :
711 15 : else if (!always_inline && caller_opts->tune != callee_opts->tune)
712 : ret = false;
713 :
714 3279 : else if (!always_inline
715 15 : && caller_opts->branch_cost != callee_opts->branch_cost)
716 : ret = false;
717 :
718 : else
719 9873037 : ret = true;
720 :
721 : return ret;
722 : }
723 :
724 : /* Return true if EXP goes in large data/bss. This is only ever the case for the medium and large code models and their PIC variants; for every other code model, and for a null EXP, the result is false. */
725 :
726 : static bool
727 80801768 : ix86_in_large_data_p (tree exp)
728 : {
729 80801768 : if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
730 80801530 : && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
731 : return false;
732 :
733 1147 : if (exp == NULL_TREE)
734 : return false;
735 :
736 : /* Functions are never large data. */
737 1147 : if (TREE_CODE (exp) == FUNCTION_DECL)
738 : return false;
739 :
740 : /* Automatic variables are never large data. */
741 279 : if (VAR_P (exp) && !is_global_var (exp))
742 : return false;
743 :
744 279 : if (VAR_P (exp) && DECL_SECTION_NAME (exp))
745 : {
746 51 : const char *section = DECL_SECTION_NAME (exp); /* A variable with an explicit section is large only if that section is one of the large data/bss sections tested below. */
747 51 : if (strcmp (section, ".ldata") == 0
748 51 : || startswith (section, ".ldata.")
749 51 : || strcmp (section, ".lbss") == 0
750 51 : || startswith (section, ".lbss.")
751 99 : || startswith (section, ".gnu.linkonce.lb."))
752 : return true;
753 : return false;
754 : }
755 : else
756 : {
757 228 : HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
758 :
759 : /* If this is an incomplete type with size 0, then we can't put it
760 : in data because it might be too big when completed. Also,
761 : int_size_in_bytes returns -1 if size can vary or is larger than
762 : an integer in which case also it is safer to assume that it goes in
763 : large data. Otherwise the size is compared against ix86_section_threshold. */
764 228 : if (size <= 0 || size > ix86_section_threshold)
765 : return true;
766 : }
767 :
768 : return false;
769 : }
770 :
771 : /* i386-specific section flag to mark large sections. */
772 : #define SECTION_LARGE SECTION_MACH_DEP
773 :
774 : /* Switch to the appropriate section for output of DECL.
775 : DECL is either a `VAR_DECL' node or a constant of some sort.
776 : RELOC indicates whether forming the initial value of DECL requires
777 : link-time relocations. ALIGN is only passed through to default_elf_select_section, which handles everything that is not placed in a large (.l*) section here. */
778 :
779 : ATTRIBUTE_UNUSED static section *
780 1663816 : x86_64_elf_select_section (tree decl, int reloc,
781 : unsigned HOST_WIDE_INT align)
782 : {
783 1663816 : if (ix86_in_large_data_p (decl))
784 : {
785 6 : const char *sname = NULL;
786 6 : unsigned int flags = SECTION_WRITE | SECTION_LARGE;
787 6 : switch (categorize_decl_for_section (decl, reloc))
788 : {
789 1 : case SECCAT_DATA:
790 1 : sname = ".ldata";
791 1 : break;
792 0 : case SECCAT_DATA_REL:
793 0 : sname = ".ldata.rel";
794 0 : break;
795 0 : case SECCAT_DATA_REL_LOCAL:
796 0 : sname = ".ldata.rel.local";
797 0 : break;
798 0 : case SECCAT_DATA_REL_RO:
799 0 : sname = ".ldata.rel.ro";
800 0 : break;
801 0 : case SECCAT_DATA_REL_RO_LOCAL:
802 0 : sname = ".ldata.rel.ro.local";
803 0 : break;
804 0 : case SECCAT_BSS:
805 0 : sname = ".lbss";
806 0 : flags |= SECTION_BSS;
807 0 : break;
808 : case SECCAT_RODATA:
809 : case SECCAT_RODATA_MERGE_STR:
810 : case SECCAT_RODATA_MERGE_STR_INIT:
811 : case SECCAT_RODATA_MERGE_CONST:
812 : sname = ".lrodata";
813 : flags &= ~SECTION_WRITE;
814 : break;
815 0 : case SECCAT_SRODATA:
816 0 : case SECCAT_SDATA:
817 0 : case SECCAT_SBSS:
818 0 : gcc_unreachable ();
819 : case SECCAT_TEXT:
820 : case SECCAT_TDATA:
821 : case SECCAT_TBSS:
822 : /* We don't split these for medium model. Place them into
823 : default sections and hope for the best. SNAME stays NULL, so the default selection below is used. */
824 : break;
825 : }
826 1 : if (sname)
827 : {
828 : /* We might get called with string constants, but get_named_section
829 : doesn't like them as they are not DECLs. Also, we need to set
830 : flags in that case. */
831 6 : if (!DECL_P (decl))
832 3 : return get_section (sname, flags, NULL);
833 3 : return get_named_section (decl, sname, reloc);
834 : }
835 : }
836 1663810 : return default_elf_select_section (decl, reloc, align);
837 : }
838 :
839 : /* Select a set of attributes for section NAME based on the properties
840 : of DECL and whether or not RELOC indicates that DECL's initializer
841 : might contain runtime relocations. The result starts from default_section_type_flags and adds SECTION_LARGE, SECTION_RELRO and SECTION_BSS where the checks below apply. */
842 :
843 : static unsigned int ATTRIBUTE_UNUSED
844 67413942 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
845 : {
846 67413942 : unsigned int flags = default_section_type_flags (decl, name, reloc);
847 :
848 67413942 : if (ix86_in_large_data_p (decl))
849 10 : flags |= SECTION_LARGE;
850 :
851 67413942 : if (decl == NULL_TREE /* Without a decl, only the section name identifies the large relro sections. */
852 375 : && (strcmp (name, ".ldata.rel.ro") == 0
853 375 : || strcmp (name, ".ldata.rel.ro.local") == 0))
854 0 : flags |= SECTION_RELRO;
855 :
856 67413942 : if (strcmp (name, ".lbss") == 0
857 67413938 : || startswith (name, ".lbss.")
858 134827877 : || startswith (name, ".gnu.linkonce.lb."))
859 : {
860 7 : flags |= SECTION_BSS;
861 : /* Clear SECTION_NOTYPE so .lbss etc. are marked @nobits in
862 : default_elf_asm_named_section. */
863 7 : flags &= ~SECTION_NOTYPE;
864 : }
865 :
866 67413942 : return flags;
867 : }
868 :
869 : /* Build up a unique section name, expressed as a
870 : STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
871 : RELOC indicates whether the initial value of EXP requires
872 : link-time relocations. */
873 :
874 : static void ATTRIBUTE_UNUSED
875 1804869 : x86_64_elf_unique_section (tree decl, int reloc)
876 : {
877 1804869 : if (ix86_in_large_data_p (decl))
878 : {
879 3 : const char *prefix = NULL;
880 : /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
881 3 : bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
882 :
883 3 : switch (categorize_decl_for_section (decl, reloc))
884 : {
885 0 : case SECCAT_DATA:
886 0 : case SECCAT_DATA_REL:
887 0 : case SECCAT_DATA_REL_LOCAL:
888 0 : case SECCAT_DATA_REL_RO:
889 0 : case SECCAT_DATA_REL_RO_LOCAL:
890 0 : prefix = one_only ? ".ld" : ".ldata";
891 : break;
892 3 : case SECCAT_BSS:
893 3 : prefix = one_only ? ".lb" : ".lbss";
894 : break;
895 : case SECCAT_RODATA:
896 : case SECCAT_RODATA_MERGE_STR:
897 : case SECCAT_RODATA_MERGE_STR_INIT:
898 : case SECCAT_RODATA_MERGE_CONST:
899 : prefix = one_only ? ".lr" : ".lrodata";
900 : break;
901 0 : case SECCAT_SRODATA:
902 0 : case SECCAT_SDATA:
903 0 : case SECCAT_SBSS:
904 0 : gcc_unreachable ();
905 : case SECCAT_TEXT:
906 : case SECCAT_TDATA:
907 : case SECCAT_TBSS:
908 : /* We don't split these for medium model. Place them into
909 : default sections and hope for best. */
910 : break;
911 : }
912 3 : if (prefix)
913 : {
914 3 : const char *name, *linkonce;
915 3 : char *string;
916 :
917 3 : name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
918 3 : name = targetm.strip_name_encoding (name);
919 :
920 : /* If we're using one_only, then there needs to be a .gnu.linkonce
921 : prefix to the section name. */
922 3 : linkonce = one_only ? ".gnu.linkonce" : "";
923 :
924 3 : string = ACONCAT ((linkonce, prefix, ".", name, NULL));
925 :
926 3 : set_decl_section_name (decl, string);
927 3 : return;
928 : }
929 : }
930 1804866 : default_unique_section (decl, reloc);
931 : }
932 :
933 : /* Return true if TYPE has no_callee_saved_registers or preserve_none
934 : attribute. */
935 :
936 : bool
937 7541389 : ix86_type_no_callee_saved_registers_p (const_tree type)
938 : {
939 15082778 : return (lookup_attribute ("no_callee_saved_registers",
940 7541389 : TYPE_ATTRIBUTES (type)) != NULL
941 15082645 : || lookup_attribute ("preserve_none",
942 7541256 : TYPE_ATTRIBUTES (type)) != NULL);
943 : }
944 :
#ifdef COMMON_ASM_OP

#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif

/* Output to FILE the assembler directive that declares NAME (for
   DECL) as an uninitialized external linkage data object of SIZE
   with alignment ALIGN; ALIGN is divided by BITS_PER_UNIT before it
   is printed.

   For the medium and large code models, objects bigger than
   ix86_section_threshold are announced with LARGECOMM_SECTION_ASM_OP
   after switching to the .lbss section; all others use
   COMMON_ASM_OP.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
                             const char *name, unsigned HOST_WIDE_INT size,
                             unsigned align)
{
  const bool big_model
    = (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC);
  const char *directive = COMMON_ASM_OP;

  if (big_model && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      directive = LARGECOMM_SECTION_ASM_OP;
    }

  fputs (directive, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif
975 :
976 : /* Utility function for targets to use in implementing
977 : ASM_OUTPUT_ALIGNED_BSS. */
978 :
979 : void
980 768255 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
981 : unsigned HOST_WIDE_INT size, unsigned align)
982 : {
983 768255 : if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
984 768245 : || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
985 42 : && size > (unsigned int)ix86_section_threshold)
986 3 : switch_to_section (get_named_section (decl, ".lbss", 0));
987 : else
988 768252 : switch_to_section (bss_section);
989 922667 : ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
990 : #ifdef ASM_DECLARE_OBJECT_NAME
991 768255 : last_assemble_variable_decl = decl;
992 768255 : ASM_DECLARE_OBJECT_NAME (file, name, decl);
993 : #else
994 : /* Standard thing is just output label for the object. */
995 : ASM_OUTPUT_LABEL (file, name);
996 : #endif /* ASM_DECLARE_OBJECT_NAME */
997 768255 : ASM_OUTPUT_SKIP (file, size ? size : 1);
998 768255 : }
999 :
1000 : /* Decide whether we must probe the stack before any space allocation
1001 : on this target. It's essentially TARGET_STACK_PROBE except when
1002 : -fstack-check causes the stack to be already probed differently. */
1003 :
1004 : bool
1005 870491 : ix86_target_stack_probe (void)
1006 : {
1007 : /* Do not probe the stack twice if static stack checking is enabled. */
1008 870491 : if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
1009 : return false;
1010 :
1011 870491 : return TARGET_STACK_PROBE;
1012 : }
1013 :
1014 : /* Decide whether we can make a sibling call to a function. DECL is the
1015 : declaration of the function being targeted by the call and EXP is the
1016 : CALL_EXPR representing the call. */
1017 :
1018 : static bool
1019 138021 : ix86_function_ok_for_sibcall (tree decl, tree exp)
1020 : {
1021 138021 : tree type, decl_or_type;
1022 138021 : rtx a, b;
1023 138021 : bool bind_global = decl && !targetm.binds_local_p (decl);
1024 :
1025 138021 : if (ix86_function_naked (current_function_decl))
1026 : return false;
1027 :
1028 : /* Sibling call isn't OK if there are no caller-saved registers
1029 : since all registers must be preserved before return. */
1030 138019 : if (cfun->machine->call_saved_registers
1031 138019 : == TYPE_NO_CALLER_SAVED_REGISTERS)
1032 : return false;
1033 :
1034 : /* If we are generating position-independent code, we cannot sibcall
1035 : optimize direct calls to global functions, as the PLT requires
1036 : %ebx be live. (Darwin does not have a PLT.) */
1037 137990 : if (!TARGET_MACHO
1038 137990 : && !TARGET_64BIT
1039 11328 : && flag_pic
1040 8402 : && flag_plt
1041 8402 : && bind_global)
1042 : return false;
1043 :
1044 : /* If we need to align the outgoing stack, then sibcalling would
1045 : unalign the stack, which may break the called function. */
1046 133347 : if (ix86_minimum_incoming_stack_boundary (true)
1047 133347 : < PREFERRED_STACK_BOUNDARY)
1048 : return false;
1049 :
1050 132766 : if (decl)
1051 : {
1052 121915 : decl_or_type = decl;
1053 121915 : type = TREE_TYPE (decl);
1054 : }
1055 : else
1056 : {
1057 : /* We're looking at the CALL_EXPR, we need the type of the function. */
1058 10851 : type = CALL_EXPR_FN (exp); /* pointer expression */
1059 10851 : type = TREE_TYPE (type); /* pointer type */
1060 10851 : type = TREE_TYPE (type); /* function type */
1061 10851 : decl_or_type = type;
1062 : }
1063 :
1064 : /* Sibling call isn't OK if callee has no callee-saved registers
1065 : and the calling function has callee-saved registers. */
1066 132766 : if ((cfun->machine->call_saved_registers
1067 132766 : != TYPE_NO_CALLEE_SAVED_REGISTERS)
1068 132766 : && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
1069 132766 : && ix86_type_no_callee_saved_registers_p (type))
1070 : return false;
1071 :
1072 : /* If outgoing reg parm stack space changes, we cannot do sibcall. */
1073 132750 : if ((OUTGOING_REG_PARM_STACK_SPACE (type)
1074 132750 : != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
1075 264754 : || (REG_PARM_STACK_SPACE (decl_or_type)
1076 132004 : != REG_PARM_STACK_SPACE (current_function_decl)))
1077 : {
1078 746 : maybe_complain_about_tail_call (exp,
1079 : "inconsistent size of stack space"
1080 : " allocated for arguments which are"
1081 : " passed in registers");
1082 746 : return false;
1083 : }
1084 :
1085 : /* Check that the return value locations are the same. Like
1086 : if we are returning floats on the 80387 register stack, we cannot
1087 : make a sibcall from a function that doesn't return a float to a
1088 : function that does or, conversely, from a function that does return
1089 : a float to a function that doesn't; the necessary stack adjustment
1090 : would not be executed. This is also the place we notice
1091 : differences in the return value ABI. Note that it is ok for one
1092 : of the functions to have void return type as long as the return
1093 : value of the other is passed in a register. */
1094 132004 : a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1095 132004 : b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1096 132004 : cfun->decl, false);
1097 132004 : if (STACK_REG_P (a) || STACK_REG_P (b))
1098 : {
1099 1020 : if (!rtx_equal_p (a, b))
1100 : return false;
1101 : }
1102 130984 : else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1103 : ;
1104 24466 : else if (!rtx_equal_p (a, b))
1105 : return false;
1106 :
1107 131623 : if (TARGET_64BIT)
1108 : {
1109 : /* The SYSV ABI has more call-clobbered registers;
1110 : disallow sibcalls from MS to SYSV. */
1111 124938 : if (cfun->machine->call_abi == MS_ABI
1112 124938 : && ix86_function_type_abi (type) == SYSV_ABI)
1113 : return false;
1114 : }
1115 : else
1116 : {
1117 : /* If this call is indirect, we'll need to be able to use a
1118 : call-clobbered register for the address of the target function.
1119 : Make sure that all such registers are not used for passing
1120 : parameters. Note that DLLIMPORT functions and call to global
1121 : function via GOT slot are indirect. */
1122 6685 : if (!decl
1123 4771 : || (bind_global && flag_pic && !flag_plt)
1124 : || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1125 4771 : || flag_force_indirect_call)
1126 : {
1127 : /* Check if regparm >= 3 since arg_reg_available is set to
1128 : false if regparm == 0. If regparm is 1 or 2, there is
1129 : always a call-clobbered register available.
1130 :
1131 : ??? The symbol indirect call doesn't need a call-clobbered
1132 : register. But we don't know if this is a symbol indirect
1133 : call or not here. */
1134 1914 : if (ix86_function_regparm (type, decl) >= 3
1135 1914 : && !cfun->machine->arg_reg_available)
1136 : return false;
1137 : }
1138 : }
1139 :
1140 131623 : if (decl && ix86_use_pseudo_pic_reg ())
1141 : {
1142 : /* When PIC register is used, it must be restored after ifunc
1143 : function returns. */
1144 2060 : cgraph_node *node = cgraph_node::get (decl);
1145 2060 : if (node && node->ifunc_resolver)
1146 : return false;
1147 : }
1148 :
1149 : /* Disable sibcall if callee has indirect_return attribute and
1150 : caller doesn't since callee will return to the caller's caller
1151 : via an indirect jump. */
1152 131623 : if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1153 : == (CF_RETURN | CF_BRANCH))
1154 52642 : && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1155 131627 : && !lookup_attribute ("indirect_return",
1156 4 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1157 : return false;
1158 :
1159 : /* Otherwise okay. That also includes certain types of indirect calls. */
1160 : return true;
1161 : }
1162 :
1163 : /* This function determines from TYPE the calling-convention. */
1164 :
1165 : unsigned int
1166 6201143 : ix86_get_callcvt (const_tree type)
1167 : {
1168 6201143 : unsigned int ret = 0;
1169 6201143 : bool is_stdarg;
1170 6201143 : tree attrs;
1171 :
1172 6201143 : if (TARGET_64BIT)
1173 : return IX86_CALLCVT_CDECL;
1174 :
1175 3270149 : attrs = TYPE_ATTRIBUTES (type);
1176 3270149 : if (attrs != NULL_TREE)
1177 : {
1178 67401 : if (lookup_attribute ("cdecl", attrs))
1179 : ret |= IX86_CALLCVT_CDECL;
1180 67401 : else if (lookup_attribute ("stdcall", attrs))
1181 : ret |= IX86_CALLCVT_STDCALL;
1182 67401 : else if (lookup_attribute ("fastcall", attrs))
1183 : ret |= IX86_CALLCVT_FASTCALL;
1184 67392 : else if (lookup_attribute ("thiscall", attrs))
1185 : ret |= IX86_CALLCVT_THISCALL;
1186 :
1187 : /* Regparm isn't allowed for thiscall and fastcall. */
1188 : if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1189 : {
1190 67392 : if (lookup_attribute ("regparm", attrs))
1191 15828 : ret |= IX86_CALLCVT_REGPARM;
1192 67392 : if (lookup_attribute ("sseregparm", attrs))
1193 0 : ret |= IX86_CALLCVT_SSEREGPARM;
1194 : }
1195 :
1196 67401 : if (IX86_BASE_CALLCVT(ret) != 0)
1197 9 : return ret;
1198 : }
1199 :
1200 3270140 : is_stdarg = stdarg_p (type);
1201 3270140 : if (TARGET_RTD && !is_stdarg)
1202 0 : return IX86_CALLCVT_STDCALL | ret;
1203 :
1204 3270140 : if (ret != 0
1205 3270140 : || is_stdarg
1206 3245292 : || TREE_CODE (type) != METHOD_TYPE
1207 3406719 : || ix86_function_type_abi (type) != MS_ABI)
1208 3270140 : return IX86_CALLCVT_CDECL | ret;
1209 :
1210 : return IX86_CALLCVT_THISCALL;
1211 : }
1212 :
1213 : /* Return 0 if the attributes for two types are incompatible, 1 if they
1214 : are compatible, and 2 if they are nearly compatible (which causes a
1215 : warning to be generated). */
1216 :
1217 : static int
1218 1483196 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
1219 : {
1220 1483196 : unsigned int ccvt1, ccvt2;
1221 :
1222 1483196 : if (TREE_CODE (type1) != FUNCTION_TYPE
1223 1483196 : && TREE_CODE (type1) != METHOD_TYPE)
1224 : return 1;
1225 :
1226 1476621 : ccvt1 = ix86_get_callcvt (type1);
1227 1476621 : ccvt2 = ix86_get_callcvt (type2);
1228 1476621 : if (ccvt1 != ccvt2)
1229 : return 0;
1230 2931114 : if (ix86_function_regparm (type1, NULL)
1231 1465557 : != ix86_function_regparm (type2, NULL))
1232 : return 0;
1233 :
1234 1427762 : if (ix86_type_no_callee_saved_registers_p (type1)
1235 713881 : != ix86_type_no_callee_saved_registers_p (type2))
1236 : return 0;
1237 :
1238 : /* preserve_none attribute uses a different calling convention is
1239 : only for 64-bit. */
1240 713753 : if (TARGET_64BIT
1241 1427446 : && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
1242 713693 : != lookup_attribute ("preserve_none",
1243 713693 : TYPE_ATTRIBUTES (type2))))
1244 : return 0;
1245 :
1246 : return 1;
1247 : }
1248 :
1249 : /* Return the regparm value for a function with the indicated TYPE and DECL.
1250 : DECL may be NULL when calling function indirectly
1251 : or considering a libcall. */
1252 :
1253 : static int
1254 4202769 : ix86_function_regparm (const_tree type, const_tree decl)
1255 : {
1256 4202769 : tree attr;
1257 4202769 : int regparm;
1258 4202769 : unsigned int ccvt;
1259 :
1260 4202769 : if (TARGET_64BIT)
1261 2930994 : return (ix86_function_type_abi (type) == SYSV_ABI
1262 2930994 : ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1263 1271775 : ccvt = ix86_get_callcvt (type);
1264 1271775 : regparm = ix86_regparm;
1265 :
1266 1271775 : if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1267 : {
1268 2020 : attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1269 2020 : if (attr)
1270 : {
1271 2020 : regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1272 2020 : return regparm;
1273 : }
1274 : }
1275 1269755 : else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1276 : return 2;
1277 1269755 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1278 : return 1;
1279 :
1280 : /* Use register calling convention for local functions when possible. */
1281 1269755 : if (decl
1282 1205500 : && TREE_CODE (decl) == FUNCTION_DECL)
1283 : {
1284 1195399 : cgraph_node *target = cgraph_node::get (decl);
1285 1195399 : if (target)
1286 1187892 : target = target->function_symbol ();
1287 :
1288 : /* Caller and callee must agree on the calling convention, so
1289 : checking here just optimize means that with
1290 : __attribute__((optimize (...))) caller could use regparm convention
1291 : and callee not, or vice versa. Instead look at whether the callee
1292 : is optimized or not. */
1293 1187892 : if (target && opt_for_fn (target->decl, optimize)
1294 2374892 : && !(profile_flag && !flag_fentry))
1295 : {
1296 1187000 : if (target->local && target->can_change_signature)
1297 : {
1298 140052 : int local_regparm, globals = 0, regno;
1299 :
1300 : /* Make sure no regparm register is taken by a
1301 : fixed register variable. */
1302 140052 : for (local_regparm = 0; local_regparm < REGPARM_MAX;
1303 : local_regparm++)
1304 105039 : if (fixed_regs[local_regparm])
1305 : break;
1306 :
1307 : /* We don't want to use regparm(3) for nested functions as
1308 : these use a static chain pointer in the third argument. */
1309 35013 : if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1310 : local_regparm = 2;
1311 :
1312 : /* Save a register for the split stack. */
1313 35013 : if (flag_split_stack)
1314 : {
1315 20696 : if (local_regparm == 3)
1316 : local_regparm = 2;
1317 707 : else if (local_regparm == 2
1318 707 : && DECL_STATIC_CHAIN (target->decl))
1319 : local_regparm = 1;
1320 : }
1321 :
1322 : /* Each fixed register usage increases register pressure,
1323 : so less registers should be used for argument passing.
1324 : This functionality can be overriden by an explicit
1325 : regparm value. */
1326 245091 : for (regno = AX_REG; regno <= DI_REG; regno++)
1327 210078 : if (fixed_regs[regno])
1328 0 : globals++;
1329 :
1330 35013 : local_regparm
1331 35013 : = globals < local_regparm ? local_regparm - globals : 0;
1332 :
1333 35013 : if (local_regparm > regparm)
1334 4202769 : regparm = local_regparm;
1335 : }
1336 : }
1337 : }
1338 :
1339 : return regparm;
1340 : }
1341 :
1342 : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1343 : DFmode (2) arguments in SSE registers for a function with the
1344 : indicated TYPE and DECL. DECL may be NULL when calling function
1345 : indirectly or considering a libcall. Return -1 if any FP parameter
1346 : should be rejected by error. This is used in siutation we imply SSE
1347 : calling convetion but the function is called from another function with
1348 : SSE disabled. Otherwise return 0. */
1349 :
1350 : static int
1351 1077424 : ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1352 : {
1353 1077424 : gcc_assert (!TARGET_64BIT);
1354 :
1355 : /* Use SSE registers to pass SFmode and DFmode arguments if requested
1356 : by the sseregparm attribute. */
1357 1077424 : if (TARGET_SSEREGPARM
1358 1077424 : || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1359 : {
1360 0 : if (!TARGET_SSE)
1361 : {
1362 0 : if (warn)
1363 : {
1364 0 : if (decl)
1365 0 : error ("calling %qD with attribute sseregparm without "
1366 : "SSE/SSE2 enabled", decl);
1367 : else
1368 0 : error ("calling %qT with attribute sseregparm without "
1369 : "SSE/SSE2 enabled", type);
1370 : }
1371 0 : return 0;
1372 : }
1373 :
1374 : return 2;
1375 : }
1376 :
1377 1077424 : if (!decl)
1378 : return 0;
1379 :
1380 978849 : cgraph_node *target = cgraph_node::get (decl);
1381 978849 : if (target)
1382 971349 : target = target->function_symbol ();
1383 :
1384 : /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1385 : (and DFmode for SSE2) arguments in SSE registers. */
1386 971349 : if (target
1387 : /* TARGET_SSE_MATH */
1388 971349 : && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1389 1296 : && opt_for_fn (target->decl, optimize)
1390 972645 : && !(profile_flag && !flag_fentry))
1391 : {
1392 1296 : if (target->local && target->can_change_signature)
1393 : {
1394 : /* Refuse to produce wrong code when local function with SSE enabled
1395 : is called from SSE disabled function.
1396 : FIXME: We need a way to detect these cases cross-ltrans partition
1397 : and avoid using SSE calling conventions on local functions called
1398 : from function with SSE disabled. For now at least delay the
1399 : warning until we know we are going to produce wrong code.
1400 : See PR66047 */
1401 0 : if (!TARGET_SSE && warn)
1402 : return -1;
1403 0 : return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1404 0 : ->x_ix86_isa_flags) ? 2 : 1;
1405 : }
1406 : }
1407 :
1408 : return 0;
1409 : }
1410 :
1411 : /* Return true if EAX is live at the start of the function. Used by
1412 : ix86_expand_prologue to determine if we need special help before
1413 : calling allocate_stack_worker. */
1414 :
1415 : static bool
1416 7090 : ix86_eax_live_at_start_p (void)
1417 : {
1418 : /* Cheat. Don't bother working forward from ix86_function_regparm
1419 : to the function type to whether an actual argument is located in
1420 : eax. Instead just look at cfg info, which is still close enough
1421 : to correct at this point. This gives false positives for broken
1422 : functions that might use uninitialized data that happens to be
1423 : allocated in eax, but who cares? */
1424 7090 : return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1425 : }
1426 :
1427 : static bool
1428 160020 : ix86_keep_aggregate_return_pointer (tree fntype)
1429 : {
1430 160020 : tree attr;
1431 :
1432 160020 : if (!TARGET_64BIT)
1433 : {
1434 160020 : attr = lookup_attribute ("callee_pop_aggregate_return",
1435 160020 : TYPE_ATTRIBUTES (fntype));
1436 160020 : if (attr)
1437 0 : return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1438 :
1439 : /* For 32-bit MS-ABI the default is to keep aggregate
1440 : return pointer. */
1441 160020 : if (ix86_function_type_abi (fntype) == MS_ABI)
1442 : return true;
1443 : }
1444 : return KEEP_AGGREGATE_RETURN_POINTER != 0;
1445 : }
1446 :
1447 : /* Value is the number of bytes of arguments automatically
1448 : popped when returning from a subroutine call.
1449 : FUNDECL is the declaration node of the function (as a tree),
1450 : FUNTYPE is the data type of the function (as a tree),
1451 : or for a library call it is an identifier node for the subroutine name.
1452 : SIZE is the number of bytes of arguments passed on the stack.
1453 :
1454 : On the 80386, the RTD insn may be used to pop them if the number
1455 : of args is fixed, but if the number is variable then the caller
1456 : must pop them all. RTD can't be used for library calls now
1457 : because the library is compiled with the Unix compiler.
1458 : Use of RTD is a selectable option, since it is incompatible with
1459 : standard Unix calling sequences. If the option is not selected,
1460 : the caller must always pop the args.
1461 :
1462 : The attribute stdcall is equivalent to RTD on a per module basis. */
1463 :
1464 : static poly_int64
1465 7590914 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1466 : {
1467 7590914 : unsigned int ccvt;
1468 :
1469 : /* None of the 64-bit ABIs pop arguments. */
1470 7590914 : if (TARGET_64BIT)
1471 6717494 : return 0;
1472 :
1473 873420 : ccvt = ix86_get_callcvt (funtype);
1474 :
1475 873420 : if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1476 : | IX86_CALLCVT_THISCALL)) != 0
1477 873420 : && ! stdarg_p (funtype))
1478 3 : return size;
1479 :
1480 : /* Lose any fake structure return argument if it is passed on the stack. */
1481 873417 : if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1482 873417 : && !ix86_keep_aggregate_return_pointer (funtype))
1483 : {
1484 160020 : int nregs = ix86_function_regparm (funtype, fundecl);
1485 160020 : if (nregs == 0)
1486 459195 : return GET_MODE_SIZE (Pmode);
1487 : }
1488 :
1489 720352 : return 0;
1490 : }
1491 :
1492 : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1493 :
1494 : static bool
1495 10051893 : ix86_legitimate_combined_insn (rtx_insn *insn)
1496 : {
1497 10051893 : int i;
1498 :
1499 : /* Check operand constraints in case hard registers were propagated
1500 : into insn pattern. This check prevents combine pass from
1501 : generating insn patterns with invalid hard register operands.
1502 : These invalid insns can eventually confuse reload to error out
1503 : with a spill failure. See also PRs 46829 and 46843. */
1504 :
1505 10051893 : gcc_assert (INSN_CODE (insn) >= 0);
1506 :
1507 10051893 : extract_insn (insn);
1508 10051893 : preprocess_constraints (insn);
1509 :
1510 10051893 : int n_operands = recog_data.n_operands;
1511 10051893 : int n_alternatives = recog_data.n_alternatives;
1512 34352571 : for (i = 0; i < n_operands; i++)
1513 : {
1514 24304181 : rtx op = recog_data.operand[i];
1515 24304181 : machine_mode mode = GET_MODE (op);
1516 24304181 : const operand_alternative *op_alt;
1517 24304181 : int offset = 0;
1518 24304181 : bool win;
1519 24304181 : int j;
1520 :
1521 : /* A unary operator may be accepted by the predicate, but it
1522 : is irrelevant for matching constraints. */
1523 24304181 : if (UNARY_P (op))
1524 48613 : op = XEXP (op, 0);
1525 :
1526 24304181 : if (SUBREG_P (op))
1527 : {
1528 876039 : if (REG_P (SUBREG_REG (op))
1529 876039 : && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1530 54 : offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1531 54 : GET_MODE (SUBREG_REG (op)),
1532 54 : SUBREG_BYTE (op),
1533 54 : GET_MODE (op));
1534 876039 : op = SUBREG_REG (op);
1535 : }
1536 :
1537 24304181 : if (!(REG_P (op) && HARD_REGISTER_P (op)))
1538 24002704 : continue;
1539 :
1540 301477 : op_alt = recog_op_alt;
1541 :
1542 : /* Operand has no constraints, anything is OK. */
1543 301477 : win = !n_alternatives;
1544 :
1545 301477 : alternative_mask preferred = get_preferred_alternatives (insn);
1546 827545 : for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1547 : {
1548 522484 : if (!TEST_BIT (preferred, j))
1549 137296 : continue;
1550 385188 : if (op_alt[i].anything_ok
1551 202726 : || (op_alt[i].matches != -1
1552 33730 : && operands_match_p
1553 33730 : (recog_data.operand[i],
1554 33730 : recog_data.operand[op_alt[i].matches]))
1555 583818 : || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1556 : {
1557 : win = true;
1558 : break;
1559 : }
1560 : }
1561 :
1562 301477 : if (!win)
1563 : return false;
1564 : }
1565 :
1566 : return true;
1567 : }
1568 :
1569 : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1570 :
1571 : static unsigned HOST_WIDE_INT
1572 4581 : ix86_asan_shadow_offset (void)
1573 : {
1574 4581 : return SUBTARGET_SHADOW_OFFSET;
1575 : }
1576 :
1577 : /* Argument support functions. */
1578 :
1579 : /* Return true when register may be used to pass function parameters. */
1580 : bool
1581 1480812072 : ix86_function_arg_regno_p (int regno)
1582 : {
1583 1480812072 : int i;
1584 1480812072 : enum calling_abi call_abi;
1585 1480812072 : const int *parm_regs;
1586 :
1587 1477365013 : if (TARGET_SSE && SSE_REGNO_P (regno)
1588 2448959030 : && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1589 : return true;
1590 :
1591 1361245180 : if (!TARGET_64BIT)
1592 129210670 : return (regno < REGPARM_MAX
1593 129210670 : || (TARGET_MMX && MMX_REGNO_P (regno)
1594 11613072 : && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1595 :
1596 : /* TODO: The function should depend on current function ABI but
1597 : builtins.cc would need updating then. Therefore we use the
1598 : default ABI. */
1599 1232034510 : call_abi = ix86_cfun_abi ();
1600 :
1601 : /* RAX is used as hidden argument to va_arg functions. */
1602 1232034510 : if (call_abi == SYSV_ABI && regno == AX_REG)
1603 : return true;
1604 :
1605 1217797819 : if (cfun
1606 1217797487 : && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
1607 : parm_regs = x86_64_preserve_none_int_parameter_registers;
1608 1217778895 : else if (call_abi == MS_ABI)
1609 : parm_regs = x86_64_ms_abi_int_parameter_registers;
1610 : else
1611 1181783343 : parm_regs = x86_64_int_parameter_registers;
1612 :
1613 16298581598 : for (i = 0; i < (call_abi == MS_ABI
1614 8149290799 : ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1615 7018596439 : if (regno == parm_regs[i])
1616 : return true;
1617 : return false;
1618 : }
1619 :
1620 : /* Return if we do not know how to pass ARG solely in registers. */
1621 :
1622 : static bool
1623 403783638 : ix86_must_pass_in_stack (const function_arg_info &arg)
1624 : {
1625 403783638 : if (must_pass_in_stack_var_size_or_pad (arg))
1626 : return true;
1627 :
1628 : /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1629 : The layout_type routine is crafty and tries to trick us into passing
1630 : currently unsupported vector types on the stack by using TImode. */
1631 1772098 : return (!TARGET_64BIT && arg.mode == TImode
1632 403783601 : && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1633 : }
1634 :
1635 : /* Implement TARGET_OVERALIGNED_STACK_SLOT_REQUIRED. */
1636 :
1637 : static bool
1638 84424 : ix86_overaligned_stack_slot_required (void)
1639 : {
1640 84424 : return TARGET_SEH;
1641 : }
1642 :
1643 : /* It returns the size, in bytes, of the area reserved for arguments passed
1644 : in registers for the function represented by fndecl dependent to the used
1645 : abi format. */
1646 : int
1647 10699397 : ix86_reg_parm_stack_space (const_tree fndecl)
1648 : {
1649 10699397 : enum calling_abi call_abi = SYSV_ABI;
1650 10699397 : if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1651 10388100 : call_abi = ix86_function_abi (fndecl);
1652 : else
1653 311297 : call_abi = ix86_function_type_abi (fndecl);
1654 10699397 : if (TARGET_64BIT && call_abi == MS_ABI)
1655 119312 : return 32;
1656 : return 0;
1657 : }
1658 :
1659 : /* We add this as a workaround in order to use libc_has_function
1660 : hook in i386.md. */
1661 : bool
1662 0 : ix86_libc_has_function (enum function_class fn_class)
1663 : {
1664 0 : return targetm.libc_has_function (fn_class, NULL_TREE);
1665 : }
1666 :
1667 : /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1668 : specifying the call abi used. */
1669 : enum calling_abi
1670 439278643 : ix86_function_type_abi (const_tree fntype)
1671 : {
1672 439278643 : enum calling_abi abi = ix86_abi;
1673 :
1674 439278643 : if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1675 : return abi;
1676 :
1677 17591913 : if (abi == SYSV_ABI
1678 17591913 : && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1679 : {
1680 2608409 : static int warned;
1681 2608409 : if (TARGET_X32 && !warned)
1682 : {
1683 1 : error ("X32 does not support %<ms_abi%> attribute");
1684 1 : warned = 1;
1685 : }
1686 :
1687 : abi = MS_ABI;
1688 : }
1689 14983504 : else if (abi == MS_ABI
1690 14983504 : && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1691 : abi = SYSV_ABI;
1692 :
1693 : return abi;
1694 : }
1695 :
1696 : enum calling_abi
1697 217305413 : ix86_function_abi (const_tree fndecl)
1698 : {
1699 217305413 : return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1700 : }
1701 :
1702 : /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1703 : specifying the call abi used. */
1704 : enum calling_abi
1705 2082718362 : ix86_cfun_abi (void)
1706 : {
1707 2082718362 : return cfun ? cfun->machine->call_abi : ix86_abi;
1708 : }
1709 :
/* Return true if FN is non-null and has the ms_hook_prologue attribute in
   its DECL_ATTRIBUTES.  When such a function is nested inside another
   one (decl_function_context returns non-null), report an error at the
   source location of FN and return false instead. */
1710 : bool
1711 5026377 : ix86_function_ms_hook_prologue (const_tree fn)
1712 : {
1713 5026377 : if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1714 : {
1715 8 : if (decl_function_context (fn) != NULL_TREE)
1716 0 : error_at (DECL_SOURCE_LOCATION (fn),
1717 : "%<ms_hook_prologue%> attribute is not compatible "
1718 : "with nested function");
1719 : else
1720 : return true;
1721 : }
1722 : return false;
1723 : }
1724 :
/* Return true if FN is non-null and has the naked attribute in its
   DECL_ATTRIBUTES; return false otherwise, including for a null FN. */
1725 : bool
1726 115209521 : ix86_function_naked (const_tree fn)
1727 : {
1728 115209521 : if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1729 : return true;
1730 :
1731 : return false;
1732 : }
1733 :
1734 : /* Write the extra assembler code needed to declare a function properly.
   The label for FNAME is written to OUT_FILE through
   assemble_function_label_raw and, when cfun is non-null,
   cfun->machine->function_label_emitted is set.  If DECL has the
   ms_hook_prologue attribute, filler words of 0xcccccccc are written
   before the label and a fixed byte sequence is written right after it. */
1735 :
1736 : void
1737 1551588 : ix86_asm_output_function_label (FILE *out_file, const char *fname,
1738 : tree decl)
1739 : {
1740 1551588 : bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1741 :
1742 1551588 : if (cfun)
1743 1547982 : cfun->machine->function_label_emitted = true;
1744 :
1745 1551588 : if (is_ms_hook)
1746 : {
/* FILLER_COUNT is stepped by 4 for each ASM_LONG directive, giving
   8 directives on 64-bit targets and 4 otherwise. */
1747 2 : int i, filler_count = (TARGET_64BIT ? 32 : 16);
1748 2 : unsigned int filler_cc = 0xcccccccc;
1749 :
1750 18 : for (i = 0; i < filler_count; i += 4)
1751 16 : fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1752 : }
1753 :
/* Optional subtarget hook, emitted between the filler and the label. */
1754 : #ifdef SUBTARGET_ASM_UNWIND_INIT
1755 : SUBTARGET_ASM_UNWIND_INIT (out_file);
1756 : #endif
1757 :
1758 1551588 : assemble_function_label_raw (out_file, fname);
1759 :
1760 : /* Output magic byte marker, if hot-patch attribute is set. */
1761 1551588 : if (is_ms_hook)
1762 : {
1763 2 : if (TARGET_64BIT)
1764 : {
1765 : /* leaq [%rsp + 0], %rsp */
1766 2 : fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1767 : out_file);
1768 : }
1769 : else
1770 : {
1771 : /* movl.s %edi, %edi
1772 : push %ebp
1773 : movl.s %esp, %ebp */
1774 0 : fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1775 : }
1776 : }
1777 1551588 : }
1778 :
1779 : /* Output a user-defined label. In AT&T syntax, registers are prefixed
1780 : with %, so labels require no punctuation. In Intel syntax, registers
1781 : are unprefixed, so labels may clash with registers or other operators,
1782 : and require quoting.
   PREFIX immediately followed by LABEL is written to FILE; the pair is
   wrapped in double quotes unless ASSEMBLER_DIALECT is ASM_ATT. */
1783 : void
1784 35063589 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1785 : {
1786 35063589 : if (ASSEMBLER_DIALECT == ASM_ATT)
1787 35062488 : fprintf (file, "%s%s", prefix, label);
1788 : else
1789 1101 : fprintf (file, "\"%s%s\"", prefix, label);
1790 35063589 : }
1791 :
1792 : /* Implementation of the call ABI switching target hook.  Store the
1793 : calling ABI of FNDECL, as computed by ix86_function_abi, in
1794 : cfun->machine->call_abi.  cfun is dereferenced unconditionally.
   See also ix86_conditional_register_usage for more details. */
1795 : void
1796 196791568 : ix86_call_abi_override (const_tree fndecl)
1797 : {
1798 196791568 : cfun->machine->call_abi = ix86_function_abi (fndecl);
1799 196791568 : }
1800 :
1801 : /* Return true if a pseudo register should be created and used to hold
1802 : the GOT address for PIC code.  That is the case when flag_pic is set,
   except on 64-bit targets that use CM_SMALL_PIC or TARGET_PECOFF. */
1803 : bool
1804 169699212 : ix86_use_pseudo_pic_reg (void)
1805 : {
1806 169699212 : if ((TARGET_64BIT
1807 158650626 : && (ix86_cmodel == CM_SMALL_PIC
1808 : || TARGET_PECOFF))
1809 163823316 : || !flag_pic)
1810 164899907 : return false;
1811 : return true;
1812 : }
1813 :
1814 : /* Initialize large model PIC register.  TMP_REGNO is the number of a
   register used as a temporary; it must differ from the register number
   of pic_offset_table_rtx and Pmode must be DImode (both are asserted).
   A label is emitted, followed by the set_rip_rex64,
   set_got_offset_rex64 and add insns that build the value in
   pic_offset_table_rtx relative to that label. */
1815 :
1816 : static void
1817 56 : ix86_init_large_pic_reg (unsigned int tmp_regno)
1818 : {
1819 56 : rtx_code_label *label;
1820 56 : rtx tmp_reg;
1821 :
1822 56 : gcc_assert (Pmode == DImode);
1823 56 : label = gen_label_rtx ();
1824 56 : emit_label (label);
1825 56 : LABEL_PRESERVE_P (label) = 1;
1826 56 : tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1827 56 : gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1828 56 : emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1829 : label));
1830 56 : emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1831 56 : emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
/* Turn the label into a NOTE_INSN_DELETED_LABEL note, carrying over the
   label name that was read before the code was changed. */
1832 56 : const char *name = LABEL_NAME (label);
1833 56 : PUT_CODE (label, NOTE);
1834 56 : NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1835 56 : NOTE_DELETED_LABEL_NAME (label) = name;
1836 56 : }
1837 :
1838 : /* Create and initialize PIC register if required.  Nothing is done
   unless ix86_use_pseudo_pic_reg returns true.  Otherwise the
   initialization insns are collected in a sequence, inserted on the
   single successor edge of the entry block of cfun, and committed. */
1839 : static void
1840 1481483 : ix86_init_pic_reg (void)
1841 : {
1842 1481483 : edge entry_edge;
1843 1481483 : rtx_insn *seq;
1844 :
1845 1481483 : if (!ix86_use_pseudo_pic_reg ())
1846 : return;
1847 :
1848 40461 : start_sequence ();
1849 :
1850 40461 : if (TARGET_64BIT)
1851 : {
/* CM_LARGE_PIC goes through ix86_init_large_pic_reg with R11_REG as
   the temporary; other 64-bit code models use set_got_rex64. */
1852 69 : if (ix86_cmodel == CM_LARGE_PIC)
1853 53 : ix86_init_large_pic_reg (R11_REG);
1854 : else
1855 16 : emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1856 : }
1857 : else
1858 : {
1859 : /* If there is future mcount call in the function it is more profitable
1860 : to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1861 40392 : rtx reg = crtl->profile
1862 40392 : ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1863 40392 : : pic_offset_table_rtx;
1864 40392 : rtx_insn *insn = emit_insn (gen_set_got (reg));
1865 40392 : RTX_FRAME_RELATED_P (insn) = 1;
/* When profiling, the value was set up in the hard register above, so
   copy it into pic_offset_table_rtx. */
1866 40392 : if (crtl->profile)
1867 0 : emit_move_insn (pic_offset_table_rtx, reg);
1868 40392 : add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1869 : }
1870 :
1871 40461 : seq = end_sequence ();
1872 :
1873 40461 : entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1874 40461 : insert_insn_on_edge (seq, entry_edge);
1875 40461 : commit_one_edge_insertion (entry_edge);
1876 : }
1877 :
1878 : /* Initialize a variable CUM of type CUMULATIVE_ARGS
1879 : for a call to a function whose data type is FNTYPE.
1880 : For a library call, FNTYPE is 0.
   CUM is zeroed first and then filled in with the calling ABI, the
   preserve_none flag, the register counts available for argument
   passing, the ABI warning flags and the stdarg/vaarg information.
   As side effects cfun->machine->silent_p is cleared and
   cfun->machine->arg_reg_available is updated. */
1881 :
1882 : void
1883 10418798 : init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1884 : tree fntype, /* tree ptr for function type */
1885 : rtx libname, /* SYMBOL_REF of library name or 0 */
1886 : tree fndecl, /* decl of the called function, or null */
1887 : int caller) /* stored in cum->caller */
1888 : {
1889 10418798 : struct cgraph_node *local_info_node = NULL;
1890 10418798 : struct cgraph_node *target = NULL;
1891 :
1892 : /* Set silent_p to false to raise an error for invalid calls when
1893 : expanding function body. */
1894 10418798 : cfun->machine->silent_p = false;
1895 :
1896 10418798 : memset (cum, 0, sizeof (*cum));
1897 :
/* Choose the calling ABI and the type searched for preserve_none: the
   cgraph function symbol of FNDECL when there is one, else FNDECL
   itself, else FNTYPE. */
1898 10418798 : tree preserve_none_type;
1899 10418798 : if (fndecl)
1900 : {
1901 10078839 : target = cgraph_node::get (fndecl);
1902 10078839 : if (target)
1903 : {
1904 9932931 : target = target->function_symbol ();
1905 9932931 : local_info_node = cgraph_node::local_info_node (target->decl);
1906 9932931 : cum->call_abi = ix86_function_abi (target->decl);
1907 9932931 : preserve_none_type = TREE_TYPE (target->decl);
1908 : }
1909 : else
1910 : {
1911 145908 : cum->call_abi = ix86_function_abi (fndecl);
1912 145908 : preserve_none_type = TREE_TYPE (fndecl);
1913 : }
1914 : }
1915 : else
1916 : {
1917 339959 : cum->call_abi = ix86_function_type_abi (fntype);
1918 339959 : preserve_none_type = fntype;
1919 : }
1920 10418798 : cum->preserve_none_abi
1921 10418798 : = (preserve_none_type
1922 20720306 : && (lookup_attribute ("preserve_none",
1923 10301508 : TYPE_ATTRIBUTES (preserve_none_type))
1924 : != nullptr));
1925 :
1926 10418798 : cum->caller = caller;
1927 :
1928 : /* Set up the number of registers to use for passing arguments. */
1929 10418798 : cum->nregs = ix86_regparm;
1930 10418798 : if (TARGET_64BIT)
1931 : {
1932 9382609 : cum->nregs = (cum->call_abi == SYSV_ABI
1933 9382609 : ? X86_64_REGPARM_MAX
1934 : : X86_64_MS_REGPARM_MAX);
1935 : }
1936 10418798 : if (TARGET_SSE)
1937 : {
1938 10409688 : cum->sse_nregs = SSE_REGPARM_MAX;
1939 10409688 : if (TARGET_64BIT)
1940 : {
1941 9373619 : cum->sse_nregs = (cum->call_abi == SYSV_ABI
1942 9373619 : ? X86_64_SSE_REGPARM_MAX
1943 : : X86_64_MS_SSE_REGPARM_MAX);
1944 : }
1945 : }
1946 10418798 : if (TARGET_MMX)
1947 11242211 : cum->mmx_nregs = MMX_REGPARM_MAX;
/* All ABI warning flags start out enabled; the 32-bit stdarg path
   below clears them again. */
1948 10418798 : cum->warn_avx512f = true;
1949 10418798 : cum->warn_avx = true;
1950 10418798 : cum->warn_sse = true;
1951 10418798 : cum->warn_mmx = true;
1952 :
1953 : /* Because type might mismatch in between caller and callee, we need to
1954 : use actual type of function for local calls.
1955 : FIXME: cgraph_analyze can be told to actually record if function uses
1956 : va_start so for local functions maybe_vaarg can be made aggressive
1957 : helping K&R code.
1958 : FIXME: once the type system is fixed, we won't need this code anymore. */
1959 10418798 : if (local_info_node && local_info_node->local
1960 427951 : && local_info_node->can_change_signature)
1961 404792 : fntype = TREE_TYPE (target->decl);
1962 10418798 : cum->stdarg = stdarg_p (fntype);
/* Without a function type, maybe_vaarg is set exactly when LIBNAME is
   null. */
1963 20837596 : cum->maybe_vaarg = (fntype
1964 10888613 : ? (!prototype_p (fntype) || stdarg_p (fntype))
1965 117290 : : !libname);
1966 :
1967 10418798 : cum->decl = fndecl;
1968 :
/* warn_empty starts out set when warn_abi is off or the function is
   stdarg.  Otherwise it is set only if a non-empty argument type
   follows an empty one in the argument list. */
1969 10418798 : cum->warn_empty = !warn_abi || cum->stdarg;
1970 10418798 : if (!cum->warn_empty && fntype)
1971 : {
1972 2693010 : function_args_iterator iter;
1973 2693010 : tree argtype;
1974 2693010 : bool seen_empty_type = false;
1975 7462918 : FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1976 : {
1977 7462855 : if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1978 : break;
1979 4790005 : if (TYPE_EMPTY_P (argtype))
1980 : seen_empty_type = true;
1981 4717848 : else if (seen_empty_type)
1982 : {
1983 20097 : cum->warn_empty = true;
1984 20097 : break;
1985 : }
1986 : }
1987 : }
1988 :
1989 10418798 : if (!TARGET_64BIT)
1990 : {
1991 : /* If there are variable arguments, then we won't pass anything
1992 : in registers in 32-bit mode. */
1993 1036189 : if (stdarg_p (fntype))
1994 : {
1995 9077 : cum->nregs = 0;
1996 : /* Since in 32-bit, variable arguments are always passed on
1997 : stack, there is scratch register available for indirect
1998 : sibcall. */
1999 9077 : cfun->machine->arg_reg_available = true;
2000 9077 : cum->sse_nregs = 0;
2001 9077 : cum->mmx_nregs = 0;
2002 9077 : cum->warn_avx512f = false;
2003 9077 : cum->warn_avx = false;
2004 9077 : cum->warn_sse = false;
2005 9077 : cum->warn_mmx = false;
2006 9077 : return;
2007 : }
2008 :
2009 : /* Use ecx and edx registers if function has fastcall attribute,
2010 : else look for regparm information. */
2011 1027112 : if (fntype)
2012 : {
2013 1013904 : unsigned int ccvt = ix86_get_callcvt (fntype);
2014 1013904 : if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
2015 : {
2016 0 : cum->nregs = 1;
2017 0 : cum->fastcall = 1; /* Same first register as in fastcall. */
2018 : }
2019 1013904 : else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
2020 : {
2021 4 : cum->nregs = 2;
2022 4 : cum->fastcall = 1;
2023 : }
2024 : else
2025 1013900 : cum->nregs = ix86_function_regparm (fntype, fndecl);
2026 : }
2027 :
2028 : /* Set up the number of SSE registers used for passing SFmode
2029 : and DFmode arguments. Warn for mismatching ABI. */
2030 1027112 : cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
2031 : }
2032 :
/* Not reached on the 32-bit stdarg path, which returned above. */
2033 10409721 : cfun->machine->arg_reg_available = (cum->nregs > 0);
2034 : }
2035 :
2036 : /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2037 : But in the case of vector types, it is some vector mode.
2038 :
2039 : When we have only some of our vector isa extensions enabled, then there
2040 : are some modes for which vector_mode_supported_p is false. For these
2041 : modes, the generic vector support in gcc will choose some non-vector mode
2042 : in order to implement the type. By computing the natural mode, we'll
2043 : select the proper ABI location for the operand and not depend on whatever
2044 : the middle-end decides to do with these vector types.
2045 :
2046 : The middle-end can't deal with the vector types > 16 bytes. In this
2047 : case, we return the original mode and warn ABI change if CUM isn't
2048 : NULL.
2049 :
2050 : If IN_RETURN is true, warn ABI change if the vector mode isn't
2051 : available for function return value.
   Each -Wpsabi warning below is guarded by its own function-static
   flag, which is set only when warning returns true. */
2052 :
2053 : static machine_mode
2054 228315054 : type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
2055 : bool in_return)
2056 : {
2057 228315054 : machine_mode mode = TYPE_MODE (type);
2058 :
2059 228315054 : if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
2060 : {
2061 467240 : HOST_WIDE_INT size = int_size_in_bytes (type);
2062 467240 : if ((size == 8 || size == 16 || size == 32 || size == 64)
2063 : /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2064 467240 : && TYPE_VECTOR_SUBPARTS (type) > 1)
2065 : {
2066 430668 : machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2067 :
2068 : /* There are no XFmode vector modes ... */
2069 430668 : if (innermode == XFmode)
2070 : return mode;
2071 :
2072 : /* ... and no decimal float vector modes. */
2073 430115 : if (DECIMAL_FLOAT_MODE_P (innermode))
2074 : return mode;
2075 :
2076 429822 : if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
2077 : mode = MIN_MODE_VECTOR_FLOAT;
2078 : else
2079 360002 : mode = MIN_MODE_VECTOR_INT;
2080 :
2081 : /* Get the mode which has this inner mode and number of units. */
2082 9086586 : FOR_EACH_MODE_FROM (mode, mode)
2083 18908131 : if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2084 10251367 : && GET_MODE_INNER (mode) == innermode)
2085 : {
/* 64-byte vector without AVX512F: warn and return TYPE_MODE. */
2086 429822 : if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
2087 : {
2088 293561 : static bool warnedavx512f;
2089 293561 : static bool warnedavx512f_ret;
2090 :
2091 293561 : if (cum && cum->warn_avx512f && !warnedavx512f)
2092 : {
2093 1361 : if (warning (OPT_Wpsabi, "AVX512F vector argument "
2094 : "without AVX512F enabled changes the ABI"))
2095 2 : warnedavx512f = true;
2096 : }
2097 292200 : else if (in_return && !warnedavx512f_ret)
2098 : {
2099 283579 : if (warning (OPT_Wpsabi, "AVX512F vector return "
2100 : "without AVX512F enabled changes the ABI"))
2101 4 : warnedavx512f_ret = true;
2102 : }
2103 :
2104 293561 : return TYPE_MODE (type);
2105 : }
/* 32-byte vector without AVX: warn and return TYPE_MODE. */
2106 136261 : else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
2107 : {
2108 135712 : static bool warnedavx;
2109 135712 : static bool warnedavx_ret;
2110 :
2111 135712 : if (cum && cum->warn_avx && !warnedavx)
2112 : {
2113 770 : if (warning (OPT_Wpsabi, "AVX vector argument "
2114 : "without AVX enabled changes the ABI"))
2115 5 : warnedavx = true;
2116 : }
2117 134942 : else if (in_return && !warnedavx_ret)
2118 : {
2119 120871 : if (warning (OPT_Wpsabi, "AVX vector return "
2120 : "without AVX enabled changes the ABI"))
2121 10 : warnedavx_ret = true;
2122 : }
2123 :
2124 135712 : return TYPE_MODE (type);
2125 : }
/* 16-byte (or 8-byte on 64-bit) vector without SSE: warn only, the
   vector mode found by the loop is still returned below. */
2126 549 : else if (((size == 8 && TARGET_64BIT) || size == 16)
2127 546 : && !TARGET_SSE
2128 140 : && !TARGET_IAMCU)
2129 : {
2130 140 : static bool warnedsse;
2131 140 : static bool warnedsse_ret;
2132 :
2133 140 : if (cum && cum->warn_sse && !warnedsse)
2134 : {
2135 19 : if (warning (OPT_Wpsabi, "SSE vector argument "
2136 : "without SSE enabled changes the ABI"))
2137 6 : warnedsse = true;
2138 : }
2139 121 : else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2140 : {
2141 0 : if (warning (OPT_Wpsabi, "SSE vector return "
2142 : "without SSE enabled changes the ABI"))
2143 0 : warnedsse_ret = true;
2144 : }
2145 : }
/* 8-byte vector on 32-bit without MMX, checked only when there is no
   cfun or its func_type is TYPE_NORMAL: warn only. */
2146 409 : else if ((size == 8 && !TARGET_64BIT)
2147 0 : && (!cfun
2148 0 : || cfun->machine->func_type == TYPE_NORMAL)
2149 0 : && !TARGET_MMX
2150 0 : && !TARGET_IAMCU)
2151 : {
2152 0 : static bool warnedmmx;
2153 0 : static bool warnedmmx_ret;
2154 :
2155 0 : if (cum && cum->warn_mmx && !warnedmmx)
2156 : {
2157 0 : if (warning (OPT_Wpsabi, "MMX vector argument "
2158 : "without MMX enabled changes the ABI"))
2159 0 : warnedmmx = true;
2160 : }
2161 0 : else if (in_return && !warnedmmx_ret)
2162 : {
2163 0 : if (warning (OPT_Wpsabi, "MMX vector return "
2164 : "without MMX enabled changes the ABI"))
2165 0 : warnedmmx_ret = true;
2166 : }
2167 : }
2168 549 : return mode;
2169 : }
2170 :
/* The mode walk is expected to always find a matching vector mode. */
2171 0 : gcc_unreachable ();
2172 : }
2173 : }
2174 :
2175 : return mode;
2176 : }
2177 :
2178 : /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2179 : this may not agree with the mode that the type system has chosen for the
2180 : register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2181 : go ahead and use it. Otherwise we have to build a PARALLEL instead.
   The PARALLEL has mode ORIG_MODE and one element: an EXPR_LIST that
   pairs a MODE register REGNO with const0_rtx. */
2182 :
2183 : static rtx
2184 36447178 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2185 : unsigned int regno)
2186 : {
2187 36447178 : rtx tmp;
2188 :
2189 36447178 : if (orig_mode != BLKmode)
2190 36447150 : tmp = gen_rtx_REG (orig_mode, regno);
2191 : else
2192 : {
2193 28 : tmp = gen_rtx_REG (mode, regno);
2194 28 : tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2195 28 : tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2196 : }
2197 :
2198 36447178 : return tmp;
2199 : }
2200 :
2201 : /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2202 : of this code is to classify each 8bytes of incoming argument by the register
2203 : class and assign registers accordingly. */
2204 :
2205 : /* Return the union class of CLASS1 and CLASS2.
2206 : See the x86-64 PS ABI for details.  The rules below are tried in
   order and the first one that applies decides the result. */
2207 :
2208 : static enum x86_64_reg_class
2209 54902543 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2210 : {
2211 : /* Rule #1: If both classes are equal, this is the resulting class. */
2212 53663363 : if (class1 == class2)
2213 : return class1;
2214 :
2215 : /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2216 : the other class. */
2217 47516650 : if (class1 == X86_64_NO_CLASS)
2218 : return class2;
2219 48335504 : if (class2 == X86_64_NO_CLASS)
2220 : return class1;
2221 :
2222 : /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2223 1658266 : if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2224 : return X86_64_MEMORY_CLASS;
2225 :
2226 : /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
   INTEGERSI merged with SSESF or SSEHF stays INTEGERSI; any other
   pair involving INTEGER or INTEGERSI yields INTEGER. */
2227 1511892 : if ((class1 == X86_64_INTEGERSI_CLASS
2228 189962 : && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2229 1510686 : || (class2 == X86_64_INTEGERSI_CLASS
2230 921197 : && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2231 : return X86_64_INTEGERSI_CLASS;
2232 1506879 : if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2233 382202 : || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2234 : return X86_64_INTEGER_CLASS;
2235 :
2236 : /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2237 : MEMORY is used. */
2238 90802 : if (class1 == X86_64_X87_CLASS
2239 : || class1 == X86_64_X87UP_CLASS
2240 90802 : || class1 == X86_64_COMPLEX_X87_CLASS
2241 : || class2 == X86_64_X87_CLASS
2242 89897 : || class2 == X86_64_X87UP_CLASS
2243 59516 : || class2 == X86_64_COMPLEX_X87_CLASS)
2244 31286 : return X86_64_MEMORY_CLASS;
2245 :
2246 : /* Rule #6: Otherwise class SSE is used. */
2247 : return X86_64_SSE_CLASS;
2248 : }
2249 :
2250 : /* Classify the argument of type TYPE and mode MODE.
2251 : CLASSES will be filled by the register class used to pass each word
2252 : of the operand. The number of words is returned. In case the parameter
2253 : should be passed in memory, 0 is returned. As a special case for zero
2254 : sized containers, classes[0] will be NO_CLASS and 1 is returned.
2255 :
2256 : BIT_OFFSET is used internally for handling records and specifies the
2257 : offset in bits, taken modulo 512 to avoid overflow cases.
2258 :
2259 : See the x86-64 PS ABI for details.
2260 : */
2261 :
2262 : static int
2263 389650162 : classify_argument (machine_mode mode, const_tree type,
2264 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2265 : int &zero_width_bitfields)
2266 : {
2267 389650162 : HOST_WIDE_INT bytes
2268 773176374 : = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2269 389650162 : int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2270 :
2271 : /* Variable sized entities are always passed/returned in memory. */
2272 389650162 : if (bytes < 0)
2273 : return 0;
2274 :
2275 389648963 : if (mode != VOIDmode)
2276 : {
2277 : /* The value of "named" doesn't matter. */
2278 388572183 : function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2279 388572183 : if (targetm.calls.must_pass_in_stack (arg))
2280 37 : return 0;
2281 : }
2282 :
2283 389648926 : if (type && (AGGREGATE_TYPE_P (type)
2284 353888910 : || (BITINT_TYPE_P (type) && words > 1)))
2285 : {
2286 36867487 : int i;
2287 36867487 : tree field;
2288 36867487 : enum x86_64_reg_class subclasses[MAX_CLASSES];
2289 :
2290 : /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2291 36867487 : if (bytes > 64)
2292 : return 0;
2293 :
2294 92595959 : for (i = 0; i < words; i++)
2295 56555162 : classes[i] = X86_64_NO_CLASS;
2296 :
2297 : /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2298 : signalize memory class, so handle it as special case. */
2299 36040797 : if (!words)
2300 : {
2301 82698 : classes[0] = X86_64_NO_CLASS;
2302 82698 : return 1;
2303 : }
2304 :
2305 : /* Classify each field of record and merge classes. */
2306 35958099 : switch (TREE_CODE (type))
2307 : {
2308 33933053 : case RECORD_TYPE:
2309 : /* And now merge the fields of structure. */
2310 915525429 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2311 : {
2312 882105567 : if (TREE_CODE (field) == FIELD_DECL)
2313 : {
2314 50099542 : int num;
2315 :
2316 50099542 : if (TREE_TYPE (field) == error_mark_node)
2317 4 : continue;
2318 :
2319 : /* Bitfields are always classified as integer. Handle them
2320 : early, since later code would consider them to be
2321 : misaligned integers. */
2322 50099538 : if (DECL_BIT_FIELD (field))
2323 : {
2324 1248399 : if (integer_zerop (DECL_SIZE (field)))
2325 : {
2326 12839 : if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2327 8021 : continue;
2328 4818 : if (zero_width_bitfields != 2)
2329 : {
2330 4284 : zero_width_bitfields = 1;
2331 4284 : continue;
2332 : }
2333 : }
2334 1236094 : for (i = (int_bit_position (field)
2335 1236094 : + (bit_offset % 64)) / 8 / 8;
2336 2475274 : i < ((int_bit_position (field) + (bit_offset % 64))
2337 2475274 : + tree_to_shwi (DECL_SIZE (field))
2338 2475274 : + 63) / 8 / 8; i++)
2339 1239180 : classes[i]
2340 2478360 : = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2341 : }
2342 : else
2343 : {
2344 48851139 : int pos;
2345 :
2346 48851139 : type = TREE_TYPE (field);
2347 :
2348 : /* Flexible array member is ignored. */
2349 48851139 : if (TYPE_MODE (type) == BLKmode
2350 646043 : && TREE_CODE (type) == ARRAY_TYPE
2351 168102 : && TYPE_SIZE (type) == NULL_TREE
2352 2007 : && TYPE_DOMAIN (type) != NULL_TREE
2353 48852381 : && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2354 : == NULL_TREE))
2355 : {
2356 1242 : static bool warned;
2357 :
2358 1242 : if (!warned && warn_psabi)
2359 : {
2360 3 : warned = true;
2361 3 : inform (input_location,
2362 : "the ABI of passing struct with"
2363 : " a flexible array member has"
2364 : " changed in GCC 4.4");
2365 : }
2366 1242 : continue;
2367 1242 : }
2368 48849897 : num = classify_argument (TYPE_MODE (type), type,
2369 : subclasses,
2370 48849897 : (int_bit_position (field)
2371 48849897 : + bit_offset) % 512,
2372 : zero_width_bitfields);
2373 48849897 : if (!num)
2374 : return 0;
2375 48336706 : pos = (int_bit_position (field)
2376 48336706 : + (bit_offset % 64)) / 8 / 8;
2377 100043375 : for (i = 0; i < num && (i + pos) < words; i++)
2378 51706669 : classes[i + pos]
2379 51706669 : = merge_classes (subclasses[i], classes[i + pos]);
2380 : }
2381 : }
2382 : }
2383 : break;
2384 :
2385 444666 : case ARRAY_TYPE:
2386 : /* Arrays are handled as small records. */
2387 444666 : {
2388 444666 : int num;
2389 444666 : num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2390 444666 : TREE_TYPE (type), subclasses, bit_offset,
2391 : zero_width_bitfields);
2392 444666 : if (!num)
2393 : return 0;
2394 :
2395 : /* The partial classes are now full classes. */
2396 429184 : if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2397 13868 : subclasses[0] = X86_64_SSE_CLASS;
2398 429184 : if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2399 5126 : subclasses[0] = X86_64_SSE_CLASS;
2400 429184 : if (subclasses[0] == X86_64_INTEGERSI_CLASS
2401 161650 : && !((bit_offset % 64) == 0 && bytes == 4))
2402 129999 : subclasses[0] = X86_64_INTEGER_CLASS;
2403 :
2404 1325866 : for (i = 0; i < words; i++)
2405 896682 : classes[i] = subclasses[i % num];
2406 :
2407 : break;
2408 : }
2409 272546 : case UNION_TYPE:
2410 272546 : case QUAL_UNION_TYPE:
2411 : /* Unions are similar to RECORD_TYPE but offset is always 0.
2412 : */
2413 3033677 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2414 : {
2415 2796396 : if (TREE_CODE (field) == FIELD_DECL)
2416 : {
2417 1231357 : int num;
2418 :
2419 1231357 : if (TREE_TYPE (field) == error_mark_node)
2420 10 : continue;
2421 :
2422 1231347 : num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2423 1231347 : TREE_TYPE (field), subclasses,
2424 : bit_offset, zero_width_bitfields);
2425 1231347 : if (!num)
2426 : return 0;
2427 3152776 : for (i = 0; i < num && i < words; i++)
2428 1956694 : classes[i] = merge_classes (subclasses[i], classes[i]);
2429 : }
2430 : }
2431 : break;
2432 :
2433 1307834 : case BITINT_TYPE:
2434 1307834 : case ENUMERAL_TYPE:
2435 : /* _BitInt(N) for N > 64 is passed as structure containing
2436 : (N + 63) / 64 64-bit elements. */
2437 1307834 : if (words > 2)
2438 : return 0;
2439 75441 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2440 75441 : return 2;
2441 :
2442 0 : default:
2443 0 : gcc_unreachable ();
2444 : }
2445 :
2446 34086327 : if (words > 2)
2447 : {
2448 : /* When size > 16 bytes, if the first one isn't
2449 : X86_64_SSE_CLASS or any other ones aren't
2450 : X86_64_SSEUP_CLASS, everything should be passed in
2451 : memory. */
2452 1654562 : if (classes[0] != X86_64_SSE_CLASS)
2453 : return 0;
2454 :
2455 197316 : for (i = 1; i < words; i++)
2456 179129 : if (classes[i] != X86_64_SSEUP_CLASS)
2457 : return 0;
2458 : }
2459 :
2460 : /* Final merger cleanup. */
2461 76198321 : for (i = 0; i < words; i++)
2462 : {
2463 : /* If one class is MEMORY, everything should be passed in
2464 : memory. */
2465 43781312 : if (classes[i] == X86_64_MEMORY_CLASS)
2466 : return 0;
2467 :
2468 : /* The X86_64_SSEUP_CLASS should be always preceded by
2469 : X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2470 43750739 : if (classes[i] == X86_64_SSEUP_CLASS
2471 207011 : && classes[i - 1] != X86_64_SSE_CLASS
2472 76546 : && classes[i - 1] != X86_64_SSEUP_CLASS)
2473 : {
2474 : /* The first one should never be X86_64_SSEUP_CLASS. */
2475 1916 : gcc_assert (i != 0);
2476 1916 : classes[i] = X86_64_SSE_CLASS;
2477 : }
2478 :
2479 : /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2480 : everything should be passed in memory. */
2481 43750739 : if (classes[i] == X86_64_X87UP_CLASS
2482 179862 : && (classes[i - 1] != X86_64_X87_CLASS))
2483 : {
2484 2370 : static bool warned;
2485 :
2486 : /* The first one should never be X86_64_X87UP_CLASS. */
2487 2370 : gcc_assert (i != 0);
2488 2370 : if (!warned && warn_psabi)
2489 : {
2490 1 : warned = true;
2491 1 : inform (input_location,
2492 : "the ABI of passing union with %<long double%>"
2493 : " has changed in GCC 4.4");
2494 : }
2495 2370 : return 0;
2496 : }
2497 : }
2498 : return words;
2499 : }
2500 :
2501 : /* Compute alignment needed. We align all types to natural boundaries with
2502 : exception of XFmode that is aligned to 64bits. */
2503 352781439 : if (mode != VOIDmode && mode != BLKmode)
2504 : {
2505 351240668 : int mode_alignment = GET_MODE_BITSIZE (mode);
2506 :
2507 351240668 : if (mode == XFmode)
2508 : mode_alignment = 128;
2509 344335404 : else if (mode == XCmode)
2510 552059 : mode_alignment = 256;
2511 351240668 : if (COMPLEX_MODE_P (mode))
2512 2312936 : mode_alignment /= 2;
2513 : /* Misaligned fields are always returned in memory. */
2514 351240668 : if (bit_offset % mode_alignment)
2515 : return 0;
2516 : }
2517 :
2518 : /* for V1xx modes, just use the base mode */
2519 352773806 : if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2520 446216763 : && GET_MODE_UNIT_SIZE (mode) == bytes)
2521 6315 : mode = GET_MODE_INNER (mode);
2522 :
2523 : /* Classification of atomic types. */
2524 352773806 : switch (mode)
2525 : {
2526 206928 : case E_SDmode:
2527 206928 : case E_DDmode:
2528 206928 : classes[0] = X86_64_SSE_CLASS;
2529 206928 : return 1;
2530 98708 : case E_TDmode:
2531 98708 : classes[0] = X86_64_SSE_CLASS;
2532 98708 : classes[1] = X86_64_SSEUP_CLASS;
2533 98708 : return 2;
2534 229905394 : case E_DImode:
2535 229905394 : case E_SImode:
2536 229905394 : case E_HImode:
2537 229905394 : case E_QImode:
2538 229905394 : case E_CSImode:
2539 229905394 : case E_CHImode:
2540 229905394 : case E_CQImode:
2541 229905394 : {
2542 229905394 : int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2543 :
2544 : /* Analyze last 128 bits only. */
2545 229905394 : size = (size - 1) & 0x7f;
2546 :
2547 229905394 : if (size < 32)
2548 : {
2549 102093159 : classes[0] = X86_64_INTEGERSI_CLASS;
2550 102093159 : return 1;
2551 : }
2552 127812235 : else if (size < 64)
2553 : {
2554 117238598 : classes[0] = X86_64_INTEGER_CLASS;
2555 117238598 : return 1;
2556 : }
2557 10573637 : else if (size < 64+32)
2558 : {
2559 3866618 : classes[0] = X86_64_INTEGER_CLASS;
2560 3866618 : classes[1] = X86_64_INTEGERSI_CLASS;
2561 3866618 : return 2;
2562 : }
2563 6707019 : else if (size < 64+64)
2564 : {
2565 6707019 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2566 6707019 : return 2;
2567 : }
2568 : else
2569 : gcc_unreachable ();
2570 : }
2571 2440624 : case E_CDImode:
2572 2440624 : case E_TImode:
2573 2440624 : classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2574 2440624 : return 2;
2575 0 : case E_COImode:
2576 0 : case E_OImode:
2577 : /* OImode shouldn't be used directly. */
2578 0 : gcc_unreachable ();
2579 : case E_CTImode:
2580 : return 0;
2581 828876 : case E_HFmode:
2582 828876 : case E_BFmode:
2583 828876 : if (!(bit_offset % 64))
2584 826326 : classes[0] = X86_64_SSEHF_CLASS;
2585 : else
2586 2550 : classes[0] = X86_64_SSE_CLASS;
2587 : return 1;
2588 9741887 : case E_SFmode:
2589 9741887 : if (!(bit_offset % 64))
2590 9688708 : classes[0] = X86_64_SSESF_CLASS;
2591 : else
2592 53179 : classes[0] = X86_64_SSE_CLASS;
2593 : return 1;
2594 4246619 : case E_DFmode:
2595 4246619 : classes[0] = X86_64_SSEDF_CLASS;
2596 4246619 : return 1;
2597 6904548 : case E_XFmode:
2598 6904548 : classes[0] = X86_64_X87_CLASS;
2599 6904548 : classes[1] = X86_64_X87UP_CLASS;
2600 6904548 : return 2;
2601 1289867 : case E_TFmode:
2602 1289867 : classes[0] = X86_64_SSE_CLASS;
2603 1289867 : classes[1] = X86_64_SSEUP_CLASS;
2604 1289867 : return 2;
2605 76798 : case E_HCmode:
2606 76798 : case E_BCmode:
2607 76798 : classes[0] = X86_64_SSE_CLASS;
2608 76798 : if (!(bit_offset % 64))
2609 : return 1;
2610 : else
2611 : {
2612 98 : classes[1] = X86_64_SSEHF_CLASS;
2613 98 : return 2;
2614 : }
2615 693999 : case E_SCmode:
2616 693999 : classes[0] = X86_64_SSE_CLASS;
2617 693999 : if (!(bit_offset % 64))
2618 : return 1;
2619 : else
2620 : {
2621 1119 : static bool warned;
2622 :
2623 1119 : if (!warned && warn_psabi)
2624 : {
2625 2 : warned = true;
2626 2 : inform (input_location,
2627 : "the ABI of passing structure with %<complex float%>"
2628 : " member has changed in GCC 4.4");
2629 : }
2630 1119 : classes[1] = X86_64_SSESF_CLASS;
2631 1119 : return 2;
2632 : }
2633 704001 : case E_DCmode:
2634 704001 : classes[0] = X86_64_SSEDF_CLASS;
2635 704001 : classes[1] = X86_64_SSEDF_CLASS;
2636 704001 : return 2;
2637 552059 : case E_XCmode:
2638 552059 : classes[0] = X86_64_COMPLEX_X87_CLASS;
2639 552059 : return 1;
2640 : case E_TCmode:
2641 : /* This mode is larger than 16 bytes. */
2642 : return 0;
2643 25334908 : case E_V8SFmode:
2644 25334908 : case E_V8SImode:
2645 25334908 : case E_V32QImode:
2646 25334908 : case E_V16HFmode:
2647 25334908 : case E_V16BFmode:
2648 25334908 : case E_V16HImode:
2649 25334908 : case E_V4DFmode:
2650 25334908 : case E_V4DImode:
2651 25334908 : classes[0] = X86_64_SSE_CLASS;
2652 25334908 : classes[1] = X86_64_SSEUP_CLASS;
2653 25334908 : classes[2] = X86_64_SSEUP_CLASS;
2654 25334908 : classes[3] = X86_64_SSEUP_CLASS;
2655 25334908 : return 4;
2656 27469792 : case E_V8DFmode:
2657 27469792 : case E_V16SFmode:
2658 27469792 : case E_V32HFmode:
2659 27469792 : case E_V32BFmode:
2660 27469792 : case E_V8DImode:
2661 27469792 : case E_V16SImode:
2662 27469792 : case E_V32HImode:
2663 27469792 : case E_V64QImode:
2664 27469792 : classes[0] = X86_64_SSE_CLASS;
2665 27469792 : classes[1] = X86_64_SSEUP_CLASS;
2666 27469792 : classes[2] = X86_64_SSEUP_CLASS;
2667 27469792 : classes[3] = X86_64_SSEUP_CLASS;
2668 27469792 : classes[4] = X86_64_SSEUP_CLASS;
2669 27469792 : classes[5] = X86_64_SSEUP_CLASS;
2670 27469792 : classes[6] = X86_64_SSEUP_CLASS;
2671 27469792 : classes[7] = X86_64_SSEUP_CLASS;
2672 27469792 : return 8;
2673 37342797 : case E_V4SFmode:
2674 37342797 : case E_V4SImode:
2675 37342797 : case E_V16QImode:
2676 37342797 : case E_V8HImode:
2677 37342797 : case E_V8HFmode:
2678 37342797 : case E_V8BFmode:
2679 37342797 : case E_V2DFmode:
2680 37342797 : case E_V2DImode:
2681 37342797 : classes[0] = X86_64_SSE_CLASS;
2682 37342797 : classes[1] = X86_64_SSEUP_CLASS;
2683 37342797 : return 2;
2684 3263874 : case E_V1TImode:
2685 3263874 : case E_V1DImode:
2686 3263874 : case E_V2SFmode:
2687 3263874 : case E_V2SImode:
2688 3263874 : case E_V4HImode:
2689 3263874 : case E_V4HFmode:
2690 3263874 : case E_V4BFmode:
2691 3263874 : case E_V2HFmode:
2692 3263874 : case E_V2BFmode:
2693 3263874 : case E_V8QImode:
2694 3263874 : classes[0] = X86_64_SSE_CLASS;
2695 3263874 : return 1;
2696 : case E_BLKmode:
2697 : case E_VOIDmode:
2698 : return 0;
2699 45148 : default:
2700 45148 : gcc_assert (VECTOR_MODE_P (mode));
2701 :
2702 45148 : if (bytes > 16)
2703 : return 0;
2704 :
2705 60568 : gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2706 :
2707 60568 : if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2708 29850 : classes[0] = X86_64_INTEGERSI_CLASS;
2709 : else
2710 434 : classes[0] = X86_64_INTEGER_CLASS;
2711 30284 : classes[1] = X86_64_INTEGER_CLASS;
2712 30284 : return 1 + (bytes > 8);
2713 : }
2714 : }
2715 :
2716 : /* Wrapper around classify_argument with the extra zero_width_bitfields
2717 : argument, to diagnose GCC 12.1 ABI differences for C. */
2718 :
2719 : static int
2720 339123718 : classify_argument (machine_mode mode, const_tree type,
2721 : enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2722 : {
2723 339123718 : int zero_width_bitfields = 0;
2724 339123718 : static bool warned = false;
2725 339123718 : int n = classify_argument (mode, type, classes, bit_offset,
2726 : zero_width_bitfields);
2727 339123718 : if (!zero_width_bitfields || warned || !warn_psabi)
2728 : return n;
2729 534 : enum x86_64_reg_class alt_classes[MAX_CLASSES];
2730 534 : zero_width_bitfields = 2;
2731 534 : if (classify_argument (mode, type, alt_classes, bit_offset,
2732 : zero_width_bitfields) != n)
2733 0 : zero_width_bitfields = 3;
2734 : else
2735 1286 : for (int i = 0; i < n; i++)
2736 760 : if (classes[i] != alt_classes[i])
2737 : {
2738 8 : zero_width_bitfields = 3;
2739 8 : break;
2740 : }
2741 534 : if (zero_width_bitfields == 3)
2742 : {
2743 8 : warned = true;
2744 8 : const char *url
2745 : = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2746 :
2747 8 : inform (input_location,
2748 : "the ABI of passing C structures with zero-width bit-fields"
2749 : " has changed in GCC %{12.1%}", url);
2750 : }
2751 : return n;
2752 : }
2753 :
2754 : /* Examine the argument and return set number of register required in each
2755 : class. Return true iff parameter should be passed in memory. */
2756 :
2757 : static bool
2758 229459165 : examine_argument (machine_mode mode, const_tree type, bool in_return,
2759 : int *int_nregs, int *sse_nregs)
2760 : {
2761 229459165 : enum x86_64_reg_class regclass[MAX_CLASSES];
2762 229459165 : int n = classify_argument (mode, type, regclass, 0);
2763 :
2764 229459165 : *int_nregs = 0;
2765 229459165 : *sse_nregs = 0;
2766 :
2767 229459165 : if (!n)
2768 : return true;
2769 664735932 : for (n--; n >= 0; n--)
2770 440756439 : switch (regclass[n])
2771 : {
2772 152366192 : case X86_64_INTEGER_CLASS:
2773 152366192 : case X86_64_INTEGERSI_CLASS:
2774 152366192 : (*int_nregs)++;
2775 152366192 : break;
2776 74405590 : case X86_64_SSE_CLASS:
2777 74405590 : case X86_64_SSEHF_CLASS:
2778 74405590 : case X86_64_SSESF_CLASS:
2779 74405590 : case X86_64_SSEDF_CLASS:
2780 74405590 : (*sse_nregs)++;
2781 74405590 : break;
2782 : case X86_64_NO_CLASS:
2783 : case X86_64_SSEUP_CLASS:
2784 : break;
2785 9430636 : case X86_64_X87_CLASS:
2786 9430636 : case X86_64_X87UP_CLASS:
2787 9430636 : case X86_64_COMPLEX_X87_CLASS:
2788 9430636 : if (!in_return)
2789 : return true;
2790 : break;
2791 0 : case X86_64_MEMORY_CLASS:
2792 0 : gcc_unreachable ();
2793 : }
2794 :
2795 : return false;
2796 : }
2797 :
2798 : /* Construct container for the argument used by GCC interface. See
2799 : FUNCTION_ARG for the detailed description.

   MODE and TYPE describe the value and are what gets classified;
   ORIG_MODE is only forwarded to gen_reg_or_parallel.  IN_RETURN is
   true when the container is for a function result (it selects the
   "return" diagnostics and is passed on to examine_argument).
   NINTREGS and NSSEREGS are the numbers of integer and SSE registers
   still available.  INTREG points at the integer hard register numbers
   to use, in order; SSE_REGNO is the index of the first SSE register
   to use and is mapped through GET_SSE_REGNO.

   Return NULL when no register container is built: the value is
   classified for memory, needs more registers than are available, an
   SSE/x87 diagnostic applies, or the value has no register class.
   Otherwise return a REG, or a PARALLEL whose entries are EXPR_LISTs
   pairing a register with its byte offset in the value.  */
2800 :
2801 : static rtx
2802 111281221 : construct_container (machine_mode mode, machine_mode orig_mode,
2803 : const_tree type, bool in_return, int nintregs,
2804 : int nsseregs, const int *intreg, int sse_regno)
2805 : {
2806 : /* The following variables hold the static issued_error state. */
2807 111281221 : static bool issued_sse_arg_error;
2808 111281221 : static bool issued_sse_ret_error;
2809 111281221 : static bool issued_x87_ret_error;
2810 :
2811 111281221 : machine_mode tmpmode;
2812 111281221 : int bytes
2813 221885169 : = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2814 111281221 : enum x86_64_reg_class regclass[MAX_CLASSES];
2815 111281221 : int n;
2816 111281221 : int i;
2817 111281221 : int nexps = 0;
2818 111281221 : int needed_sseregs, needed_intregs;
2819 111281221 : rtx exp[MAX_CLASSES];
2820 111281221 : rtx ret;
2821 :
/* examine_argument returns true when the value must be passed in
   memory; there is no register container in that case.  */
2822 111281221 : if (examine_argument (mode, type, in_return, &needed_intregs,
2823 : &needed_sseregs))
2824 : return NULL;
2825 :
/* Not enough free registers of one kind or the other.  */
2826 110769303 : if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2827 : return NULL;
2828 :
2829 : /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2830 : some less clueful developer tries to use floating-point anyway. */
2831 109664624 : if (needed_sseregs
2832 36767966 : && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2833 : {
2834 : /* Return early if we shouldn't raise an error for invalid
2835 : calls. */
2836 71 : if (cfun != NULL && cfun->machine->silent_p)
2837 : return NULL;
/* Each of the diagnostics below is issued at most once, guarded by
   the matching static flag.  */
2838 39 : if (in_return)
2839 : {
2840 34 : if (!issued_sse_ret_error)
2841 : {
2842 16 : if (VALID_SSE2_TYPE_MODE (mode))
2843 5 : error ("SSE register return with SSE2 disabled");
2844 : else
2845 11 : error ("SSE register return with SSE disabled");
2846 16 : issued_sse_ret_error = true;
2847 : }
2848 : }
2849 5 : else if (!issued_sse_arg_error)
2850 : {
2851 5 : if (VALID_SSE2_TYPE_MODE (mode))
2852 0 : error ("SSE register argument with SSE2 disabled");
2853 : else
2854 5 : error ("SSE register argument with SSE disabled");
2855 5 : issued_sse_arg_error = true;
2856 : }
2857 39 : return NULL;
2858 : }
2859 :
/* Classify again, this time to obtain the individual classes; entry I
   of REGCLASS describes the 8 bytes at offset I * 8.  */
2860 109664553 : n = classify_argument (mode, type, regclass, 0);
2861 109664553 : gcc_assert (n);
2862 :
2863 : /* Likewise, error if the ABI requires us to return values in the
2864 : x87 registers and the user specified -mno-80387. */
2865 109664553 : if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2866 1424666 : for (i = 0; i < n; i++)
2867 751944 : if (regclass[i] == X86_64_X87_CLASS
2868 : || regclass[i] == X86_64_X87UP_CLASS
2869 751944 : || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2870 : {
2871 : /* Return early if we shouldn't raise an error for invalid
2872 : calls. */
2873 16 : if (cfun != NULL && cfun->machine->silent_p)
2874 : return NULL;
2875 13 : if (!issued_x87_ret_error)
2876 : {
2877 8 : error ("x87 register return with x87 disabled");
2878 8 : issued_x87_ret_error = true;
2879 : }
2880 13 : return NULL;
2881 : }
2882 :
2883 : /* First construct simple cases. Avoid SCmode, since we want to use
2884 : single register to pass this type. */
2885 109664537 : if (n == 1 && mode != SCmode && mode != HCmode)
2886 72833863 : switch (regclass[0])
2887 : {
2888 66790454 : case X86_64_INTEGER_CLASS:
2889 66790454 : case X86_64_INTEGERSI_CLASS:
2890 66790454 : return gen_rtx_REG (mode, intreg[0]);
2891 5843099 : case X86_64_SSE_CLASS:
2892 5843099 : case X86_64_SSEHF_CLASS:
2893 5843099 : case X86_64_SSESF_CLASS:
2894 5843099 : case X86_64_SSEDF_CLASS:
/* A BLKmode value breaks out of the switch and is handled by the
   PARALLEL code further down.  */
2895 5843099 : if (mode != BLKmode)
2896 11685390 : return gen_reg_or_parallel (mode, orig_mode,
2897 11685390 : GET_SSE_REGNO (sse_regno));
2898 : break;
2899 172007 : case X86_64_X87_CLASS:
2900 172007 : case X86_64_COMPLEX_X87_CLASS:
2901 172007 : return gen_rtx_REG (mode, FIRST_STACK_REG);
2902 : case X86_64_NO_CLASS:
2903 : /* Zero sized array, struct or class. */
2904 : return NULL;
2905 0 : default:
2906 0 : gcc_unreachable ();
2907 : }
/* An SSE class followed only by SSEUP classes (2, 4 or 8 classes in
   total) is a single vector value: use one SSE register for all of it,
   unless the value is BLKmode.  */
2908 36831078 : if (n == 2
2909 19036943 : && regclass[0] == X86_64_SSE_CLASS
2910 12881589 : && regclass[1] == X86_64_SSEUP_CLASS
2911 12876434 : && mode != BLKmode)
2912 25752868 : return gen_reg_or_parallel (mode, orig_mode,
2913 25752868 : GET_SSE_REGNO (sse_regno));
2914 23954644 : if (n == 4
2915 8428958 : && regclass[0] == X86_64_SSE_CLASS
2916 8428958 : && regclass[1] == X86_64_SSEUP_CLASS
2917 8428958 : && regclass[2] == X86_64_SSEUP_CLASS
2918 8428958 : && regclass[3] == X86_64_SSEUP_CLASS
2919 8428958 : && mode != BLKmode)
2920 16854538 : return gen_reg_or_parallel (mode, orig_mode,
2921 16854538 : GET_SSE_REGNO (sse_regno));
2922 15527375 : if (n == 8
2923 9126513 : && regclass[0] == X86_64_SSE_CLASS
2924 9126513 : && regclass[1] == X86_64_SSEUP_CLASS
2925 9126513 : && regclass[2] == X86_64_SSEUP_CLASS
2926 9126513 : && regclass[3] == X86_64_SSEUP_CLASS
2927 9126513 : && regclass[4] == X86_64_SSEUP_CLASS
2928 9126513 : && regclass[5] == X86_64_SSEUP_CLASS
2929 9126513 : && regclass[6] == X86_64_SSEUP_CLASS
2930 9126513 : && regclass[7] == X86_64_SSEUP_CLASS
2931 9126513 : && mode != BLKmode)
2932 18248754 : return gen_reg_or_parallel (mode, orig_mode,
2933 18248754 : GET_SSE_REGNO (sse_regno));
/* X87 followed by X87UP: an XFmode value in the first stack register.
   This is only reached for return values, because examine_argument
   reports the x87 classes as "memory" when !IN_RETURN.  */
2934 6402998 : if (n == 2
2935 6160509 : && regclass[0] == X86_64_X87_CLASS
2936 2252451 : && regclass[1] == X86_64_X87UP_CLASS)
2937 2252451 : return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2938 :
/* Two INTEGER classes whose hard registers are consecutive can be
   described as one register starting at intreg[0].  */
2939 4150547 : if (n == 2
2940 3908058 : && regclass[0] == X86_64_INTEGER_CLASS
2941 3483885 : && regclass[1] == X86_64_INTEGER_CLASS
2942 3475587 : && (mode == CDImode || mode == TImode || mode == BLKmode)
2943 3475587 : && intreg[0] + 1 == intreg[1])
2944 : {
2945 3158305 : if (mode == BLKmode)
2946 : {
2947 : /* Use TImode for BLKmode values in 2 integer registers. */
2948 505084 : exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2949 252542 : gen_rtx_REG (TImode, intreg[0]),
2950 : GEN_INT (0));
2951 252542 : ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2952 252542 : XVECEXP (ret, 0, 0) = exp[0];
2953 252542 : return ret;
2954 : }
2955 : else
2956 2905763 : return gen_rtx_REG (mode, intreg[0]);
2957 : }
2958 :
2959 : /* Otherwise figure out the entries of the PARALLEL. */
2960 2734237 : for (i = 0; i < n; i++)
2961 : {
2962 1741995 : int pos;
2963 :
2964 1741995 : switch (regclass[i])
2965 : {
2966 : case X86_64_NO_CLASS:
2967 : break;
2968 993636 : case X86_64_INTEGER_CLASS:
2969 993636 : case X86_64_INTEGERSI_CLASS:
2970 : /* Merge TImodes on aligned occasions here too. */
/* When fewer than 8 bytes remain at offset I * 8, look for an integer
   mode of exactly the remaining size.  */
2971 993636 : if (i * 8 + 8 > bytes)
2972 : {
2973 3233 : unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2974 3233 : if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2975 : /* We've requested 24 bytes we
2976 : don't have mode for. Use DImode. */
2977 357 : tmpmode = DImode;
2978 : }
2979 990403 : else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2980 : tmpmode = SImode;
2981 : else
2982 816460 : tmpmode = DImode;
/* Take the next integer register from INTREG and advance the
   pointer.  */
2983 1987272 : exp [nexps++]
2984 993636 : = gen_rtx_EXPR_LIST (VOIDmode,
2985 993636 : gen_rtx_REG (tmpmode, *intreg),
2986 993636 : GEN_INT (i*8));
2987 993636 : intreg++;
2988 993636 : break;
2989 592 : case X86_64_SSEHF_CLASS:
/* BFmode values keep BFmode; everything else in this class is
   described as HFmode.  */
2990 592 : tmpmode = (mode == BFmode ? BFmode : HFmode);
2991 1184 : exp [nexps++]
2992 1184 : = gen_rtx_EXPR_LIST (VOIDmode,
2993 : gen_rtx_REG (tmpmode,
2994 592 : GET_SSE_REGNO (sse_regno)),
2995 592 : GEN_INT (i*8));
2996 592 : sse_regno++;
2997 592 : break;
2998 3007 : case X86_64_SSESF_CLASS:
2999 6014 : exp [nexps++]
3000 6014 : = gen_rtx_EXPR_LIST (VOIDmode,
3001 : gen_rtx_REG (SFmode,
3002 3007 : GET_SSE_REGNO (sse_regno)),
3003 3007 : GEN_INT (i*8));
3004 3007 : sse_regno++;
3005 3007 : break;
3006 484151 : case X86_64_SSEDF_CLASS:
3007 968302 : exp [nexps++]
3008 968302 : = gen_rtx_EXPR_LIST (VOIDmode,
3009 : gen_rtx_REG (DFmode,
3010 484151 : GET_SSE_REGNO (sse_regno)),
3011 484151 : GEN_INT (i*8));
3012 484151 : sse_regno++;
3013 484151 : break;
3014 252403 : case X86_64_SSE_CLASS:
/* POS remembers the class index at which this SSE register starts; I
   is then moved past any SSEUP classes that are folded into the same
   register, so the byte offset below must use POS, not I.  */
3015 252403 : pos = i;
3016 252403 : switch (n)
3017 : {
3018 : case 1:
3019 : tmpmode = DImode;
3020 : break;
3021 10128 : case 2:
3022 10128 : if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
3023 : {
3024 0 : tmpmode = TImode;
3025 0 : i++;
3026 : }
3027 : else
3028 : tmpmode = DImode;
3029 : break;
3030 1689 : case 4:
3031 1689 : gcc_assert (i == 0
3032 : && regclass[1] == X86_64_SSEUP_CLASS
3033 : && regclass[2] == X86_64_SSEUP_CLASS
3034 : && regclass[3] == X86_64_SSEUP_CLASS);
3035 : tmpmode = OImode;
3036 : i += 3;
3037 : break;
3038 2136 : case 8:
3039 2136 : gcc_assert (i == 0
3040 : && regclass[1] == X86_64_SSEUP_CLASS
3041 : && regclass[2] == X86_64_SSEUP_CLASS
3042 : && regclass[3] == X86_64_SSEUP_CLASS
3043 : && regclass[4] == X86_64_SSEUP_CLASS
3044 : && regclass[5] == X86_64_SSEUP_CLASS
3045 : && regclass[6] == X86_64_SSEUP_CLASS
3046 : && regclass[7] == X86_64_SSEUP_CLASS);
3047 : tmpmode = XImode;
3048 : i += 7;
3049 : break;
3050 0 : default:
3051 0 : gcc_unreachable ();
3052 : }
3053 504806 : exp [nexps++]
3054 504806 : = gen_rtx_EXPR_LIST (VOIDmode,
3055 : gen_rtx_REG (tmpmode,
3056 252403 : GET_SSE_REGNO (sse_regno)),
3057 252403 : GEN_INT (pos*8));
3058 252403 : sse_regno++;
3059 252403 : break;
3060 0 : default:
3061 0 : gcc_unreachable ();
3062 : }
3063 : }
3064 :
3065 : /* Empty aligned struct, union or class. */
3066 992242 : if (nexps == 0)
3067 : return NULL;
3068 :
/* Wrap the collected (register, offset) pairs in a PARALLEL.  */
3069 991987 : ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3070 2725776 : for (i = 0; i < nexps; i++)
3071 1733789 : XVECEXP (ret, 0, i) = exp [i];
3072 : return ret;
3073 : }
3074 :
3075 : /* Update the data in CUM to advance over an argument of mode MODE
3076 : and data type TYPE. (TYPE is null for libcalls where that information
3077 : may not be available.)  BYTES is the size of the argument in bytes
   and WORDS its size in words; a BLKmode argument with negative BYTES
   does not touch the integer register counters.
3078 :
3079 : Return the number of integer registers advanced over. */
3080 :
3081 : static int
3082 2129938 : function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3083 : const_tree type, HOST_WIDE_INT bytes,
3084 : HOST_WIDE_INT words)
3085 : {
3086 2129938 : int res = 0;
3087 2129938 : bool error_p = false;
3088 :
3089 2129938 : if (TARGET_IAMCU)
3090 : {
3091 : /* Intel MCU psABI passes scalars and aggregates no larger than 8
3092 : bytes in registers. */
/* Note that the goto jumps to a label placed inside the switch
   below.  */
3093 0 : if (!VECTOR_MODE_P (mode) && bytes <= 8)
3094 0 : goto pass_in_reg;
3095 : return res;
3096 : }
3097 :
3098 2129938 : switch (mode)
3099 : {
3100 : default:
3101 : break;
3102 :
3103 93818 : case E_BLKmode:
3104 93818 : if (bytes < 0)
3105 : break;
3106 : /* FALLTHRU */
3107 :
3108 2093082 : case E_DImode:
3109 2093082 : case E_SImode:
3110 2093082 : case E_HImode:
3111 2093082 : case E_QImode:
3112 93818 : pass_in_reg:
/* Charge WORDS integer registers to CUM.  The argument only counts as
   register-passed (RES = WORDS) when the register count did not go
   negative.  */
3113 2093082 : cum->words += words;
3114 2093082 : cum->nregs -= words;
3115 2093082 : cum->regno += words;
3116 2093082 : if (cum->nregs >= 0)
3117 47363 : res = words;
/* Once the integer registers are used up, reset the counters and
   record in the function's machine data that no argument register is
   available any more.  */
3118 2093082 : if (cum->nregs <= 0)
3119 : {
3120 2058951 : cum->nregs = 0;
3121 2058951 : cfun->machine->arg_reg_available = false;
3122 2058951 : cum->regno = 0;
3123 : }
3124 : break;
3125 :
3126 0 : case E_OImode:
3127 : /* OImode shouldn't be used directly. */
3128 0 : gcc_unreachable ();
3129 :
/* A float_in_sse value of -1 is reported as an error after the switch.
   DFmode is advanced as an SSE argument only when float_in_sse is at
   least 2, SFmode when it is at least 1; otherwise nothing is
   advanced here.  */
3130 4744 : case E_DFmode:
3131 4744 : if (cum->float_in_sse == -1)
3132 0 : error_p = true;
3133 4744 : if (cum->float_in_sse < 2)
3134 : break;
3135 : /* FALLTHRU */
3136 1360 : case E_SFmode:
3137 1360 : if (cum->float_in_sse == -1)
3138 0 : error_p = true;
3139 1360 : if (cum->float_in_sse < 1)
3140 : break;
3141 : /* FALLTHRU */
3142 :
3143 52 : case E_V16HFmode:
3144 52 : case E_V16BFmode:
3145 52 : case E_V8SFmode:
3146 52 : case E_V8SImode:
3147 52 : case E_V64QImode:
3148 52 : case E_V32HImode:
3149 52 : case E_V16SImode:
3150 52 : case E_V8DImode:
3151 52 : case E_V32HFmode:
3152 52 : case E_V32BFmode:
3153 52 : case E_V16SFmode:
3154 52 : case E_V8DFmode:
3155 52 : case E_V32QImode:
3156 52 : case E_V16HImode:
3157 52 : case E_V4DFmode:
3158 52 : case E_V4DImode:
3159 52 : case E_TImode:
3160 52 : case E_V16QImode:
3161 52 : case E_V8HImode:
3162 52 : case E_V4SImode:
3163 52 : case E_V2DImode:
3164 52 : case E_V8HFmode:
3165 52 : case E_V8BFmode:
3166 52 : case E_V4SFmode:
3167 52 : case E_V2DFmode:
/* A non-aggregate value (or one with unknown type) uses up exactly one
   SSE register, regardless of WORDS.  */
3168 52 : if (!type || !AGGREGATE_TYPE_P (type))
3169 : {
3170 52 : cum->sse_words += words;
3171 52 : cum->sse_nregs -= 1;
3172 52 : cum->sse_regno += 1;
3173 52 : if (cum->sse_nregs <= 0)
3174 : {
3175 4 : cum->sse_nregs = 0;
3176 4 : cum->sse_regno = 0;
3177 : }
3178 : }
3179 : break;
3180 :
3181 16 : case E_V8QImode:
3182 16 : case E_V4HImode:
3183 16 : case E_V4HFmode:
3184 16 : case E_V4BFmode:
3185 16 : case E_V2SImode:
3186 16 : case E_V2SFmode:
3187 16 : case E_V1TImode:
3188 16 : case E_V1DImode:
/* Same bookkeeping as above, applied to the MMX register counters.  */
3189 16 : if (!type || !AGGREGATE_TYPE_P (type))
3190 : {
3191 16 : cum->mmx_words += words;
3192 16 : cum->mmx_nregs -= 1;
3193 16 : cum->mmx_regno += 1;
3194 16 : if (cum->mmx_nregs <= 0)
3195 : {
3196 0 : cum->mmx_nregs = 0;
3197 0 : cum->mmx_regno = 0;
3198 : }
3199 : }
3200 : break;
3201 : }
/* Report the float_in_sse == -1 case; float_in_sse is cleared first so
   later arguments do not take this path again.  */
3202 2065107 : if (error_p)
3203 : {
3204 0 : cum->float_in_sse = 0;
3205 0 : error ("calling %qD with SSE calling convention without "
3206 : "SSE/SSE2 enabled", cum->decl);
3207 0 : sorry ("this is a GCC bug that can be worked around by adding "
3208 : "attribute used to function called");
3209 : }
3210 :
3211 : return res;
3212 : }
3213 :
3214 : static int
3215 19030667 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3216 : const_tree type, HOST_WIDE_INT words, bool named)
3217 : {
3218 19030667 : int int_nregs, sse_nregs;
3219 :
3220 : /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3221 19030667 : if (!named && (VALID_AVX512F_REG_MODE (mode)
3222 : || VALID_AVX256_REG_MODE (mode)))
3223 : return 0;
3224 :
3225 19030303 : if (!examine_argument (mode, type, false, &int_nregs, &sse_nregs)
3226 19030303 : && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3227 : {
3228 16752900 : cum->nregs -= int_nregs;
3229 16752900 : cum->sse_nregs -= sse_nregs;
3230 16752900 : cum->regno += int_nregs;
3231 16752900 : cum->sse_regno += sse_nregs;
3232 16752900 : return int_nregs;
3233 : }
3234 : else
3235 : {
3236 2277403 : int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3237 2277403 : cum->words = ROUND_UP (cum->words, align);
3238 2277403 : cum->words += words;
3239 2277403 : return 0;
3240 : }
3241 : }
3242 :
3243 : static int
3244 447161 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3245 : HOST_WIDE_INT words)
3246 : {
3247 : /* Otherwise, this should be passed indirect. */
3248 447161 : gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3249 :
3250 447161 : cum->words += words;
3251 447161 : if (cum->nregs > 0)
3252 : {
3253 289519 : cum->nregs -= 1;
3254 289519 : cum->regno += 1;
3255 289519 : return 1;
3256 : }
3257 : return 0;
3258 : }
3259 :
3260 : /* Update the data in CUM to advance over argument ARG. */
3261 :
3262 : static void
3263 21608133 : ix86_function_arg_advance (cumulative_args_t cum_v,
3264 : const function_arg_info &arg)
3265 : {
3266 21608133 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3267 21608133 : machine_mode mode = arg.mode;
3268 21608133 : HOST_WIDE_INT bytes, words;
3269 21608133 : int nregs;
3270 :
3271 : /* The argument of interrupt handler is a special case and is
3272 : handled in ix86_function_arg. */
3273 21608133 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3274 : return;
3275 :
3276 21607766 : bytes = arg.promoted_size_in_bytes ();
3277 21607766 : words = CEIL (bytes, UNITS_PER_WORD);
3278 :
3279 21607766 : if (arg.type)
3280 21294350 : mode = type_natural_mode (arg.type, NULL, false);
3281 :
3282 21607766 : if (TARGET_64BIT)
3283 : {
3284 19477828 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3285 :
3286 19477828 : if (call_abi == MS_ABI)
3287 447161 : nregs = function_arg_advance_ms_64 (cum, bytes, words);
3288 : else
3289 19030667 : nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3290 19030667 : arg.named);
3291 : }
3292 : else
3293 2129938 : nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3294 :
3295 21607766 : if (!nregs)
3296 : {
3297 : /* Track if there are outgoing arguments on stack. */
3298 5707969 : if (cum->caller)
3299 2718086 : cfun->machine->outgoing_args_on_stack = true;
3300 : }
3301 : }
3302 :
3303 : /* Define where to put the arguments to a function.
3304 : Value is zero to push the argument on the stack,
3305 : or a hard register in which to store the argument.
3306 :
3307 : MODE is the argument's machine mode.
3308 : TYPE is the data type of the argument (as a tree).
3309 : This is null for libcalls where that information may
3310 : not be available.
3311 : CUM is a variable of type CUMULATIVE_ARGS which gives info about
3312 : the preceding args and about the function being called.
3313 : NAMED is nonzero if this argument is a named parameter
3314 : (otherwise it is an extra parameter matching an ellipsis); it is
   taken by the 64-bit variants only, not by function_arg_32.

   For function_arg_32, ORIG_MODE is forwarded to gen_reg_or_parallel,
   BYTES is the size of the argument in bytes and WORDS its size in
   words.  A MODE of VOIDmode yields constm1_rtx.  */
3315 :
3316 : static rtx
3317 2556734 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3318 : machine_mode orig_mode, const_tree type,
3319 : HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3320 : {
3321 2556734 : bool error_p = false;
3322 :
3323 : /* Avoid the AL settings for the Unix64 ABI. */
3324 2556734 : if (mode == VOIDmode)
3325 741977 : return constm1_rtx;
3326 :
3327 1814757 : if (TARGET_IAMCU)
3328 : {
3329 : /* Intel MCU psABI passes scalars and aggregates no larger than 8
3330 : bytes in registers. */
/* Note that the goto jumps to a label placed inside the switch
   below.  */
3331 0 : if (!VECTOR_MODE_P (mode) && bytes <= 8)
3332 0 : goto pass_in_reg;
3333 : return NULL_RTX;
3334 : }
3335 :
3336 1814757 : switch (mode)
3337 : {
3338 : default:
3339 : break;
3340 :
3341 77786 : case E_BLKmode:
3342 77786 : if (bytes < 0)
3343 : break;
3344 : /* FALLTHRU */
3345 1781186 : case E_DImode:
3346 1781186 : case E_SImode:
3347 1781186 : case E_HImode:
3348 1781186 : case E_QImode:
3349 77786 : pass_in_reg:
/* The argument goes in integer registers only if all WORDS of it fit
   in the registers that are left.  */
3350 1781186 : if (words <= cum->nregs)
3351 : {
3352 45531 : int regno = cum->regno;
3353 :
3354 : /* Fastcall allocates the first two DWORD (SImode) or
3355 : smaller arguments to ECX and EDX if it isn't an
3356 : aggregate type . */
3357 45531 : if (cum->fastcall)
3358 : {
/* Breaking out of the switch here means the argument is not given a
   register.  */
3359 6 : if (mode == BLKmode
3360 6 : || mode == DImode
3361 6 : || (type && AGGREGATE_TYPE_P (type)))
3362 : break;
3363 :
3364 : /* ECX not EAX is the first allocated register. */
3365 6 : if (regno == AX_REG)
3366 45531 : regno = CX_REG;
3367 : }
3368 45531 : return gen_rtx_REG (mode, regno);
3369 : }
3370 : break;
3371 :
/* A float_in_sse value of -1 is reported as an error after the switch.
   DFmode is considered for an SSE register only when float_in_sse is
   at least 2, SFmode when it is at least 1.  */
3372 3354 : case E_DFmode:
3373 3354 : if (cum->float_in_sse == -1)
3374 0 : error_p = true;
3375 3354 : if (cum->float_in_sse < 2)
3376 : break;
3377 : /* FALLTHRU */
3378 960 : case E_SFmode:
3379 960 : if (cum->float_in_sse == -1)
3380 0 : error_p = true;
3381 960 : if (cum->float_in_sse < 1)
3382 : break;
3383 : /* FALLTHRU */
3384 12 : case E_TImode:
3385 : /* In 32bit, we pass TImode in xmm registers. */
3386 12 : case E_V16QImode:
3387 12 : case E_V8HImode:
3388 12 : case E_V4SImode:
3389 12 : case E_V2DImode:
3390 12 : case E_V8HFmode:
3391 12 : case E_V8BFmode:
3392 12 : case E_V4SFmode:
3393 12 : case E_V2DFmode:
/* Non-aggregate values (or values of unknown type) take the next SSE
   register if one is left.  */
3394 12 : if (!type || !AGGREGATE_TYPE_P (type))
3395 : {
3396 12 : if (cum->sse_nregs)
3397 12 : return gen_reg_or_parallel (mode, orig_mode,
3398 12 : cum->sse_regno + FIRST_SSE_REG);
3399 : }
3400 : break;
3401 :
3402 0 : case E_OImode:
3403 0 : case E_XImode:
3404 : /* OImode and XImode shouldn't be used directly. */
3405 0 : gcc_unreachable ();
3406 :
3407 9 : case E_V64QImode:
3408 9 : case E_V32HImode:
3409 9 : case E_V16SImode:
3410 9 : case E_V8DImode:
3411 9 : case E_V32HFmode:
3412 9 : case E_V32BFmode:
3413 9 : case E_V16SFmode:
3414 9 : case E_V8DFmode:
3415 9 : case E_V16HFmode:
3416 9 : case E_V16BFmode:
3417 9 : case E_V8SFmode:
3418 9 : case E_V8SImode:
3419 9 : case E_V32QImode:
3420 9 : case E_V16HImode:
3421 9 : case E_V4DFmode:
3422 9 : case E_V4DImode:
/* These vector modes are handled exactly like the SSE group above.  */
3423 9 : if (!type || !AGGREGATE_TYPE_P (type))
3424 : {
3425 9 : if (cum->sse_nregs)
3426 9 : return gen_reg_or_parallel (mode, orig_mode,
3427 9 : cum->sse_regno + FIRST_SSE_REG);
3428 : }
3429 : break;
3430 :
3431 8 : case E_V8QImode:
3432 8 : case E_V4HImode:
3433 8 : case E_V4HFmode:
3434 8 : case E_V4BFmode:
3435 8 : case E_V2SImode:
3436 8 : case E_V2SFmode:
3437 8 : case E_V1TImode:
3438 8 : case E_V1DImode:
/* Non-aggregate values take the next MMX register if one is left.  */
3439 8 : if (!type || !AGGREGATE_TYPE_P (type))
3440 : {
3441 8 : if (cum->mmx_nregs)
3442 8 : return gen_reg_or_parallel (mode, orig_mode,
3443 8 : cum->mmx_regno + FIRST_MMX_REG);
3444 : }
3445 : break;
3446 : }
/* Report the float_in_sse == -1 case; float_in_sse is cleared first so
   later arguments do not take this path again.  */
3447 4314 : if (error_p)
3448 : {
3449 0 : cum->float_in_sse = 0;
3450 0 : error ("calling %qD with SSE calling convention without "
3451 : "SSE/SSE2 enabled", cum->decl);
3452 0 : sorry ("this is a GCC bug that can be worked around by adding "
3453 : "attribute used to function called");
3454 : }
3455 :
/* Everything that reaches this point is passed on the stack.  */
3456 : return NULL_RTX;
3457 : }
3458 :
3459 : static rtx
3460 18720218 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3461 : machine_mode orig_mode, const_tree type, bool named)
3462 : {
3463 : /* Handle a hidden AL argument containing number of registers
3464 : for varargs x86-64 functions. */
3465 18720218 : if (mode == VOIDmode)
3466 5202168 : return GEN_INT (cum->maybe_vaarg
3467 : ? (cum->sse_nregs < 0
3468 : ? X86_64_SSE_REGPARM_MAX
3469 : : cum->sse_regno)
3470 : : -1);
3471 :
3472 13518050 : switch (mode)
3473 : {
3474 : default:
3475 : break;
3476 :
3477 90175 : case E_V16HFmode:
3478 90175 : case E_V16BFmode:
3479 90175 : case E_V8SFmode:
3480 90175 : case E_V8SImode:
3481 90175 : case E_V32QImode:
3482 90175 : case E_V16HImode:
3483 90175 : case E_V4DFmode:
3484 90175 : case E_V4DImode:
3485 90175 : case E_V32HFmode:
3486 90175 : case E_V32BFmode:
3487 90175 : case E_V16SFmode:
3488 90175 : case E_V16SImode:
3489 90175 : case E_V64QImode:
3490 90175 : case E_V32HImode:
3491 90175 : case E_V8DFmode:
3492 90175 : case E_V8DImode:
3493 : /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3494 90175 : if (!named)
3495 : return NULL;
3496 : break;
3497 : }
3498 :
3499 13517686 : const int *parm_regs;
3500 13517686 : if (cum->preserve_none_abi)
3501 : parm_regs = x86_64_preserve_none_int_parameter_registers;
3502 : else
3503 13517557 : parm_regs = x86_64_int_parameter_registers;
3504 :
3505 13517686 : return construct_container (mode, orig_mode, type, false,
3506 13517686 : cum->nregs, cum->sse_nregs,
3507 13517686 : &parm_regs[cum->regno],
3508 13517686 : cum->sse_regno);
3509 : }
3510 :
3511 : static rtx
3512 296428 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3513 : machine_mode orig_mode, bool named, const_tree type,
3514 : HOST_WIDE_INT bytes)
3515 : {
3516 296428 : unsigned int regno;
3517 :
3518 : /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3519 : We use value of -2 to specify that current function call is MSABI. */
3520 296428 : if (mode == VOIDmode)
3521 36295 : return GEN_INT (-2);
3522 :
3523 : /* If we've run out of registers, it goes on the stack. */
3524 260133 : if (cum->nregs == 0)
3525 : return NULL_RTX;
3526 :
3527 176374 : regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3528 :
3529 : /* Only floating point modes less than 64 bits are passed in anything but
3530 : integer regs. Larger floating point types are excluded as the Windows
3531 : ABI requires vreg args can be shadowed in GPRs (for red zone / varargs). */
3532 176374 : if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
3533 : {
3534 38260 : if (named)
3535 : {
3536 38260 : if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3537 37263 : regno = cum->regno + FIRST_SSE_REG;
3538 : }
3539 : else
3540 : {
3541 0 : rtx t1, t2;
3542 :
3543 : /* Unnamed floating parameters are passed in both the
3544 : SSE and integer registers. */
3545 0 : t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3546 0 : t2 = gen_rtx_REG (mode, regno);
3547 0 : t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3548 0 : t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3549 0 : return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3550 : }
3551 : }
3552 : /* Handle aggregated types passed in register. */
3553 176374 : if (orig_mode == BLKmode)
3554 : {
3555 0 : if (bytes > 0 && bytes <= 8)
3556 0 : mode = (bytes > 4 ? DImode : SImode);
3557 0 : if (mode == BLKmode)
3558 0 : mode = DImode;
3559 : }
3560 :
3561 176374 : return gen_reg_or_parallel (mode, orig_mode, regno);
3562 : }
3563 :
3564 : /* Return where to put the arguments to a function.
3565 : Return zero to push the argument on the stack, or a hard register in which to store the argument.
3566 :
3567 : ARG describes the argument while CUM gives information about the
3568 : preceding args and about the function being called. */
3569 :
3570 : static rtx
3571 21573567 : ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3572 : {
3573 21573567 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3574 21573567 : machine_mode mode = arg.mode;
3575 21573567 : HOST_WIDE_INT bytes, words;
3576 21573567 : rtx reg;
3577 :
3578 21573567 : if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3579 : {
3580 187 : gcc_assert (arg.type != NULL_TREE);
3581 187 : if (POINTER_TYPE_P (arg.type))
3582 : {
3583 : /* This is the pointer argument. */
3584 122 : gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3585 : /* It is at -WORD(AP) in the current frame in interrupt and
3586 : exception handlers. */
3587 122 : reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3588 : }
3589 : else
3590 : {
3591 65 : gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3592 : && TREE_CODE (arg.type) == INTEGER_TYPE
3593 : && TYPE_MODE (arg.type) == word_mode);
3594 : /* The error code is the word-mode integer argument at
3595 : -2 * WORD(AP) in the current frame of the exception
3596 : handler. */
3597 65 : reg = gen_rtx_MEM (word_mode,
3598 65 : plus_constant (Pmode,
3599 : arg_pointer_rtx,
3600 65 : -2 * UNITS_PER_WORD));
3601 : }
3602 187 : return reg;
3603 : }
3604 :
3605 21573380 : bytes = arg.promoted_size_in_bytes ();
3606 21573380 : words = CEIL (bytes, UNITS_PER_WORD);
3607 :
3608 : /* To simplify the code below, represent vector types with a vector mode
3609 : even if MMX/SSE are not active. */
3610 21573380 : if (arg.type && VECTOR_TYPE_P (arg.type))
3611 171255 : mode = type_natural_mode (arg.type, cum, false);
3612 :
3613 21573380 : if (TARGET_64BIT)
3614 : {
3615 19016646 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3616 :
3617 19016646 : if (call_abi == MS_ABI)
3618 296428 : reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3619 296428 : arg.type, bytes);
3620 : else
3621 18720218 : reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3622 : }
3623 : else
3624 2556734 : reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3625 :
3626 : /* Track if there are outgoing arguments on stack. */
3627 21573380 : if (reg == NULL_RTX && cum->caller)
3628 2200033 : cfun->machine->outgoing_args_on_stack = true;
3629 :
3630 : return reg;
3631 : }
3632 :
3633 : /* A C expression that indicates when an argument must be passed by
3634 : reference. If nonzero for an argument, a copy of that argument is
3635 : made in memory and a pointer to the argument is passed instead of
3636 : the argument itself. The pointer is passed in whatever way is
3637 : appropriate for passing a pointer to that type. */
3638 :
3639 : static bool
3640 21528806 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3641 : {
3642 21528806 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3643 :
3644 21528806 : if (TARGET_64BIT)
3645 : {
3646 19409126 : enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3647 :
3648 : /* See Windows x64 Software Convention. */
3649 19409126 : if (call_abi == MS_ABI)
3650 : {
3651 441562 : HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3652 :
3653 441562 : if (tree type = arg.type)
3654 : {
3655 : /* Arrays are passed by reference. */
3656 441562 : if (TREE_CODE (type) == ARRAY_TYPE)
3657 : return true;
3658 :
3659 441562 : if (RECORD_OR_UNION_TYPE_P (type))
3660 : {
3661 : /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3662 : are passed by reference. */
3663 15103 : msize = int_size_in_bytes (type);
3664 : }
3665 : }
3666 :
3667 : /* __m128 is passed by reference. */
3668 873131 : return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3669 : }
3670 18967564 : else if (arg.type && int_size_in_bytes (arg.type) == -1)
3671 : return true;
3672 : }
3673 :
3674 : return false;
3675 : }
3676 :
3677 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3678 : passing ABI. XXX: This function is obsolete and is only used for
3679 : checking psABI compatibility with previous versions of GCC. */
3680 :
3681 : static bool
3682 1975065 : ix86_compat_aligned_value_p (const_tree type)
3683 : {
3684 1975065 : machine_mode mode = TYPE_MODE (type);
3685 1975065 : if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3686 1975023 : || mode == TDmode
3687 1975023 : || mode == TFmode
3688 : || mode == TCmode)
3689 1975277 : && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3690 : return true;
3691 1974853 : if (TYPE_ALIGN (type) < 128)
3692 : return false;
3693 :
3694 0 : if (AGGREGATE_TYPE_P (type))
3695 : {
3696 : /* Walk the aggregates recursively. */
3697 0 : switch (TREE_CODE (type))
3698 : {
3699 0 : case RECORD_TYPE:
3700 0 : case UNION_TYPE:
3701 0 : case QUAL_UNION_TYPE:
3702 0 : {
3703 0 : tree field;
3704 :
3705 : /* Walk all the structure fields. */
3706 0 : for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3707 : {
3708 0 : if (TREE_CODE (field) == FIELD_DECL
3709 0 : && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3710 : return true;
3711 : }
3712 : break;
3713 : }
3714 :
3715 0 : case ARRAY_TYPE:
3716 : /* Just for use if some languages passes arrays by value. */
3717 0 : if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3718 : return true;
3719 : break;
3720 :
3721 : default:
3722 : gcc_unreachable ();
3723 : }
3724 : }
3725 : return false;
3726 : }
3727 :
3728 : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3729 : XXX: This function is obsolete and is only used for checking psABI
3730 : compatibility with previous versions of GCC. */
3731 :
3732 : static unsigned int
3733 5556494 : ix86_compat_function_arg_boundary (machine_mode mode,
3734 : const_tree type, unsigned int align)
3735 : {
3736 : /* In 32bit, only _Decimal128 and __float128 are aligned to their
3737 : natural boundaries. */
3738 5556494 : if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3739 : {
3740 : /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3741 : make an exception for SSE modes since these require 128bit
3742 : alignment.
3743 :
3744 : The handling here differs from field_alignment. ICC aligns MMX
3745 : arguments to 4 byte boundaries, while structure fields are aligned
3746 : to 8 byte boundaries. */
3747 1987001 : if (!type)
3748 : {
3749 11936 : if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3750 1986789 : align = PARM_BOUNDARY;
3751 : }
3752 : else
3753 : {
3754 1975065 : if (!ix86_compat_aligned_value_p (type))
3755 1986789 : align = PARM_BOUNDARY;
3756 : }
3757 : }
3758 10713045 : if (align > BIGGEST_ALIGNMENT)
3759 90 : align = BIGGEST_ALIGNMENT;
3760 5556494 : return align;
3761 : }
3762 :
3763 : /* Return true when TYPE should be 128bit aligned for 32bit argument
3764 : passing ABI. */
3765 :
3766 : static bool
3767 1977748 : ix86_contains_aligned_value_p (const_tree type)
3768 : {
3769 1977748 : machine_mode mode = TYPE_MODE (type);
3770 :
3771 1977748 : if (mode == XFmode || mode == XCmode)
3772 : return false;
3773 :
3774 1975606 : if (TYPE_ALIGN (type) < 128)
3775 : return false;
3776 :
3777 2895 : if (AGGREGATE_TYPE_P (type))
3778 : {
3779 : /* Walk the aggregates recursively. */
3780 0 : switch (TREE_CODE (type))
3781 : {
3782 0 : case RECORD_TYPE:
3783 0 : case UNION_TYPE:
3784 0 : case QUAL_UNION_TYPE:
3785 0 : {
3786 0 : tree field;
3787 :
3788 : /* Walk all the structure fields. */
3789 0 : for (field = TYPE_FIELDS (type);
3790 0 : field;
3791 0 : field = DECL_CHAIN (field))
3792 : {
3793 0 : if (TREE_CODE (field) == FIELD_DECL
3794 0 : && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3795 : return true;
3796 : }
3797 : break;
3798 : }
3799 :
3800 0 : case ARRAY_TYPE:
3801 : /* Just for use if some languages passes arrays by value. */
3802 0 : if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3803 : return true;
3804 : break;
3805 :
3806 : default:
3807 : gcc_unreachable ();
3808 : }
3809 : }
3810 : else
3811 2895 : return TYPE_ALIGN (type) >= 128;
3812 :
3813 : return false;
3814 : }
3815 :
3816 : /* Gives the alignment boundary, in bits, of an argument with the
3817 : specified mode and type. */
3818 :
3819 : static unsigned int
3820 10954913 : ix86_function_arg_boundary (machine_mode mode, const_tree type)
3821 : {
3822 10954913 : unsigned int align;
3823 10954913 : if (type)
3824 : {
3825 : /* Since the main variant type is used for call, we convert it to
3826 : the main variant type. */
3827 10915080 : type = TYPE_MAIN_VARIANT (type);
3828 10915080 : align = TYPE_ALIGN (type);
3829 10915080 : if (TYPE_EMPTY_P (type))
3830 24468 : return PARM_BOUNDARY;
3831 : }
3832 : else
3833 39833 : align = GET_MODE_ALIGNMENT (mode);
3834 12955382 : if (align < PARM_BOUNDARY)
3835 4111288 : align = PARM_BOUNDARY;
3836 : else
3837 : {
3838 6819157 : static bool warned;
3839 6819157 : unsigned int saved_align = align;
3840 :
3841 6819157 : if (!TARGET_64BIT)
3842 : {
3843 : /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3844 2013574 : if (!type)
3845 : {
3846 35826 : if (mode == XFmode || mode == XCmode)
3847 : align = PARM_BOUNDARY;
3848 : }
3849 1977748 : else if (!ix86_contains_aligned_value_p (type))
3850 : align = PARM_BOUNDARY;
3851 :
3852 38721 : if (align < 128)
3853 1986789 : align = PARM_BOUNDARY;
3854 : }
3855 :
3856 6819157 : if (warn_psabi
3857 5561330 : && !warned
3858 12375651 : && align != ix86_compat_function_arg_boundary (mode, type,
3859 : saved_align))
3860 : {
3861 90 : warned = true;
3862 90 : inform (input_location,
3863 : "the ABI for passing parameters with %d-byte"
3864 : " alignment has changed in GCC 4.6",
3865 : align / BITS_PER_UNIT);
3866 : }
3867 : }
3868 :
3869 : return align;
3870 : }
3871 :
3872 : /* Return true if N is a possible register number of function value. */
3873 :
3874 : static bool
3875 4693315 : ix86_function_value_regno_p (const unsigned int regno)
3876 : {
3877 4693315 : switch (regno)
3878 : {
3879 : case AX_REG:
3880 : return true;
3881 103151 : case DX_REG:
3882 103151 : return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3883 :
3884 : /* Complex values are returned in %st(0)/%st(1) pair. */
3885 25261 : case ST0_REG:
3886 25261 : case ST1_REG:
3887 : /* TODO: The function should depend on current function ABI but
3888 : builtins.cc would need updating then. Therefore we use the
3889 : default ABI. */
3890 25261 : if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3891 : return false;
3892 25261 : return TARGET_FLOAT_RETURNS_IN_80387;
3893 :
3894 : /* Complex values are returned in %xmm0/%xmm1 pair. */
3895 1291014 : case XMM0_REG:
3896 1291014 : case XMM1_REG:
3897 1291014 : return TARGET_SSE;
3898 :
3899 10078 : case MM0_REG:
3900 10078 : if (TARGET_MACHO || TARGET_64BIT)
3901 : return false;
3902 2492 : return TARGET_MMX;
3903 : }
3904 :
3905 : return false;
3906 : }
3907 :
3908 : /* Check whether the register REGNO should be zeroed on X86.
3909 : When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3910 : together, no need to zero it again.
3911 : When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3912 :
3913 : static bool
3914 1377 : zero_call_used_regno_p (const unsigned int regno,
3915 : bool all_sse_zeroed,
3916 : bool need_zero_mmx)
3917 : {
3918 835 : return GENERAL_REGNO_P (regno)
3919 819 : || (!all_sse_zeroed && SSE_REGNO_P (regno))
3920 439 : || MASK_REGNO_P (regno)
3921 1800 : || (need_zero_mmx && MMX_REGNO_P (regno));
3922 : }
3923 :
3924 : /* Return the machine_mode that is used to zero register REGNO. */
3925 :
3926 : static machine_mode
3927 954 : zero_call_used_regno_mode (const unsigned int regno)
3928 : {
3929 : /* NB: We only need to zero the lower 32 bits for integer registers
3930 : and the lower 128 bits for vector registers since destination are
3931 : zero-extended to the full register width. */
3932 954 : if (GENERAL_REGNO_P (regno))
3933 : return SImode;
3934 : else if (SSE_REGNO_P (regno))
3935 380 : return V4SFmode;
3936 : else if (MASK_REGNO_P (regno))
3937 : return HImode;
3938 : else if (MMX_REGNO_P (regno))
3939 0 : return V2SImode;
3940 : else
3941 0 : gcc_unreachable ();
3942 : }
3943 :
3944 : /* Generate a rtx to zero all vector registers together if possible,
3945 : otherwise, return NULL. */
3946 :
3947 : static rtx
3948 131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3949 : {
3950 131 : if (!TARGET_AVX)
3951 : return NULL;
3952 :
3953 372 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3954 368 : if ((LEGACY_SSE_REGNO_P (regno)
3955 336 : || (TARGET_64BIT
3956 336 : && (REX_SSE_REGNO_P (regno)
3957 304 : || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3958 432 : && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3959 : return NULL;
3960 :
3961 4 : return gen_avx_vzeroall ();
3962 : }
3963 :
3964 : /* Generate insns to zero all st registers together.
3965 : Return true when zeroing instructions are generated.
3966 : Assume the number of st registers that are zeroed is num_of_st,
3967 : we will emit the following sequence to zero them together:
3968 : fldz; \
3969 : fldz; \
3970 : ...
3971 : fldz; \
3972 : fstp %%st(0); \
3973 : fstp %%st(0); \
3974 : ...
3975 : fstp %%st(0);
3976 : i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3977 : mark stack slots empty.
3978 :
3979 : How to compute the num_of_st:
3980 : There is no direct mapping from stack registers to hard register
3981 : numbers. If one stack register needs to be cleared, we don't know
3982 : where in the stack the value remains. So, if any stack register
3983 : needs to be cleared, the whole stack should be cleared. However,
3984 : x87 stack registers that hold the return value should be excluded.
3985 : x87 returns in the top (two for complex values) register, so
3986 : num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3987 : return the value of num_of_st. */
3988 :
3989 :
3990 : static int
3991 131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3992 : {
3993 :
3994 : /* If the FPU is disabled, no need to zero all st registers. */
3995 131 : if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3996 : return 0;
3997 :
3998 10329 : unsigned int num_of_st = 0;
3999 10329 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4000 10220 : if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
4001 10220 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4002 : {
4003 : num_of_st++;
4004 : break;
4005 : }
4006 :
4007 130 : if (num_of_st == 0)
4008 : return 0;
4009 :
4010 21 : bool return_with_x87 = false;
4011 42 : return_with_x87 = (crtl->return_rtx
4012 21 : && (STACK_REG_P (crtl->return_rtx)));
4013 :
4014 21 : bool complex_return = false;
4015 42 : complex_return = (crtl->return_rtx
4016 21 : && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
4017 :
4018 21 : if (return_with_x87)
4019 2 : if (complex_return)
4020 : num_of_st = 6;
4021 : else
4022 1 : num_of_st = 7;
4023 : else
4024 : num_of_st = 8;
4025 :
4026 21 : rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
4027 186 : for (unsigned int i = 0; i < num_of_st; i++)
4028 165 : emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
4029 :
4030 186 : for (unsigned int i = 0; i < num_of_st; i++)
4031 : {
4032 165 : rtx insn;
4033 165 : insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
4034 165 : add_reg_note (insn, REG_DEAD, st_reg);
4035 : }
4036 21 : return num_of_st;
4037 : }
4038 :
4039 :
4040 : /* When the routine exit in MMX mode, if any ST register needs
4041 : to be zeroed, we should clear all MMX registers except the
4042 : RET_MMX_REGNO that holds the return value. */
4043 : static bool
4044 0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
4045 : unsigned int ret_mmx_regno)
4046 : {
4047 0 : bool need_zero_all_mm = false;
4048 0 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4049 0 : if (STACK_REGNO_P (regno)
4050 0 : && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4051 : {
4052 : need_zero_all_mm = true;
4053 : break;
4054 : }
4055 :
4056 0 : if (!need_zero_all_mm)
4057 : return false;
4058 :
4059 : machine_mode mode = V2SImode;
4060 0 : for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4061 0 : if (regno != ret_mmx_regno)
4062 : {
4063 0 : rtx reg = gen_rtx_REG (mode, regno);
4064 0 : emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
4065 : }
4066 : return true;
4067 : }
4068 :
4069 : /* TARGET_ZERO_CALL_USED_REGS. */
4070 : /* Generate a sequence of instructions that zero registers specified by
4071 : NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
4072 : zeroed. */
4073 : static HARD_REG_SET
4074 131 : ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
4075 : {
4076 131 : HARD_REG_SET zeroed_hardregs;
4077 131 : bool all_sse_zeroed = false;
4078 131 : int all_st_zeroed_num = 0;
4079 131 : bool all_mm_zeroed = false;
4080 :
4081 131 : CLEAR_HARD_REG_SET (zeroed_hardregs);
4082 :
4083 : /* first, let's see whether we can zero all vector registers together. */
4084 131 : rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
4085 131 : if (zero_all_vec_insn)
4086 : {
4087 4 : emit_insn (zero_all_vec_insn);
4088 4 : all_sse_zeroed = true;
4089 4 : if (TARGET_64BIT && TARGET_AVX512F)
4090 : {
4091 2 : rtx zero = CONST0_RTX (V4SFmode);
4092 34 : for (unsigned int regno = XMM16_REG;
4093 34 : regno <= XMM31_REG;
4094 : regno++)
4095 : {
4096 32 : rtx reg = gen_rtx_REG (V4SFmode, regno);
4097 32 : emit_move_insn (reg, zero);
4098 : }
4099 : }
4100 : }
4101 :
4102 : /* mm/st registers are shared registers set, we should follow the following
4103 : rules to clear them:
4104 : MMX exit mode x87 exit mode
4105 : -------------|----------------------|---------------
4106 : uses x87 reg | clear all MMX | clear all x87
4107 : uses MMX reg | clear individual MMX | clear all x87
4108 : x87 + MMX | clear all MMX | clear all x87
4109 :
4110 : first, we should decide which mode (MMX mode or x87 mode) the function
4111 : exit with. */
4112 :
4113 131 : bool exit_with_mmx_mode = (crtl->return_rtx
4114 131 : && (MMX_REG_P (crtl->return_rtx)));
4115 :
4116 131 : if (!exit_with_mmx_mode)
4117 : /* x87 exit mode, we should zero all st registers together. */
4118 : {
4119 131 : all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
4120 :
4121 131 : if (all_st_zeroed_num > 0)
4122 189 : for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
4123 : /* x87 stack registers that hold the return value should be excluded.
4124 : x87 returns in the top (two for complex values) register. */
4125 168 : if (all_st_zeroed_num == 8
4126 168 : || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
4127 : || (all_st_zeroed_num == 6
4128 7 : && (regno == (REGNO (crtl->return_rtx) + 1)))))
4129 165 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4130 : }
4131 : else
4132 : /* MMX exit mode, check whether we can zero all mm registers. */
4133 : {
4134 0 : unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
4135 0 : all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
4136 : exit_mmx_regno);
4137 0 : if (all_mm_zeroed)
4138 0 : for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4139 0 : if (regno != exit_mmx_regno)
4140 0 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4141 : }
4142 :
4143 : /* Now, generate instructions to zero all the other registers. */
4144 :
4145 12183 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4146 : {
4147 12052 : if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4148 10675 : continue;
4149 1800 : if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4150 1377 : exit_with_mmx_mode && !all_mm_zeroed))
4151 423 : continue;
4152 :
4153 954 : SET_HARD_REG_BIT (zeroed_hardregs, regno);
4154 :
4155 954 : machine_mode mode = zero_call_used_regno_mode (regno);
4156 :
4157 954 : rtx reg = gen_rtx_REG (mode, regno);
4158 954 : rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4159 :
4160 954 : switch (mode)
4161 : {
4162 558 : case E_SImode:
4163 558 : if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4164 : {
4165 558 : rtx clob = gen_rtx_CLOBBER (VOIDmode,
4166 : gen_rtx_REG (CCmode,
4167 : FLAGS_REG));
4168 558 : tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4169 : tmp,
4170 : clob));
4171 : }
4172 : /* FALLTHRU. */
4173 :
4174 954 : case E_V4SFmode:
4175 954 : case E_HImode:
4176 954 : case E_V2SImode:
4177 954 : emit_insn (tmp);
4178 954 : break;
4179 :
4180 0 : default:
4181 0 : gcc_unreachable ();
4182 : }
4183 : }
4184 131 : return zeroed_hardregs;
4185 : }
4186 :
4187 : /* Define how to find the value returned by a function.
4188 : VALTYPE is the data type of the value (as a tree).
4189 : If the precise function being called is known, FUNC is its FUNCTION_DECL;
4190 : otherwise, FUNC is 0. */
4191 :
4192 : static rtx
4193 3933465 : function_value_32 (machine_mode orig_mode, machine_mode mode,
4194 : const_tree fntype, const_tree fn)
4195 : {
4196 3933465 : unsigned int regno;
4197 :
4198 : /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4199 : we normally prevent this case when mmx is not available. However
4200 : some ABIs may require the result to be returned like DImode. */
4201 4201550 : if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4202 : regno = FIRST_MMX_REG;
4203 :
4204 : /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4205 : we prevent this case when sse is not available. However some ABIs
4206 : may require the result to be returned like integer TImode. */
4207 3924189 : else if (mode == TImode
4208 4182998 : || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4209 : regno = FIRST_SSE_REG;
4210 :
4211 : /* 32-byte vector modes in %ymm0. */
4212 3965100 : else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4213 : regno = FIRST_SSE_REG;
4214 :
4215 : /* 64-byte vector modes in %zmm0. */
4216 3820902 : else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4217 : regno = FIRST_SSE_REG;
4218 :
4219 : /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4220 3665380 : else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4221 : regno = FIRST_FLOAT_REG;
4222 : else
4223 : /* Most things go in %eax. */
4224 3600985 : regno = AX_REG;
4225 :
4226 : /* Return __bf16/ _Float16/_Complex _Foat16 by sse register. */
4227 3933465 : if (mode == HFmode || mode == BFmode)
4228 : {
4229 1907 : if (!TARGET_SSE2)
4230 : {
4231 0 : error ("SSE register return with SSE2 disabled");
4232 0 : regno = AX_REG;
4233 : }
4234 : else
4235 : regno = FIRST_SSE_REG;
4236 : }
4237 :
4238 3933465 : if (mode == HCmode)
4239 : {
4240 129 : if (!TARGET_SSE2)
4241 0 : error ("SSE register return with SSE2 disabled");
4242 :
4243 129 : rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
4244 258 : XVECEXP (ret, 0, 0)
4245 258 : = gen_rtx_EXPR_LIST (VOIDmode,
4246 : gen_rtx_REG (SImode,
4247 129 : TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4248 : GEN_INT (0));
4249 129 : return ret;
4250 : }
4251 :
4252 : /* Override FP return register with %xmm0 for local functions when
4253 : SSE math is enabled or for functions with sseregparm attribute. */
4254 3933336 : if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4255 : {
4256 50312 : int sse_level = ix86_function_sseregparm (fntype, fn, false);
4257 50312 : if (sse_level == -1)
4258 : {
4259 0 : error ("calling %qD with SSE calling convention without "
4260 : "SSE/SSE2 enabled", fn);
4261 0 : sorry ("this is a GCC bug that can be worked around by adding "
4262 : "attribute used to function called");
4263 : }
4264 50312 : else if ((sse_level >= 1 && mode == SFmode)
4265 50312 : || (sse_level == 2 && mode == DFmode))
4266 : regno = FIRST_SSE_REG;
4267 : }
4268 :
4269 : /* OImode shouldn't be used directly. */
4270 3933336 : gcc_assert (mode != OImode);
4271 :
4272 3933336 : return gen_rtx_REG (orig_mode, regno);
4273 : }
4274 :
4275 : static rtx
4276 97814065 : function_value_64 (machine_mode orig_mode, machine_mode mode,
4277 : const_tree valtype)
4278 : {
4279 97814065 : rtx ret;
4280 :
4281 : /* Handle libcalls, which don't provide a type node. */
4282 97814065 : if (valtype == NULL)
4283 : {
4284 102330 : unsigned int regno;
4285 :
4286 102330 : switch (mode)
4287 : {
4288 : case E_BFmode:
4289 : case E_HFmode:
4290 : case E_HCmode:
4291 : case E_SFmode:
4292 : case E_SCmode:
4293 : case E_DFmode:
4294 : case E_DCmode:
4295 : case E_TFmode:
4296 : case E_SDmode:
4297 : case E_DDmode:
4298 : case E_TDmode:
4299 : regno = FIRST_SSE_REG;
4300 : break;
4301 1040 : case E_XFmode:
4302 1040 : case E_XCmode:
4303 1040 : regno = FIRST_FLOAT_REG;
4304 1040 : break;
4305 : case E_TCmode:
4306 : return NULL;
4307 56152 : default:
4308 56152 : regno = AX_REG;
4309 : }
4310 :
4311 102330 : return gen_rtx_REG (mode, regno);
4312 : }
4313 97711735 : else if (POINTER_TYPE_P (valtype))
4314 : {
4315 : /* Pointers are always returned in word_mode. */
4316 16166319 : mode = word_mode;
4317 : }
4318 :
4319 97711735 : ret = construct_container (mode, orig_mode, valtype, true,
4320 : X86_64_MAX_RETURN_NREGS,
4321 : X86_64_MAX_SSE_RETURN_NREGS,
4322 : x86_64_int_return_registers, 0);
4323 :
4324 : /* For zero sized structures, construct_container returns NULL, but we
4325 : need to keep rest of compiler happy by returning meaningful value. */
4326 97711735 : if (!ret)
4327 204085 : ret = gen_rtx_REG (orig_mode, AX_REG);
4328 :
4329 : return ret;
4330 : }
4331 :
4332 : static rtx
4333 0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4334 : const_tree fntype, const_tree fn, const_tree valtype)
4335 : {
4336 0 : unsigned int regno;
4337 :
4338 : /* Floating point return values in %st(0)
4339 : (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4340 0 : if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4341 0 : && (GET_MODE_SIZE (mode) > 8
4342 0 : || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4343 : {
4344 0 : regno = FIRST_FLOAT_REG;
4345 0 : return gen_rtx_REG (orig_mode, regno);
4346 : }
4347 : else
4348 0 : return function_value_32(orig_mode, mode, fntype,fn);
4349 : }
4350 :
4351 : static rtx
4352 787708 : function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4353 : const_tree valtype)
4354 : {
4355 787708 : unsigned int regno = AX_REG;
4356 :
4357 787708 : if (TARGET_SSE)
4358 : {
4359 786981 : unsigned int mode_size = GET_MODE_SIZE (mode);
4360 :
4361 786981 : switch (mode_size)
4362 : {
4363 34397 : case 16:
4364 34397 : case 32:
4365 34397 : case 64:
4366 34397 : if (mode_size == 32 && !TARGET_AVX)
4367 : break;
4368 34397 : if (mode_size == 64 && !TARGET_AVX512F)
4369 : break;
4370 34397 : if (valtype != NULL_TREE
4371 34397 : && !VECTOR_INTEGER_TYPE_P (valtype)
4372 15828 : && !INTEGRAL_TYPE_P (valtype)
4373 50225 : && !VECTOR_FLOAT_TYPE_P (valtype))
4374 : break;
4375 34397 : if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4376 : && !COMPLEX_MODE_P (mode))
4377 218079 : regno = FIRST_SSE_REG;
4378 : break;
4379 741282 : case 8:
4380 741282 : case 4:
4381 741282 : case 2:
4382 741282 : if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4383 : break;
4384 723988 : if (mode == HFmode || mode == SFmode || mode == DFmode)
4385 218079 : regno = FIRST_SSE_REG;
4386 : break;
4387 : default:
4388 : break;
4389 : }
4390 : }
4391 787708 : return gen_rtx_REG (orig_mode, regno);
4392 : }
4393 :
4394 : static rtx
4395 102535238 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4396 : machine_mode orig_mode, machine_mode mode)
4397 : {
4398 102535238 : const_tree fn, fntype;
4399 :
4400 102535238 : fn = NULL_TREE;
4401 102535238 : if (fntype_or_decl && DECL_P (fntype_or_decl))
4402 3541005 : fn = fntype_or_decl;
4403 3541005 : fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4404 :
4405 102535238 : if (ix86_function_type_abi (fntype) == MS_ABI)
4406 : {
4407 787708 : if (TARGET_64BIT)
4408 787708 : return function_value_ms_64 (orig_mode, mode, valtype);
4409 : else
4410 0 : return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4411 : }
4412 101747530 : else if (TARGET_64BIT)
4413 97814065 : return function_value_64 (orig_mode, mode, valtype);
4414 : else
4415 3933465 : return function_value_32 (orig_mode, mode, fntype, fn);
4416 : }
4417 :
4418 : static rtx
4419 102429766 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4420 : {
4421 102429766 : machine_mode mode, orig_mode;
4422 :
4423 102429766 : orig_mode = TYPE_MODE (valtype);
4424 102429766 : mode = type_natural_mode (valtype, NULL, true);
4425 102429766 : return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4426 : }
4427 :
4428 : /* Pointer function arguments and return values are promoted to
4429 : word_mode for normal functions. */
4430 :
4431 : static machine_mode
4432 32089961 : ix86_promote_function_mode (const_tree type, machine_mode mode,
4433 : int *punsignedp, const_tree fntype,
4434 : int for_return)
4435 : {
4436 32089961 : if (cfun->machine->func_type == TYPE_NORMAL
4437 32088938 : && type != NULL_TREE
4438 32054815 : && POINTER_TYPE_P (type))
4439 : {
4440 16057950 : *punsignedp = POINTERS_EXTEND_UNSIGNED;
4441 16057950 : return word_mode;
4442 : }
4443 16032011 : return default_promote_function_mode (type, mode, punsignedp, fntype,
4444 16032011 : for_return);
4445 : }
4446 :
4447 : /* Return true if a structure, union or array with MODE containing FIELD
4448 : should be accessed using BLKmode. */
4449 :
4450 : static bool
4451 142570941 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4452 : {
4453 : /* Union with XFmode must be in BLKmode. */
4454 142570941 : return (mode == XFmode
4455 142708886 : && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4456 130993 : || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4457 : }
4458 :
4459 : rtx
4460 105472 : ix86_libcall_value (machine_mode mode)
4461 : {
4462 105472 : return ix86_function_value_1 (NULL, NULL, mode, mode);
4463 : }
4464 :
4465 : /* Return true iff type is returned in memory. */
4466 :
4467 : static bool
4468 104367855 : ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4469 : {
4470 104367855 : const machine_mode mode = type_natural_mode (type, NULL, true);
4471 104367855 : HOST_WIDE_INT size;
4472 :
4473 104367855 : if (TARGET_64BIT)
4474 : {
4475 99826039 : if (ix86_function_type_abi (fntype) == MS_ABI)
4476 : {
4477 707133 : size = int_size_in_bytes (type);
4478 :
4479 : /* __m128 is returned in xmm0. 256/512-bit vector values are
4480 : returned in ymm0/zmm0 when AVX/AVX512 is enabled. */
4481 707133 : if ((!type || VECTOR_INTEGER_TYPE_P (type)
4482 687562 : || INTEGRAL_TYPE_P (type)
4483 217159 : || VECTOR_FLOAT_TYPE_P (type))
4484 505802 : && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4485 : && !COMPLEX_MODE_P (mode)
4486 1212935 : && ((GET_MODE_SIZE (mode) == 16 || size == 16)
4487 519126 : || (TARGET_AVX && (GET_MODE_SIZE (mode) == 32 || size == 32))
4488 482061 : || (TARGET_AVX512F
4489 16590 : && (GET_MODE_SIZE (mode) == 64 || size == 64))))
4490 : return false;
4491 :
4492 : /* Otherwise, the size must be exactly in [1248]. */
4493 1329376 : return size != 1 && size != 2 && size != 4 && size != 8;
4494 : }
4495 : else
4496 : {
4497 99118906 : int needed_intregs, needed_sseregs;
4498 :
4499 99118906 : return examine_argument (mode, type, true,
4500 : &needed_intregs, &needed_sseregs);
4501 : }
4502 : }
4503 : else
4504 : {
4505 4541816 : size = int_size_in_bytes (type);
4506 :
4507 : /* Intel MCU psABI returns scalars and aggregates no larger than 8
4508 : bytes in registers. */
4509 4541816 : if (TARGET_IAMCU)
4510 0 : return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4511 :
4512 4541816 : if (mode == BLKmode)
4513 : return true;
4514 :
4515 4541816 : if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4516 : return false;
4517 :
4518 4541816 : if (VECTOR_MODE_P (mode) || mode == TImode)
4519 : {
4520 : /* User-created vectors small enough to fit in EAX. */
4521 268055 : if (size < 8)
4522 : return false;
4523 :
4524 : /* Unless ABI prescibes otherwise,
4525 : MMX/3dNow values are returned in MM0 if available. */
4526 :
4527 268055 : if (size == 8)
4528 9266 : return TARGET_VECT8_RETURNS || !TARGET_MMX;
4529 :
4530 : /* SSE values are returned in XMM0 if available. */
4531 258789 : if (size == 16)
4532 108939 : return !TARGET_SSE;
4533 :
4534 : /* AVX values are returned in YMM0 if available. */
4535 149850 : if (size == 32)
4536 72090 : return !TARGET_AVX;
4537 :
4538 : /* AVX512F values are returned in ZMM0 if available. */
4539 77760 : if (size == 64)
4540 77760 : return !TARGET_AVX512F;
4541 : }
4542 :
4543 4273761 : if (mode == XFmode)
4544 : return false;
4545 :
4546 4262038 : if (size > 12)
4547 : return true;
4548 :
4549 : /* OImode shouldn't be used directly. */
4550 3280377 : gcc_assert (mode != OImode);
4551 :
4552 : return false;
4553 : }
4554 : }
4555 :
4556 : /* Implement TARGET_PUSH_ARGUMENT. */
4557 :
4558 : static bool
4559 9348065 : ix86_push_argument (unsigned int npush)
4560 : {
4561 : /* If SSE2 is available, use vector move to put large argument onto
4562 : stack. NB: In 32-bit mode, use 8-byte vector move. */
4563 11773075 : return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4564 9082932 : && TARGET_PUSH_ARGS
4565 18430899 : && !ACCUMULATE_OUTGOING_ARGS);
4566 : }
4567 :
4568 :
4569 : /* Create the va_list data type used for the 64-bit SYSV ABI: a one-element array of the record __va_list_tag with fields gp_offset, fp_offset, overflow_arg_area and reg_save_area (in that order). Also sets va_list_gpr_counter_field and va_list_fpr_counter_field to the two offset fields. */
4570 :
4571 : static tree
4572 281307 : ix86_build_builtin_va_list_64 (void)
4573 : {
4574 281307 : tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4575 :
4576 281307 : record = lang_hooks.types.make_type (RECORD_TYPE);
4577 281307 : type_decl = build_decl (BUILTINS_LOCATION,
4578 : TYPE_DECL, get_identifier ("__va_list_tag"), record);
4579 :
4580 281307 : f_gpr = build_decl (BUILTINS_LOCATION,
4581 : FIELD_DECL, get_identifier ("gp_offset"),
4582 : unsigned_type_node);
4583 281307 : f_fpr = build_decl (BUILTINS_LOCATION,
4584 : FIELD_DECL, get_identifier ("fp_offset"),
4585 : unsigned_type_node);
4586 281307 : f_ovf = build_decl (BUILTINS_LOCATION,
4587 : FIELD_DECL, get_identifier ("overflow_arg_area"),
4588 : ptr_type_node);
4589 281307 : f_sav = build_decl (BUILTINS_LOCATION,
4590 : FIELD_DECL, get_identifier ("reg_save_area"),
4591 : ptr_type_node);
4592 :
4593 281307 : va_list_gpr_counter_field = f_gpr;
4594 281307 : va_list_fpr_counter_field = f_fpr;
4595 :
4596 281307 : DECL_FIELD_CONTEXT (f_gpr) = record;
4597 281307 : DECL_FIELD_CONTEXT (f_fpr) = record;
4598 281307 : DECL_FIELD_CONTEXT (f_ovf) = record;
4599 281307 : DECL_FIELD_CONTEXT (f_sav) = record;
4600 :
4601 281307 : TYPE_STUB_DECL (record) = type_decl;
4602 281307 : TYPE_NAME (record) = type_decl;
4603 281307 : TYPE_FIELDS (record) = f_gpr;
4604 281307 : DECL_CHAIN (f_gpr) = f_fpr;
4605 281307 : DECL_CHAIN (f_fpr) = f_ovf;
4606 281307 : DECL_CHAIN (f_ovf) = f_sav;
4607 281307 : TREE_PUBLIC (type_decl) = 1;
4608 :
4609 281307 : layout_type (record);
4610 : /* Tag the record itself (not the array built below) with the "sysv_abi va_list" attribute. */
4611 281307 : TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4612 281307 : NULL_TREE, TYPE_ATTRIBUTES (record));
4613 :
4614 : /* The correct type is an array type of one element. */
4615 281307 : return build_array_type (record, build_index_type (size_zero_node));
4616 : }
4617 :
4618 : /* Set up the builtin va_list data type and, for 64-bit, the additional
4619 : calling-convention-specific va_list data types. Returns the va_list type matching ix86_abi on 64-bit targets and a plain char pointer type otherwise. */
4620 :
4621 : static tree
4622 288463 : ix86_build_builtin_va_list (void)
4623 : {
4624 288463 : if (TARGET_64BIT)
4625 : {
4626 : /* Initialize ABI specific va_list builtin types.
4627 :
4628 : In lto1, we can encounter two va_list types:
4629 : - one as a result of the type-merge across TUs, and
4630 : - the one constructed here.
4631 : These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4632 : a type identity check in canonical_va_list_type based on
4633 : TYPE_MAIN_VARIANT (which we used to have) will not work.
4634 : Instead, we tag each va_list_type_node with its unique attribute, and
4635 : look for the attribute in the type identity check in
4636 : canonical_va_list_type.
4637 :
4638 : Tagging sysv_va_list_type_node directly with the attribute is
4639 : problematic since it's an array of one record, which will degrade into a
4640 : pointer to record when used as parameter (see build_va_arg comments for
4641 : an example), dropping the attribute in the process. So we tag the
4642 : record instead. */
4643 :
4644 : /* For SYSV_ABI we use an array of one record. */
4645 281307 : sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4646 :
4647 : /* For MS_ABI we use plain pointer to argument area, tagged with the "ms_abi va_list" attribute. */
4648 281307 : tree char_ptr_type = build_pointer_type (char_type_node);
4649 281307 : tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4650 281307 : TYPE_ATTRIBUTES (char_ptr_type));
4651 281307 : ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4652 :
4653 281307 : return ((ix86_abi == MS_ABI)
4654 281307 : ? ms_va_list_type_node
4655 281307 : : sysv_va_list_type_node);
4656 : }
4657 : else
4658 : {
4659 : /* For i386 we use plain pointer to argument area. */
4660 7156 : return build_pointer_type (char_type_node);
4661 : }
4662 : }
4663 :
4664 : /* Worker function for TARGET_SETUP_INCOMING_VARARGS, used when the call ABI is not MS_ABI. Sets ix86_varargs_gpr_size and ix86_varargs_fpr_size, then emits stores of the integer parameter registers from CUM->regno on, and of the SSE parameter registers from CUM->sse_regno on, into the save area addressed from frame_pointer_rtx. */
4665 :
4666 : static void
4667 15683 : setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4668 : {
4669 15683 : rtx save_area, mem;
4670 15683 : alias_set_type set;
4671 15683 : int i, max;
4672 :
4673 : /* GPR size of varargs save area. */
4674 15683 : if (cfun->va_list_gpr_size)
4675 15233 : ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4676 : else
4677 450 : ix86_varargs_gpr_size = 0;
4678 :
4679 : /* FPR size of varargs save area. We don't need it if we don't pass
4680 : anything in SSE registers. */
4681 15683 : if (TARGET_SSE && cfun->va_list_fpr_size)
4682 14646 : ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4683 : else
4684 1037 : ix86_varargs_fpr_size = 0;
4685 : /* Nothing to save if neither area is needed. */
4686 15683 : if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4687 : return;
4688 :
4689 15402 : save_area = frame_pointer_rtx;
4690 15402 : set = get_varargs_alias_set ();
4691 : /* Save only as many integer registers as cfun->va_list_gpr_size asks for, capped at X86_64_REGPARM_MAX. */
4692 15402 : max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4693 15402 : if (max > X86_64_REGPARM_MAX)
4694 : max = X86_64_REGPARM_MAX;
4695 :
4696 15402 : const int *parm_regs;
4697 15402 : if (cum->preserve_none_abi)
4698 : parm_regs = x86_64_preserve_none_int_parameter_registers;
4699 : else
4700 15401 : parm_regs = x86_64_int_parameter_registers;
4701 :
4702 85531 : for (i = cum->regno; i < max; i++)
4703 : {
4704 70129 : mem = gen_rtx_MEM (word_mode,
4705 70129 : plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4706 70129 : MEM_NOTRAP_P (mem) = 1;
4707 70129 : set_mem_alias_set (mem, set);
4708 70129 : emit_move_insn (mem,
4709 70129 : gen_rtx_REG (word_mode, parm_regs[i]));
4710 : }
4711 :
4712 15402 : if (ix86_varargs_fpr_size)
4713 : {
4714 14646 : machine_mode smode;
4715 14646 : rtx_code_label *label;
4716 14646 : rtx test;
4717 :
4718 : /* Now emit code to save SSE registers. The AX parameter contains number
4719 : of SSE parameter registers used to call this function, though all we
4720 : actually check here is the zero/non-zero status. */
4721 :
4722 14646 : label = gen_label_rtx ();
4723 14646 : test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4724 14646 : emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4725 : label));
4726 :
4727 : /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4728 : we used movdqa (i.e. TImode) instead? Perhaps even better would
4729 : be if we could determine the real mode of the data, via a hook
4730 : into pass_stdarg. Ignore all that for now. */
4731 14646 : smode = V4SFmode;
4732 14646 : if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4733 4149 : crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4734 :
4735 14646 : max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4736 14646 : if (max > X86_64_SSE_REGPARM_MAX)
4737 : max = X86_64_SSE_REGPARM_MAX;
4738 :
4739 130198 : for (i = cum->sse_regno; i < max; ++i)
4740 : {
4741 115552 : mem = plus_constant (Pmode, save_area,
4742 115552 : i * 16 + ix86_varargs_gpr_size);
4743 115552 : mem = gen_rtx_MEM (smode, mem);
4744 115552 : MEM_NOTRAP_P (mem) = 1;
4745 115552 : set_mem_alias_set (mem, set);
4746 115552 : set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4747 :
4748 115552 : emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4749 : }
4750 :
4751 14646 : emit_label (label);
4752 : }
4753 : }
4754 : /* Worker for incoming varargs setup when the call ABI is MS_ABI (see the caller ix86_setup_incoming_varargs). Emits stores of the integer parameter registers from CUM->regno up to X86_64_MS_REGPARM_MAX into the slots at virtual_incoming_args_rtx + i * UNITS_PER_WORD. */
4755 : static void
4756 5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4757 : {
4758 5652 : alias_set_type set = get_varargs_alias_set ();
4759 5652 : int i;
4760 :
4761 : /* Reset both save-area sizes to zero, as there might be a sysv vaarg used
4762 : before. */
4763 5652 : ix86_varargs_gpr_size = 0;
4764 5652 : ix86_varargs_fpr_size = 0;
4765 :
4766 14154 : for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4767 : {
4768 8502 : rtx reg, mem;
4769 :
4770 8502 : mem = gen_rtx_MEM (Pmode,
4771 8502 : plus_constant (Pmode, virtual_incoming_args_rtx,
4772 8502 : i * UNITS_PER_WORD));
4773 8502 : MEM_NOTRAP_P (mem) = 1;
4774 8502 : set_mem_alias_set (mem, set);
4775 :
4776 8502 : reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4777 8502 : emit_move_insn (mem, reg);
4778 : }
4779 5652 : }
4780 : /* Set up incoming varargs for the current function (presumably the TARGET_SETUP_INCOMING_VARARGS hook; its registration is not visible here). Does nothing on 32-bit targets. Otherwise works on a copy of CUM, advanced past ARG when the condition below holds, and calls the MS or non-MS 64-bit worker according to CUM->call_abi. NO_RTL must be zero. */
4781 : static void
4782 21489 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4783 : const function_arg_info &arg,
4784 : int *, int no_rtl)
4785 : {
4786 21489 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4787 21489 : CUMULATIVE_ARGS next_cum;
4788 21489 : tree fntype;
4789 :
4790 : /* This argument doesn't appear to be used anymore. Which is good,
4791 : because the old code here didn't suppress rtl generation. */
4792 21489 : gcc_assert (!no_rtl);
4793 :
4794 21489 : if (!TARGET_64BIT)
4795 154 : return;
4796 :
4797 21335 : fntype = TREE_TYPE (current_function_decl);
4798 :
4799 : /* For varargs, we do not want to skip the dummy va_dcl argument.
4800 : For stdargs, we do want to skip the last named argument. */
4801 21335 : next_cum = *cum;
4802 21335 : if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4803 122 : || arg.type != NULL_TREE)
4804 21360 : && stdarg_p (fntype))
4805 21238 : ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4806 :
4807 21335 : if (cum->call_abi == MS_ABI)
4808 5652 : setup_incoming_varargs_ms_64 (&next_cum);
4809 : else
4810 15683 : setup_incoming_varargs_64 (&next_cum);
4811 : }
4812 :
4813 : /* Return true if TYPE is a va_list of the plain "char *" kind: always on 32-bit targets; on 64-bit targets when its canonical va_list type is ms_va_list_type_node, or is va_list_type_node while ix86_abi is MS_ABI. */
4814 :
4815 : static bool
4816 73066 : is_va_list_char_pointer (tree type)
4817 : {
4818 73066 : tree canonic;
4819 :
4820 : /* For 32-bit it is always true. */
4821 73066 : if (!TARGET_64BIT)
4822 : return true;
4823 72904 : canonic = ix86_canonical_va_list_type (type);
4824 72904 : return (canonic == ms_va_list_type_node
4825 72904 : || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4826 : }
4827 :
4828 : /* Implement va_start for VALIST. A char-pointer va_list is expanded via std_expand_builtin_va_start with NEXTARG, or from the split-stack varargs pointer when one exists. Otherwise the gp_offset, fp_offset, overflow_arg_area and reg_save_area fields of the SYSV va_list record are initialized from crtl->args.info and the varargs save-area sizes. */
4829 :
4830 : static void
4831 20978 : ix86_va_start (tree valist, rtx nextarg)
4832 : {
4833 20978 : HOST_WIDE_INT words, n_gpr, n_fpr;
4834 20978 : tree f_gpr, f_fpr, f_ovf, f_sav;
4835 20978 : tree gpr, fpr, ovf, sav, t;
4836 20978 : tree type;
4837 20978 : rtx ovf_rtx;
4838 :
4839 20978 : if (flag_split_stack
4840 12 : && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4841 : {
4842 12 : unsigned int scratch_regno;
4843 :
4844 : /* When we are splitting the stack, we can't refer to the stack
4845 : arguments using internal_arg_pointer, because they may be on
4846 : the old stack. The split stack prologue will arrange to
4847 : leave a pointer to the old stack arguments in a scratch
4848 : register, which we here copy to a pseudo-register. The split
4849 : stack prologue can't set the pseudo-register directly because
4850 : it (the prologue) runs before any registers have been saved. */
4851 :
4852 12 : scratch_regno = split_stack_prologue_scratch_regno ();
4853 12 : if (scratch_regno != INVALID_REGNUM)
4854 : {
4855 12 : rtx reg;
4856 12 : rtx_insn *seq;
4857 :
4858 16 : reg = gen_reg_rtx (Pmode);
4859 12 : cfun->machine->split_stack_varargs_pointer = reg;
4860 :
4861 12 : start_sequence ();
4862 16 : emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4863 12 : seq = end_sequence ();
4864 : /* Emit the copy after the function entry rather than at the current position. */
4865 12 : push_topmost_sequence ();
4866 12 : emit_insn_after (seq, entry_of_function ());
4867 12 : pop_topmost_sequence ();
4868 : }
4869 : }
4870 :
4871 : /* Only 64-bit targets with a non-char-pointer va_list need something special. */
4872 20978 : if (is_va_list_char_pointer (TREE_TYPE (valist)))
4873 : {
4874 5656 : if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4875 5652 : std_expand_builtin_va_start (valist, nextarg);
4876 : else
4877 : {
4878 4 : rtx va_r, next;
4879 :
4880 4 : va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4881 8 : next = expand_binop (ptr_mode, add_optab,
4882 4 : cfun->machine->split_stack_varargs_pointer,
4883 : crtl->args.arg_offset_rtx,
4884 : NULL_RTX, 0, OPTAB_LIB_WIDEN);
4885 4 : convert_move (va_r, next, 0);
4886 : }
4887 5656 : return;
4888 : }
4889 :
4890 15322 : f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4891 15322 : f_fpr = DECL_CHAIN (f_gpr);
4892 15322 : f_ovf = DECL_CHAIN (f_fpr);
4893 15322 : f_sav = DECL_CHAIN (f_ovf);
4894 :
4895 15322 : valist = build_simple_mem_ref (valist);
4896 15322 : TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4897 : /* The following should be folded into the MEM_REF offset. */
4898 15322 : gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4899 : f_gpr, NULL_TREE);
4900 15322 : fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4901 : f_fpr, NULL_TREE);
4902 15322 : ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4903 : f_ovf, NULL_TREE);
4904 15322 : sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4905 : f_sav, NULL_TREE);
4906 :
4907 : /* Count number of gp and fp argument registers used. */
4908 15322 : words = crtl->args.info.words;
4909 15322 : n_gpr = crtl->args.info.regno;
4910 15322 : n_fpr = crtl->args.info.sse_regno;
4911 :
4912 15322 : if (cfun->va_list_gpr_size)
4913 : {
4914 15088 : type = TREE_TYPE (gpr);
4915 15088 : t = build2 (MODIFY_EXPR, type,
4916 15088 : gpr, build_int_cst (type, n_gpr * 8));
4917 15088 : TREE_SIDE_EFFECTS (t) = 1;
4918 15088 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4919 : }
4920 :
4921 15322 : if (TARGET_SSE && cfun->va_list_fpr_size)
4922 : {
4923 14489 : type = TREE_TYPE (fpr);
4924 14489 : t = build2 (MODIFY_EXPR, type, fpr,
4925 14489 : build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4926 14489 : TREE_SIDE_EFFECTS (t) = 1;
4927 14489 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4928 : }
4929 :
4930 : /* Find the overflow area. */
4931 15322 : type = TREE_TYPE (ovf);
4932 15322 : if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4933 15314 : ovf_rtx = crtl->args.internal_arg_pointer;
4934 : else
4935 : ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4936 15322 : t = make_tree (type, ovf_rtx);
4937 15322 : if (words != 0)
4938 488 : t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4939 :
4940 15322 : t = build2 (MODIFY_EXPR, type, ovf, t);
4941 15322 : TREE_SIDE_EFFECTS (t) = 1;
4942 15322 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4943 :
4944 15322 : if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4945 : {
4946 : /* Find the register save area.
4947 : The function prologue saves it right above the stack frame. */
4948 15257 : type = TREE_TYPE (sav);
4949 15257 : t = make_tree (type, frame_pointer_rtx);
4950 15257 : if (!ix86_varargs_gpr_size)
4951 169 : t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4952 :
4953 15257 : t = build2 (MODIFY_EXPR, type, sav, t);
4954 15257 : TREE_SIDE_EFFECTS (t) = 1;
4955 15257 : expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4956 : }
4957 : }
4958 :
4959 : /* Implement va_arg: gimplify the fetch of the next argument of TYPE from VALIST, appending the generated statements to PRE_P, and return an expression that dereferences the computed argument address. A char-pointer va_list is handled by std_gimplify_va_arg_expr. Otherwise the value is taken from the register save area when it fits there, else from the overflow area. */
4960 :
4961 : static tree
4962 52088 : ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4963 : gimple_seq *post_p)
4964 : {
4965 52088 : static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4966 52088 : tree f_gpr, f_fpr, f_ovf, f_sav;
4967 52088 : tree gpr, fpr, ovf, sav, t;
4968 52088 : int size, rsize;
4969 52088 : tree lab_false, lab_over = NULL_TREE;
4970 52088 : tree addr, t2;
4971 52088 : rtx container;
4972 52088 : int indirect_p = 0;
4973 52088 : tree ptrtype;
4974 52088 : machine_mode nat_mode;
4975 52088 : unsigned int arg_boundary;
4976 52088 : unsigned int type_align;
4977 :
4978 : /* Only 64bit target needs something special. */
4979 52088 : if (is_va_list_char_pointer (TREE_TYPE (valist)))
4980 260 : return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4981 :
4982 51828 : f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4983 51828 : f_fpr = DECL_CHAIN (f_gpr);
4984 51828 : f_ovf = DECL_CHAIN (f_fpr);
4985 51828 : f_sav = DECL_CHAIN (f_ovf);
4986 :
4987 51828 : gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4988 : valist, f_gpr, NULL_TREE);
4989 :
4990 51828 : fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4991 51828 : ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4992 51828 : sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4993 : /* An argument passed by reference is fetched as a pointer and dereferenced once more at the end. */
4994 51828 : indirect_p = pass_va_arg_by_reference (type);
4995 51828 : if (indirect_p)
4996 103 : type = build_pointer_type (type);
4997 51828 : size = arg_int_size_in_bytes (type);
4998 51828 : rsize = CEIL (size, UNITS_PER_WORD);
4999 :
5000 51828 : nat_mode = type_natural_mode (type, NULL, false);
5001 51828 : switch (nat_mode)
5002 : {
5003 28 : case E_V16HFmode:
5004 28 : case E_V16BFmode:
5005 28 : case E_V8SFmode:
5006 28 : case E_V8SImode:
5007 28 : case E_V32QImode:
5008 28 : case E_V16HImode:
5009 28 : case E_V4DFmode:
5010 28 : case E_V4DImode:
5011 28 : case E_V32HFmode:
5012 28 : case E_V32BFmode:
5013 28 : case E_V16SFmode:
5014 28 : case E_V16SImode:
5015 28 : case E_V64QImode:
5016 28 : case E_V32HImode:
5017 28 : case E_V8DFmode:
5018 28 : case E_V8DImode:
5019 : /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
5020 28 : if (!TARGET_64BIT_MS_ABI)
5021 : {
5022 : container = NULL;
5023 : break;
5024 : }
5025 : /* FALLTHRU */
5026 :
5027 51800 : default:
5028 51800 : container = construct_container (nat_mode, TYPE_MODE (type),
5029 : type, false, X86_64_REGPARM_MAX,
5030 : X86_64_SSE_REGPARM_MAX, intreg, 0);
5031 51800 : break;
5032 : }
5033 :
5034 : /* Pull the value out of the saved registers. */
5035 :
5036 51828 : addr = create_tmp_var (ptr_type_node, "addr");
5037 51828 : type_align = TYPE_ALIGN (type);
5038 :
5039 51828 : if (container)
5040 : {
5041 28735 : int needed_intregs, needed_sseregs;
5042 28735 : bool need_temp;
5043 28735 : tree int_addr, sse_addr;
5044 :
5045 28735 : lab_false = create_artificial_label (UNKNOWN_LOCATION);
5046 28735 : lab_over = create_artificial_label (UNKNOWN_LOCATION);
5047 :
5048 28735 : examine_argument (nat_mode, type, false,
5049 : &needed_intregs, &needed_sseregs);
5050 :
5051 28735 : bool container_in_reg = false;
5052 28735 : if (REG_P (container))
5053 : container_in_reg = true;
5054 1641 : else if (GET_CODE (container) == PARALLEL
5055 1641 : && GET_MODE (container) == BLKmode
5056 580 : && XVECLEN (container, 0) == 1)
5057 : {
5058 : /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
5059 : expression in a TImode register. In this case, temp isn't
5060 : needed. Otherwise, the TImode variable will be put in the
5061 : GPR save area which guarantees only 8-byte alignment. */
5062 509 : rtx x = XVECEXP (container, 0, 0);
5063 509 : if (GET_CODE (x) == EXPR_LIST
5064 509 : && REG_P (XEXP (x, 0))
5065 509 : && XEXP (x, 1) == const0_rtx)
5066 : container_in_reg = true;
5067 : }
5068 :
5069 680 : need_temp = (!container_in_reg
5070 1150 : && ((needed_intregs && TYPE_ALIGN (type) > 64)
5071 680 : || TYPE_ALIGN (type) > 128));
5072 :
5073 : /* In case we are passing structure, verify that it is consecutive block
5074 : on the register save area. If not we need to do moves. */
5075 680 : if (!need_temp && !container_in_reg)
5076 : {
5077 : /* Verify that all registers are strictly consecutive. */
5078 966 : if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5079 : {
5080 : int i;
5081 :
5082 815 : for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5083 : {
5084 529 : rtx slot = XVECEXP (container, 0, i);
5085 529 : if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5086 529 : || INTVAL (XEXP (slot, 1)) != i * 16)
5087 : need_temp = true;
5088 : }
5089 : }
5090 : else
5091 : {
5092 : int i;
5093 :
5094 1120 : for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5095 : {
5096 726 : rtx slot = XVECEXP (container, 0, i);
5097 726 : if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5098 726 : || INTVAL (XEXP (slot, 1)) != i * 8)
5099 : need_temp = true;
5100 : }
5101 : }
5102 : }
5103 28735 : if (!need_temp)
5104 : {
5105 : int_addr = addr;
5106 : sse_addr = addr;
5107 : }
5108 : else
5109 : {
5110 877 : int_addr = create_tmp_var (ptr_type_node, "int_addr");
5111 877 : sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5112 : }
5113 :
5114 : /* First ensure that we fit completely in registers. */
5115 28735 : if (needed_intregs)
5116 : {
5117 18014 : t = build_int_cst (TREE_TYPE (gpr),
5118 18014 : (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5119 18014 : t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5120 18014 : t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5121 18014 : t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5122 18014 : gimplify_and_add (t, pre_p);
5123 : }
5124 28735 : if (needed_sseregs)
5125 : {
5126 11113 : t = build_int_cst (TREE_TYPE (fpr),
5127 : (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5128 11113 : + X86_64_REGPARM_MAX * 8);
5129 11113 : t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5130 11113 : t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5131 11113 : t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5132 11113 : gimplify_and_add (t, pre_p);
5133 : }
5134 :
5135 : /* Compute index to start of area used for integer regs. */
5136 28735 : if (needed_intregs)
5137 : {
5138 : /* int_addr = gpr + sav; */
5139 18014 : t = fold_build_pointer_plus (sav, gpr);
5140 18014 : gimplify_assign (int_addr, t, pre_p);
5141 : }
5142 28735 : if (needed_sseregs)
5143 : {
5144 : /* sse_addr = fpr + sav; */
5145 11113 : t = fold_build_pointer_plus (sav, fpr);
5146 11113 : gimplify_assign (sse_addr, t, pre_p);
5147 : }
5148 28735 : if (need_temp)
5149 : {
5150 877 : int i, prev_size = 0;
5151 877 : tree temp = create_tmp_var (type, "va_arg_tmp");
5152 877 : TREE_ADDRESSABLE (temp) = 1;
5153 :
5154 : /* addr = &temp; */
5155 877 : t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5156 877 : gimplify_assign (addr, t, pre_p);
5157 : /* Copy each piece of the container from its save-area slot into the temporary. */
5158 2241 : for (i = 0; i < XVECLEN (container, 0); i++)
5159 : {
5160 1364 : rtx slot = XVECEXP (container, 0, i);
5161 1364 : rtx reg = XEXP (slot, 0);
5162 1364 : machine_mode mode = GET_MODE (reg);
5163 1364 : tree piece_type;
5164 1364 : tree addr_type;
5165 1364 : tree daddr_type;
5166 1364 : tree src_addr, src;
5167 1364 : int src_offset;
5168 1364 : tree dest_addr, dest;
5169 1364 : int cur_size = GET_MODE_SIZE (mode);
5170 :
5171 1364 : gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
5172 1364 : prev_size = INTVAL (XEXP (slot, 1));
5173 1364 : if (prev_size + cur_size > size)
5174 : {
5175 30 : cur_size = size - prev_size;
5176 30 : unsigned int nbits = cur_size * BITS_PER_UNIT;
5177 30 : if (!int_mode_for_size (nbits, 1).exists (&mode))
5178 10 : mode = QImode;
5179 : }
5180 1364 : piece_type = lang_hooks.types.type_for_mode (mode, 1);
5181 1364 : if (mode == GET_MODE (reg))
5182 1334 : addr_type = build_pointer_type (piece_type);
5183 : else
5184 30 : addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5185 : true);
5186 1364 : daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5187 : true);
5188 :
5189 1364 : if (SSE_REGNO_P (REGNO (reg)))
5190 : {
5191 534 : src_addr = sse_addr;
5192 534 : src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5193 : }
5194 : else
5195 : {
5196 830 : src_addr = int_addr;
5197 830 : src_offset = REGNO (reg) * 8;
5198 : }
5199 1364 : src_addr = fold_convert (addr_type, src_addr);
5200 1364 : src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5201 :
5202 1364 : dest_addr = fold_convert (daddr_type, addr);
5203 1364 : dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5204 2728 : if (cur_size == GET_MODE_SIZE (mode))
5205 : {
5206 1354 : src = build_va_arg_indirect_ref (src_addr);
5207 1354 : dest = build_va_arg_indirect_ref (dest_addr);
5208 :
5209 1354 : gimplify_assign (dest, src, pre_p);
5210 : }
5211 : else
5212 : {
5213 10 : tree copy
5214 20 : = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5215 : 3, dest_addr, src_addr,
5216 10 : size_int (cur_size));
5217 10 : gimplify_and_add (copy, pre_p);
5218 : }
5219 1364 : prev_size += cur_size;
5220 : }
5221 : }
5222 :
5223 28735 : if (needed_intregs)
5224 : {
5225 18014 : t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5226 18014 : build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5227 18014 : gimplify_assign (gpr, t, pre_p);
5228 : /* The GPR save area guarantees only 8-byte alignment. */
5229 18014 : if (!need_temp)
5230 17210 : type_align = MIN (type_align, 64);
5231 : }
5232 :
5233 28735 : if (needed_sseregs)
5234 : {
5235 11113 : t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5236 11113 : build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5237 11113 : gimplify_assign (unshare_expr (fpr), t, pre_p);
5238 : }
5239 :
5240 28735 : gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5241 :
5242 28735 : gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5243 : }
5244 :
5245 : /* ... otherwise out of the overflow area. */
5246 :
5247 : /* When we align parameter on stack for caller, if the parameter
5248 : alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
5249 : aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
5250 : here with caller. */
5251 51828 : arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5252 51828 : if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5253 : arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5254 :
5255 : /* Care for on-stack alignment if needed. */
5256 51828 : if (arg_boundary <= 64 || size == 0)
5257 34789 : t = ovf;
5258 : else
5259 : {
5260 17039 : HOST_WIDE_INT align = arg_boundary / 8;
5261 17039 : t = fold_build_pointer_plus_hwi (ovf, align - 1);
5262 17039 : t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5263 17039 : build_int_cst (TREE_TYPE (t), -align));
5264 : }
5265 :
5266 51828 : gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5267 51828 : gimplify_assign (addr, t, pre_p);
5268 : /* Advance the overflow pointer past this argument, rounded up to whole words. */
5269 51828 : t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5270 51828 : gimplify_assign (unshare_expr (ovf), t, pre_p);
5271 :
5272 51828 : if (container)
5273 28735 : gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5274 :
5275 51828 : type = build_aligned_type (type, type_align);
5276 51828 : ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5277 51828 : addr = fold_convert (ptrtype, addr);
5278 :
5279 51828 : if (indirect_p)
5280 103 : addr = build_va_arg_indirect_ref (addr);
5281 51828 : return build_va_arg_indirect_ref (addr);
5282 : }
5283 :
5284 : /* Return true if OPNUM's MEM should be matched
5285 : in movabs* patterns, i.e. if volatile_ok is set or the MEM is not volatile. INSN's pattern must be a SET, or a PARALLEL whose first element is a SET; SUBREGs wrapped around the MEM are looked through. */
5286 :
5287 : bool
5288 480 : ix86_check_movabs (rtx insn, int opnum)
5289 : {
5290 480 : rtx set, mem;
5291 :
5292 480 : set = PATTERN (insn);
5293 480 : if (GET_CODE (set) == PARALLEL)
5294 0 : set = XVECEXP (set, 0, 0);
5295 480 : gcc_assert (GET_CODE (set) == SET);
5296 480 : mem = XEXP (set, opnum);
5297 480 : while (SUBREG_P (mem))
5298 0 : mem = SUBREG_REG (mem);
5299 480 : gcc_assert (MEM_P (mem));
5300 480 : return volatile_ok || !MEM_VOLATILE_P (mem);
5301 : }
5302 :
5303 : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments: the destination MEM must be in the generic address space, and the source MEM must be too unless Pmode == word_mode. INSN's pattern must be a PARALLEL whose element IDX is a SET between two MEMs. */
5304 : bool
5305 222383 : ix86_check_movs (rtx insn, int idx)
5306 : {
5307 222383 : rtx pat = PATTERN (insn);
5308 222383 : gcc_assert (GET_CODE (pat) == PARALLEL);
5309 :
5310 222383 : rtx set = XVECEXP (pat, 0, idx);
5311 222383 : gcc_assert (GET_CODE (set) == SET);
5312 :
5313 222383 : rtx dst = SET_DEST (set);
5314 222383 : gcc_assert (MEM_P (dst));
5315 :
5316 222383 : rtx src = SET_SRC (set);
5317 222383 : gcc_assert (MEM_P (src));
5318 :
5319 222383 : return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
5320 444766 : && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
5321 0 : || Pmode == word_mode));
5322 : }
5323 :
5324 : /* Return false if INSN contains a MEM with a non-default address space, and true otherwise. Every sub-rtx of INSN's pattern is visited. */
5325 : bool
5326 65420 : ix86_check_no_addr_space (rtx insn)
5327 : {
5328 65420 : subrtx_var_iterator::array_type array;
5329 1439692 : FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5330 : {
5331 1374272 : rtx x = *iter;
5332 1505112 : if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5333 0 : return false;
5334 : }
5335 65420 : return true;
5336 65420 : }
5337 :
5338 : /* Initialize the table of extra 80387 mathematical constants. Parses the five decimal strings below into ext_80387_constants_table, rounds each to XFmode precision, and sets ext_80387_constants_init. */
5339 :
5340 : static void
5341 2345 : init_ext_80387_constants (void)
5342 : {
5343 2345 : static const char * cst[5] =
5344 : {
5345 : "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5346 : "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5347 : "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5348 : "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5349 : "3.1415926535897932385128089594061862044", /* 4: fldpi */
5350 : };
5351 2345 : int i;
5352 :
5353 14070 : for (i = 0; i < 5; i++)
5354 : {
5355 11725 : real_from_string (&ext_80387_constants_table[i], cst[i]);
5356 : /* Ensure each constant is rounded to XFmode precision. */
5357 11725 : real_convert (&ext_80387_constants_table[i],
5358 23450 : XFmode, &ext_80387_constants_table[i]);
5359 : }
5360 :
5361 2345 : ext_80387_constants_init = 1;
5362 2345 : }
5363 :
5364 : /* Return non-zero if the constant is something that
5365 : can be loaded with a special instruction. The result is -1 if X is not a CONST_DOUBLE in an x87 float mode, 1 for zero, 2 for one, 3..7 for the entries of ext_80387_constants_table (XFmode only, under the conditions tested below), 8 for -0.0, 9 for -1.0, and 0 otherwise. */
5366 :
5367 : int
5368 5034801 : standard_80387_constant_p (rtx x)
5369 : {
5370 5034801 : machine_mode mode = GET_MODE (x);
5371 :
5372 5034801 : const REAL_VALUE_TYPE *r;
5373 :
5374 5034801 : if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5375 : return -1;
5376 :
5377 4574524 : if (x == CONST0_RTX (mode))
5378 : return 1;
5379 2113290 : if (x == CONST1_RTX (mode))
5380 : return 2;
5381 :
5382 1229868 : r = CONST_DOUBLE_REAL_VALUE (x);
5383 :
5384 : /* For XFmode constants, try to find a special 80387 instruction when
5385 : optimizing for size or on those CPUs that benefit from them. */
5386 1229868 : if (mode == XFmode
5387 796436 : && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5388 2026304 : && !flag_rounding_math)
5389 : {
5390 788386 : int i;
5391 :
5392 788386 : if (! ext_80387_constants_init)
5393 2338 : init_ext_80387_constants ();
5394 : /* Table index I maps to return code I + 3. */
5395 4719794 : for (i = 0; i < 5; i++)
5396 3940249 : if (real_identical (r, &ext_80387_constants_table[i]))
5397 8841 : return i + 3;
5398 : }
5399 :
5400 : /* Load of the constant -0.0 or -1.0 will be split as
5401 : fldz;fchs or fld1;fchs sequence. */
5402 1221027 : if (real_isnegzero (r))
5403 : return 8;
5404 1204537 : if (real_identical (r, &dconstm1))
5405 301825 : return 9;
5406 :
5407 : return 0;
5408 : }
5409 :
5410 : /* Return the opcode of the special instruction to be used to load
5411 : the constant X, selected by the code that standard_80387_constant_p returns for X. Codes 8 and 9 yield the template #, and any code outside 1..9 is unreachable. */
5412 :
5413 : const char *
5414 54446 : standard_80387_constant_opcode (rtx x)
5415 : {
5416 54446 : switch (standard_80387_constant_p (x))
5417 : {
5418 : case 1:
5419 : return "fldz";
5420 33979 : case 2:
5421 33979 : return "fld1";
5422 1 : case 3:
5423 1 : return "fldlg2";
5424 10 : case 4:
5425 10 : return "fldln2";
5426 12 : case 5:
5427 12 : return "fldl2e";
5428 2 : case 6:
5429 2 : return "fldl2t";
5430 192 : case 7:
5431 192 : return "fldpi";
5432 0 : case 8:
5433 0 : case 9:
5434 0 : return "#";
5435 0 : default:
5436 0 : gcc_unreachable ();
5437 : }
5438 : }
5439 :
5440 : /* Return the CONST_DOUBLE representing the 80387 constant that is
5441 : loaded by the specified special instruction. The argument IDX
5442 : matches the return value from standard_80387_constant_p. Only IDX values 3..7 are accepted; the constants table is initialized on first use. */
5443 :
5444 : rtx
5445 24 : standard_80387_constant_rtx (int idx)
5446 : {
5447 24 : int i;
5448 :
5449 24 : if (! ext_80387_constants_init)
5450 7 : init_ext_80387_constants ();
5451 :
5452 24 : switch (idx)
5453 : {
5454 24 : case 3:
5455 24 : case 4:
5456 24 : case 5:
5457 24 : case 6:
5458 24 : case 7:
5459 24 : i = idx - 3;
5460 24 : break;
5461 :
5462 0 : default:
5463 0 : gcc_unreachable ();
5464 : }
5465 :
5466 24 : return const_double_from_real_value (ext_80387_constants_table[i],
5467 24 : XFmode);
5468 : }
5469 :
5470 : /* Return 1 if X is all bits 0, 2 if X is all bits 1
5471 : and 3 if X is all bits 1 with zero extend
5472 : in supported SSE/AVX vector mode. Return 0 otherwise, including when SSE is not enabled. PRED_MODE supplies the mode when X is a VOIDmode integer constant. */
5473 :
5474 : int
5475 55055238 : standard_sse_constant_p (rtx x, machine_mode pred_mode)
5476 : {
5477 55055238 : machine_mode mode;
5478 :
5479 55055238 : if (!TARGET_SSE)
5480 : return 0;
5481 :
5482 54886207 : mode = GET_MODE (x);
5483 :
5484 54886207 : if (x == const0_rtx || const0_operand (x, mode))
5485 13105980 : return 1;
5486 :
5487 41780227 : if (x == constm1_rtx
5488 41640800 : || vector_all_ones_operand (x, mode)
5489 82853722 : || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5490 34446074 : || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5491 6628368 : && float_vector_all_ones_operand (x, mode)))
5492 : {
5493 : /* VOIDmode integer constant, get mode from the predicate. */
5494 708823 : if (mode == VOIDmode)
5495 139427 : mode = pred_mode;
5496 : /* All-ones is accepted only when the ISA checked for the mode's size is enabled. */
5497 1417646 : switch (GET_MODE_SIZE (mode))
5498 : {
5499 30534 : case 64:
5500 30534 : if (TARGET_AVX512F)
5501 : return 2;
5502 : break;
5503 39939 : case 32:
5504 39939 : if (TARGET_AVX2)
5505 : return 2;
5506 : break;
5507 625957 : case 16:
5508 625957 : if (TARGET_SSE2)
5509 : return 2;
5510 : break;
5511 0 : case 0:
5512 : /* VOIDmode */
5513 0 : gcc_unreachable ();
5514 : default:
5515 : break;
5516 : }
5517 : }
5518 :
5519 41084728 : if (vector_all_ones_zero_extend_half_operand (x, mode)
5520 41084728 : || vector_all_ones_zero_extend_quarter_operand (x, mode))
5521 706 : return 3;
5522 :
5523 : return 0;
5524 : }
5525 :
5526 : /* Return the opcode of the special instruction to be used to load
5527 : the constant operands[1] into operands[0]. */
5528 :
5529 : const char *
5530 464169 : standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5531 : {
5532 464169 : machine_mode mode;
5533 464169 : rtx x = operands[1];
5534 :
5535 464169 : gcc_assert (TARGET_SSE);
5536 :
5537 464169 : mode = GET_MODE (x);
5538 :
5539 464169 : if (x == const0_rtx || const0_operand (x, mode))
5540 : {
5541 452524 : switch (get_attr_mode (insn))
5542 : {
5543 434865 : case MODE_TI:
5544 434865 : if (!EXT_REX_SSE_REG_P (operands[0]))
5545 : return "%vpxor\t%0, %d0";
5546 : /* FALLTHRU */
5547 6168 : case MODE_XI:
5548 6168 : case MODE_OI:
5549 6168 : if (EXT_REX_SSE_REG_P (operands[0]))
5550 : {
5551 67 : if (TARGET_AVX512VL)
5552 : return "vpxord\t%x0, %x0, %x0";
5553 : else
5554 28 : return "vpxord\t%g0, %g0, %g0";
5555 : }
5556 : return "vpxor\t%x0, %x0, %x0";
5557 :
5558 2098 : case MODE_V2DF:
5559 2098 : if (!EXT_REX_SSE_REG_P (operands[0]))
5560 : return "%vxorpd\t%0, %d0";
5561 : /* FALLTHRU */
5562 829 : case MODE_V8DF:
5563 829 : case MODE_V4DF:
5564 829 : if (EXT_REX_SSE_REG_P (operands[0]))
5565 : {
5566 4 : if (TARGET_AVX512DQ)
5567 : {
5568 0 : if (TARGET_AVX512VL)
5569 : return "vxorpd\t%x0, %x0, %x0";
5570 : else
5571 0 : return "vxorpd\t%g0, %g0, %g0";
5572 : }
5573 : else
5574 : {
5575 4 : if (TARGET_AVX512VL)
5576 : return "vpxorq\t%x0, %x0, %x0";
5577 : else
5578 4 : return "vpxorq\t%g0, %g0, %g0";
5579 : }
5580 : }
5581 : return "vxorpd\t%x0, %x0, %x0";
5582 :
5583 6617 : case MODE_V4SF:
5584 6617 : if (!EXT_REX_SSE_REG_P (operands[0]))
5585 : return "%vxorps\t%0, %d0";
5586 : /* FALLTHRU */
5587 2011 : case MODE_V16SF:
5588 2011 : case MODE_V8SF:
5589 2011 : if (EXT_REX_SSE_REG_P (operands[0]))
5590 : {
5591 68 : if (TARGET_AVX512DQ)
5592 : {
5593 26 : if (TARGET_AVX512VL)
5594 : return "vxorps\t%x0, %x0, %x0";
5595 : else
5596 0 : return "vxorps\t%g0, %g0, %g0";
5597 : }
5598 : else
5599 : {
5600 42 : if (TARGET_AVX512VL)
5601 : return "vpxord\t%x0, %x0, %x0";
5602 : else
5603 40 : return "vpxord\t%g0, %g0, %g0";
5604 : }
5605 : }
5606 : return "vxorps\t%x0, %x0, %x0";
5607 :
5608 0 : default:
5609 0 : gcc_unreachable ();
5610 : }
5611 : }
5612 11645 : else if (x == constm1_rtx
5613 11634 : || vector_all_ones_operand (x, mode)
5614 11712 : || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5615 45 : && float_vector_all_ones_operand (x, mode)))
5616 : {
5617 11623 : enum attr_mode insn_mode = get_attr_mode (insn);
5618 :
5619 11623 : switch (insn_mode)
5620 : {
5621 3 : case MODE_XI:
5622 3 : case MODE_V8DF:
5623 3 : case MODE_V16SF:
5624 3 : gcc_assert (TARGET_AVX512F);
5625 : return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5626 :
5627 959 : case MODE_OI:
5628 959 : case MODE_V4DF:
5629 959 : case MODE_V8SF:
5630 959 : gcc_assert (TARGET_AVX2);
5631 : /* FALLTHRU */
5632 11620 : case MODE_TI:
5633 11620 : case MODE_V2DF:
5634 11620 : case MODE_V4SF:
5635 11620 : gcc_assert (TARGET_SSE2);
5636 11620 : if (EXT_REX_SSE_REG_P (operands[0]))
5637 : {
5638 2 : if (TARGET_AVX512VL)
5639 : return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5640 : else
5641 0 : return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5642 : }
5643 11618 : return (TARGET_AVX
5644 11618 : ? "vpcmpeqd\t%0, %0, %0"
5645 11618 : : "pcmpeqd\t%0, %0");
5646 :
5647 0 : default:
5648 0 : gcc_unreachable ();
5649 : }
5650 : }
5651 22 : else if (vector_all_ones_zero_extend_half_operand (x, mode))
5652 : {
5653 40 : if (GET_MODE_SIZE (mode) == 64)
5654 : {
5655 5 : gcc_assert (TARGET_AVX512F);
5656 : return "vpcmpeqd\t%t0, %t0, %t0";
5657 : }
5658 30 : else if (GET_MODE_SIZE (mode) == 32)
5659 : {
5660 15 : gcc_assert (TARGET_AVX);
5661 : return "vpcmpeqd\t%x0, %x0, %x0";
5662 : }
5663 0 : gcc_unreachable ();
5664 : }
5665 2 : else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5666 : {
5667 2 : gcc_assert (TARGET_AVX512F);
5668 : return "vpcmpeqd\t%x0, %x0, %x0";
5669 : }
5670 :
5671 0 : gcc_unreachable ();
5672 : }
5673 :
5674 : /* Returns true if INSN can be transformed from a memory load
5675 : to a supported FP constant load. */
5676 :
5677 : bool
5678 2144820 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5679 : {
5680 2144820 : rtx src = find_constant_src (insn);
5681 :
5682 2144820 : gcc_assert (REG_P (dst));
5683 :
5684 2144820 : if (src == NULL
5685 595991 : || (SSE_REGNO_P (REGNO (dst))
5686 463968 : && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5687 165919 : || (!TARGET_AVX512VL
5688 165858 : && EXT_REX_SSE_REGNO_P (REGNO (dst))
5689 0 : && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5690 2310739 : || (STACK_REGNO_P (REGNO (dst))
5691 132023 : && standard_80387_constant_p (src) < 1))
5692 2068335 : return false;
5693 :
5694 : return true;
5695 : }
5696 :
5697 : /* Predicate for pre-reload splitters with associated instructions,
5698 : which can match any time before the split1 pass (usually combine),
5699 : then are unconditionally split in that pass and should not be
5700 : matched again afterwards. */
5701 :
5702 : bool
5703 17909070 : ix86_pre_reload_split (void)
5704 : {
5705 17909070 : return (can_create_pseudo_p ()
5706 27315368 : && !(cfun->curr_properties & PROP_rtl_split_insns));
5707 : }
5708 :
5709 : /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5710 : or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5711 : TARGET_AVX512VL or it is a register to register move which can
5712 : be done with zmm register move. */
5713 :
5714 : static const char *
5715 4188696 : ix86_get_ssemov (rtx *operands, unsigned size,
5716 : enum attr_mode insn_mode, machine_mode mode)
5717 : {
5718 4188696 : char buf[128];
5719 4188696 : bool misaligned_p = (misaligned_operand (operands[0], mode)
5720 4188696 : || misaligned_operand (operands[1], mode));
5721 4188696 : bool evex_reg_p = (size == 64
5722 4102318 : || EXT_REX_SSE_REG_P (operands[0])
5723 8290273 : || EXT_REX_SSE_REG_P (operands[1]));
5724 :
5725 4188696 : bool egpr_p = (TARGET_APX_EGPR
5726 4188696 : && (x86_extended_rex2reg_mentioned_p (operands[0])
5727 183 : || x86_extended_rex2reg_mentioned_p (operands[1])));
5728 196 : bool egpr_vl = egpr_p && TARGET_AVX512VL;
5729 :
5730 4188696 : machine_mode scalar_mode;
5731 :
5732 4188696 : const char *opcode = NULL;
5733 4188696 : enum
5734 : {
5735 : opcode_int,
5736 : opcode_float,
5737 : opcode_double
5738 4188696 : } type = opcode_int;
5739 :
5740 4188696 : switch (insn_mode)
5741 : {
5742 : case MODE_V16SF:
5743 : case MODE_V8SF:
5744 : case MODE_V4SF:
5745 : scalar_mode = E_SFmode;
5746 : type = opcode_float;
5747 : break;
5748 209257 : case MODE_V8DF:
5749 209257 : case MODE_V4DF:
5750 209257 : case MODE_V2DF:
5751 209257 : scalar_mode = E_DFmode;
5752 209257 : type = opcode_double;
5753 209257 : break;
5754 1520081 : case MODE_XI:
5755 1520081 : case MODE_OI:
5756 1520081 : case MODE_TI:
5757 1520081 : scalar_mode = GET_MODE_INNER (mode);
5758 : break;
5759 0 : default:
5760 0 : gcc_unreachable ();
5761 : }
5762 :
5763 : /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5764 : we can only use zmm register move without memory operand. */
5765 4188696 : if (evex_reg_p
5766 88427 : && !TARGET_AVX512VL
5767 4238817 : && GET_MODE_SIZE (mode) < 64)
5768 : {
5769 : /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5770 : xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5771 : AVX512VL is disabled, LRA can still generate reg to
5772 : reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5773 : modes. */
5774 0 : if (memory_operand (operands[0], mode)
5775 0 : || memory_operand (operands[1], mode))
5776 0 : gcc_unreachable ();
5777 0 : size = 64;
5778 0 : switch (type)
5779 : {
5780 0 : case opcode_int:
5781 0 : if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5782 0 : opcode = (misaligned_p
5783 0 : ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5784 : : "vmovdqa64");
5785 : else
5786 0 : opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5787 : break;
5788 0 : case opcode_float:
5789 0 : opcode = misaligned_p ? "vmovups" : "vmovaps";
5790 : break;
5791 0 : case opcode_double:
5792 0 : opcode = misaligned_p ? "vmovupd" : "vmovapd";
5793 : break;
5794 : }
5795 : }
5796 4188696 : else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5797 : {
5798 2844557 : switch (scalar_mode)
5799 : {
5800 36750 : case E_HFmode:
5801 36750 : case E_BFmode:
5802 36750 : if (evex_reg_p || egpr_vl)
5803 11597 : opcode = (misaligned_p
5804 173 : ? (TARGET_AVX512BW
5805 : ? "vmovdqu16"
5806 : : "vmovdqu64")
5807 : : "vmovdqa64");
5808 25153 : else if (egpr_p)
5809 803435 : opcode = (misaligned_p
5810 0 : ? (TARGET_AVX512BW
5811 0 : ? "vmovdqu16"
5812 : : "%vmovups")
5813 : : "%vmovaps");
5814 : else
5815 425679 : opcode = (misaligned_p
5816 25153 : ? (TARGET_AVX512BW && evex_reg_p
5817 : ? "vmovdqu16"
5818 : : "%vmovdqu")
5819 : : "%vmovdqa");
5820 : break;
5821 2459358 : case E_SFmode:
5822 2459358 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5823 : break;
5824 209257 : case E_DFmode:
5825 209257 : opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5826 : break;
5827 139192 : case E_TFmode:
5828 139192 : if (evex_reg_p || egpr_vl)
5829 14 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5830 139178 : else if (egpr_p)
5831 0 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5832 : else
5833 139178 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5834 : break;
5835 0 : default:
5836 0 : gcc_unreachable ();
5837 : }
5838 : }
5839 1344139 : else if (SCALAR_INT_MODE_P (scalar_mode))
5840 : {
5841 1344139 : switch (scalar_mode)
5842 : {
5843 106732 : case E_QImode:
5844 106732 : if (evex_reg_p || egpr_vl)
5845 4198907 : opcode = (misaligned_p
5846 10211 : ? (TARGET_AVX512BW
5847 5071 : ? "vmovdqu8"
5848 : : "vmovdqu64")
5849 : : "vmovdqa64");
5850 96521 : else if (egpr_p)
5851 30 : opcode = (misaligned_p
5852 0 : ? (TARGET_AVX512BW
5853 : ? "vmovdqu8"
5854 : : "%vmovups")
5855 : : "%vmovaps");
5856 : else
5857 96491 : opcode = (misaligned_p
5858 : ? (TARGET_AVX512BW && evex_reg_p
5859 : ? "vmovdqu8"
5860 : : "%vmovdqu")
5861 : : "%vmovdqa");
5862 : break;
5863 42594 : case E_HImode:
5864 42594 : if (evex_reg_p || egpr_vl)
5865 3716 : opcode = (misaligned_p
5866 300 : ? (TARGET_AVX512BW
5867 : ? "vmovdqu16"
5868 : : "vmovdqu64")
5869 : : "vmovdqa64");
5870 38878 : else if (egpr_p)
5871 803435 : opcode = (misaligned_p
5872 27 : ? (TARGET_AVX512BW
5873 0 : ? "vmovdqu16"
5874 : : "%vmovups")
5875 : : "%vmovaps");
5876 : else
5877 400526 : opcode = (misaligned_p
5878 38851 : ? (TARGET_AVX512BW && evex_reg_p
5879 : ? "vmovdqu16"
5880 : : "%vmovdqu")
5881 : : "%vmovdqa");
5882 : break;
5883 182029 : case E_SImode:
5884 182029 : if (evex_reg_p || egpr_vl)
5885 8200 : opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5886 173829 : else if (egpr_p)
5887 14 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5888 : else
5889 173815 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5890 : break;
5891 1000976 : case E_DImode:
5892 1000976 : case E_TImode:
5893 1000976 : case E_OImode:
5894 1000976 : if (evex_reg_p || egpr_vl)
5895 18500 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5896 982476 : else if (egpr_p)
5897 26 : opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5898 : else
5899 982450 : opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5900 : break;
5901 11808 : case E_XImode:
5902 49491 : opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5903 : break;
5904 0 : default:
5905 0 : gcc_unreachable ();
5906 : }
5907 : }
5908 : else
5909 0 : gcc_unreachable ();
5910 :
5911 4188696 : switch (size)
5912 : {
5913 86378 : case 64:
5914 86378 : snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5915 : opcode);
5916 86378 : break;
5917 91643 : case 32:
5918 91643 : snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5919 : opcode);
5920 91643 : break;
5921 4010675 : case 16:
5922 4010675 : snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5923 : opcode);
5924 4010675 : break;
5925 0 : default:
5926 0 : gcc_unreachable ();
5927 : }
5928 4188696 : output_asm_insn (buf, operands);
5929 4188696 : return "";
5930 : }
5931 :
5932 : /* Return the template of the TYPE_SSEMOV instruction to move
5933 : operands[1] into operands[0]. */
5934 :
5935 : const char *
5936 6565236 : ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5937 : {
5938 6565236 : machine_mode mode = GET_MODE (operands[0]);
5939 6565236 : if (get_attr_type (insn) != TYPE_SSEMOV
5940 6565236 : || mode != GET_MODE (operands[1]))
5941 0 : gcc_unreachable ();
5942 :
5943 6565236 : enum attr_mode insn_mode = get_attr_mode (insn);
5944 :
5945 6565236 : switch (insn_mode)
5946 : {
5947 86378 : case MODE_XI:
5948 86378 : case MODE_V8DF:
5949 86378 : case MODE_V16SF:
5950 86378 : return ix86_get_ssemov (operands, 64, insn_mode, mode);
5951 :
5952 91643 : case MODE_OI:
5953 91643 : case MODE_V4DF:
5954 91643 : case MODE_V8SF:
5955 91643 : return ix86_get_ssemov (operands, 32, insn_mode, mode);
5956 :
5957 4010675 : case MODE_TI:
5958 4010675 : case MODE_V2DF:
5959 4010675 : case MODE_V4SF:
5960 4010675 : return ix86_get_ssemov (operands, 16, insn_mode, mode);
5961 :
5962 662429 : case MODE_DI:
5963 : /* Handle broken assemblers that require movd instead of movq. */
5964 662429 : if (GENERAL_REG_P (operands[0]))
5965 : {
5966 : if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5967 : return "%vmovq\t{%1, %q0|%q0, %1}";
5968 : else
5969 : return "%vmovd\t{%1, %q0|%q0, %1}";
5970 : }
5971 586224 : else if (GENERAL_REG_P (operands[1]))
5972 : {
5973 : if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5974 : return "%vmovq\t{%q1, %0|%0, %q1}";
5975 : else
5976 : return "%vmovd\t{%q1, %0|%0, %q1}";
5977 : }
5978 : else
5979 420082 : return "%vmovq\t{%1, %0|%0, %1}";
5980 :
5981 201352 : case MODE_SI:
5982 201352 : if (GENERAL_REG_P (operands[0]))
5983 : return "%vmovd\t{%1, %k0|%k0, %1}";
5984 145341 : else if (GENERAL_REG_P (operands[1]))
5985 : return "%vmovd\t{%k1, %0|%0, %k1}";
5986 : else
5987 60662 : return "%vmovd\t{%1, %0|%0, %1}";
5988 :
5989 54085 : case MODE_HI:
5990 54085 : if (GENERAL_REG_P (operands[0]))
5991 : return "vmovw\t{%1, %k0|%k0, %1}";
5992 53922 : else if (GENERAL_REG_P (operands[1]))
5993 : return "vmovw\t{%k1, %0|%0, %k1}";
5994 : else
5995 53688 : return "vmovw\t{%1, %0|%0, %1}";
5996 :
5997 780993 : case MODE_DF:
5998 780993 : if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5999 : return "vmovsd\t{%d1, %0|%0, %d1}";
6000 : else
6001 780170 : return "%vmovsd\t{%1, %0|%0, %1}";
6002 :
6003 673614 : case MODE_SF:
6004 673614 : if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
6005 : return "vmovss\t{%d1, %0|%0, %d1}";
6006 : else
6007 673046 : return "%vmovss\t{%1, %0|%0, %1}";
6008 :
6009 96 : case MODE_HF:
6010 96 : case MODE_BF:
6011 96 : if (REG_P (operands[0]) && REG_P (operands[1]))
6012 : return "vmovsh\t{%d1, %0|%0, %d1}";
6013 : else
6014 0 : return "vmovsh\t{%1, %0|%0, %1}";
6015 :
6016 36 : case MODE_V1DF:
6017 36 : gcc_assert (!TARGET_AVX);
6018 : return "movlpd\t{%1, %0|%0, %1}";
6019 :
6020 3935 : case MODE_V2SF:
6021 3935 : if (TARGET_AVX && REG_P (operands[0]))
6022 : return "vmovlps\t{%1, %d0|%d0, %1}";
6023 : else
6024 3862 : return "%vmovlps\t{%1, %0|%0, %1}";
6025 :
6026 0 : default:
6027 0 : gcc_unreachable ();
6028 : }
6029 : }
6030 :
6031 : /* Returns true if OP contains a symbol reference */
6032 :
6033 : bool
6034 583867066 : symbolic_reference_mentioned_p (const_rtx op)
6035 : {
6036 583867066 : const char *fmt;
6037 583867066 : int i;
6038 :
6039 583867066 : if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
6040 : return true;
6041 :
6042 441335511 : fmt = GET_RTX_FORMAT (GET_CODE (op));
6043 748818725 : for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6044 : {
6045 597460605 : if (fmt[i] == 'E')
6046 : {
6047 2020273 : int j;
6048 :
6049 4041105 : for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6050 3327260 : if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6051 : return true;
6052 : }
6053 :
6054 595440332 : else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6055 : return true;
6056 : }
6057 :
6058 : return false;
6059 : }
6060 :
6061 : /* Return true if it is appropriate to emit `ret' instructions in the
6062 : body of a function. Do this only if the epilogue is simple, needing a
6063 : couple of insns. Prior to reloading, we can't tell how many registers
6064 : must be saved, so return false then. Return false if there is no frame
6065 : marker to de-allocate. */
6066 :
6067 : bool
6068 0 : ix86_can_use_return_insn_p (void)
6069 : {
6070 0 : if (ix86_function_ms_hook_prologue (current_function_decl))
6071 : return false;
6072 :
6073 0 : if (ix86_function_naked (current_function_decl))
6074 : return false;
6075 :
6076 : /* Don't use `ret' instruction in interrupt handler. */
6077 0 : if (! reload_completed
6078 0 : || frame_pointer_needed
6079 0 : || cfun->machine->func_type != TYPE_NORMAL)
6080 : return 0;
6081 :
6082 : /* Don't allow more than 32k pop, since that's all we can do
6083 : with one instruction. */
6084 0 : if (crtl->args.pops_args && crtl->args.size >= 32768)
6085 : return 0;
6086 :
6087 0 : struct ix86_frame &frame = cfun->machine->frame;
6088 0 : return (frame.stack_pointer_offset == UNITS_PER_WORD
6089 0 : && (frame.nregs + frame.nsseregs) == 0);
6090 : }
6091 :
6092 : /* Return stack frame size. get_frame_size () returns used stack slots
6093 : during compilation, which may be optimized out later. If stack frame
6094 : is needed, stack_frame_required should be true. */
6095 :
6096 : static HOST_WIDE_INT
6097 8243772 : ix86_get_frame_size (void)
6098 : {
6099 8243772 : if (cfun->machine->stack_frame_required)
6100 8174415 : return get_frame_size ();
6101 : else
6102 : return 0;
6103 : }
6104 :
6105 : /* Value should be nonzero if functions must have frame pointers.
6106 : Zero means the frame pointer need not be set up (and parms may
6107 : be accessed via the stack pointer) in functions that seem suitable. */
6108 :
6109 : static bool
6110 1227961 : ix86_frame_pointer_required (void)
6111 : {
6112 : /* If we accessed previous frames, then the generated code expects
6113 : to be able to access the saved ebp value in our frame. */
6114 1227961 : if (cfun->machine->accesses_prev_frame)
6115 : return true;
6116 :
6117 : /* Several x86 os'es need a frame pointer for other reasons,
6118 : usually pertaining to setjmp. */
6119 1227928 : if (SUBTARGET_FRAME_POINTER_REQUIRED)
6120 : return true;
6121 :
6122 : /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
6123 1227928 : if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
6124 : return true;
6125 :
6126 : /* Win64 SEH, very large frames need a frame-pointer as maximum stack
6127 : allocation is 4GB. */
6128 1227928 : if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
6129 : return true;
6130 :
6131 : /* SSE saves require frame-pointer when stack is misaligned. */
6132 1227928 : if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
6133 : return true;
6134 :
6135 : /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
6136 : turns off the frame pointer by default. Turn it back on now if
6137 : we've not got a leaf function. */
6138 1227927 : if (TARGET_OMIT_LEAF_FRAME_POINTER
6139 1227927 : && (!crtl->is_leaf
6140 0 : || ix86_current_function_calls_tls_descriptor))
6141 0 : return true;
6142 :
6143 : /* Several versions of mcount for the x86 assumes that there is a
6144 : frame, so we cannot allow profiling without a frame pointer. */
6145 1227927 : if (crtl->profile && !flag_fentry)
6146 : return true;
6147 :
6148 : return false;
6149 : }
6150 :
6151 : /* Record that the current function accesses previous call frames. */
6152 :
6153 : void
6154 966 : ix86_setup_frame_addresses (void)
6155 : {
6156 966 : cfun->machine->accesses_prev_frame = 1;
6157 966 : }
6158 :
/* USE_HIDDEN_LINKONCE is 1 when HAVE_GAS_HIDDEN is defined and
   SUPPORTS_ONE_ONLY evaluates to nonzero, and 0 otherwise.  The thunk
   emitters below use it to choose between named hidden comdat
   functions and internal labels.  */
#if !defined (HAVE_GAS_HIDDEN) || !(SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 0
#else
# define USE_HIDDEN_LINKONCE 1
#endif
6164 :
6165 : /* Label count for call and return thunks. It is used to make unique
6166 : labels in call and return thunks. */
6167 : static int indirectlabelno;
6168 :
6169 : /* True if call thunk function is needed. */
6170 : static bool indirect_thunk_needed = false;
6171 :
6172 : /* Bit masks of integer registers, which contain branch target, used
6173 : by call thunk functions. */
6174 : static HARD_REG_SET indirect_thunks_used;
6175 :
6176 : /* True if return thunk function is needed. */
6177 : static bool indirect_return_needed = false;
6178 :
6179 : /* True if return thunk function via CX is needed. */
6180 : static bool indirect_return_via_cx;
6181 :
6182 : #ifndef INDIRECT_LABEL
6183 : # define INDIRECT_LABEL "LIND"
6184 : #endif
6185 :
/* The kind of prefix an indirect branch needs.  */
enum indirect_thunk_prefix
{
  /* No prefix.  */
  indirect_thunk_prefix_none = 0,
  /* NOTRACK prefix.  */
  indirect_thunk_prefix_nt = 1
};
6192 :
6193 : /* Return the prefix needed for an indirect branch INSN. */
6194 :
6195 : enum indirect_thunk_prefix
6196 67 : indirect_thunk_need_prefix (rtx_insn *insn)
6197 : {
6198 67 : enum indirect_thunk_prefix need_prefix;
6199 67 : if ((cfun->machine->indirect_branch_type
6200 67 : == indirect_branch_thunk_extern)
6201 67 : && ix86_notrack_prefixed_insn_p (insn))
6202 : {
6203 : /* NOTRACK prefix is only used with external thunk so that it
6204 : can be properly updated to support CET at run-time. */
6205 : need_prefix = indirect_thunk_prefix_nt;
6206 : }
6207 : else
6208 : need_prefix = indirect_thunk_prefix_none;
6209 67 : return need_prefix;
6210 : }
6211 :
6212 : /* Fills in the label name that should be used for the indirect thunk. */
6213 :
6214 : static void
6215 73 : indirect_thunk_name (char name[32], unsigned int regno,
6216 : enum indirect_thunk_prefix need_prefix,
6217 : bool ret_p)
6218 : {
6219 73 : if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6220 0 : gcc_unreachable ();
6221 :
6222 73 : if (USE_HIDDEN_LINKONCE)
6223 : {
6224 73 : const char *prefix;
6225 :
6226 73 : if (need_prefix == indirect_thunk_prefix_nt
6227 73 : && regno != INVALID_REGNUM)
6228 : {
6229 : /* NOTRACK prefix is only used with external thunk via
6230 : register so that NOTRACK prefix can be added to indirect
6231 : branch via register to support CET at run-time. */
6232 : prefix = "_nt";
6233 : }
6234 : else
6235 71 : prefix = "";
6236 :
6237 73 : const char *ret = ret_p ? "return" : "indirect";
6238 :
6239 73 : if (regno != INVALID_REGNUM)
6240 : {
6241 55 : const char *reg_prefix;
6242 55 : if (LEGACY_INT_REGNO_P (regno))
6243 53 : reg_prefix = TARGET_64BIT ? "r" : "e";
6244 : else
6245 : reg_prefix = "";
6246 55 : sprintf (name, "__x86_%s_thunk%s_%s%s",
6247 : ret, prefix, reg_prefix, reg_names[regno]);
6248 : }
6249 : else
6250 18 : sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6251 : }
6252 : else
6253 : {
6254 : if (regno != INVALID_REGNUM)
6255 : ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6256 : else
6257 : {
6258 : if (ret_p)
6259 : ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6260 : else
6261 73 : ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6262 : }
6263 : }
6264 73 : }
6265 :
6266 : /* Output a call and return thunk for indirect branch. If REGNO != -1,
6267 : the function address is in REGNO and the call and return thunk looks like:
6268 :
6269 : call L2
6270 : L1:
6271 : pause
6272 : lfence
6273 : jmp L1
6274 : L2:
6275 : mov %REG, (%sp)
6276 : ret
6277 :
6278 : Otherwise, the function address is on the top of stack and the
6279 : call and return thunk looks like:
6280 :
6281 : call L2
6282 : L1:
6283 : pause
6284 : lfence
6285 : jmp L1
6286 : L2:
6287 : lea WORD_SIZE(%sp), %sp
6288 : ret
6289 : */
6290 :
6291 : static void
6292 38 : output_indirect_thunk (unsigned int regno)
6293 : {
6294 38 : char indirectlabel1[32];
6295 38 : char indirectlabel2[32];
6296 :
6297 38 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6298 : indirectlabelno++);
6299 38 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6300 : indirectlabelno++);
6301 :
6302 : /* Call */
6303 38 : fputs ("\tcall\t", asm_out_file);
6304 38 : assemble_name_raw (asm_out_file, indirectlabel2);
6305 38 : fputc ('\n', asm_out_file);
6306 :
6307 38 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6308 :
6309 : /* AMD and Intel CPUs prefer each a different instruction as loop filler.
6310 : Usage of both pause + lfence is compromise solution. */
6311 38 : fprintf (asm_out_file, "\tpause\n\tlfence\n");
6312 :
6313 : /* Jump. */
6314 38 : fputs ("\tjmp\t", asm_out_file);
6315 38 : assemble_name_raw (asm_out_file, indirectlabel1);
6316 38 : fputc ('\n', asm_out_file);
6317 :
6318 38 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6319 :
6320 : /* The above call insn pushed a word to stack. Adjust CFI info. */
6321 38 : if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6322 : {
6323 38 : if (! dwarf2out_do_cfi_asm ())
6324 : {
6325 0 : dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6326 0 : xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6327 0 : xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6328 0 : vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6329 : }
6330 38 : dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6331 38 : xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6332 38 : xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6333 38 : vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6334 38 : dwarf2out_emit_cfi (xcfi);
6335 : }
6336 :
6337 38 : if (regno != INVALID_REGNUM)
6338 : {
6339 : /* MOV. */
6340 27 : rtx xops[2];
6341 27 : xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6342 27 : xops[1] = gen_rtx_REG (word_mode, regno);
6343 27 : output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6344 : }
6345 : else
6346 : {
6347 : /* LEA. */
6348 11 : rtx xops[2];
6349 11 : xops[0] = stack_pointer_rtx;
6350 11 : xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6351 11 : output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6352 : }
6353 :
6354 38 : fputs ("\tret\n", asm_out_file);
6355 38 : if ((ix86_harden_sls & harden_sls_return))
6356 1 : fputs ("\tint3\n", asm_out_file);
6357 38 : }
6358 :
6359 : /* Output a funtion with a call and return thunk for indirect branch.
6360 : If REGNO != INVALID_REGNUM, the function address is in REGNO.
6361 : Otherwise, the function address is on the top of stack. Thunk is
6362 : used for function return if RET_P is true. */
6363 :
6364 : static void
6365 22 : output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6366 : unsigned int regno, bool ret_p)
6367 : {
6368 22 : char name[32];
6369 22 : tree decl;
6370 :
6371 : /* Create __x86_indirect_thunk. */
6372 22 : indirect_thunk_name (name, regno, need_prefix, ret_p);
6373 22 : decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6374 : get_identifier (name),
6375 : build_function_type_list (void_type_node, NULL_TREE));
6376 22 : DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6377 : NULL_TREE, void_type_node);
6378 22 : TREE_PUBLIC (decl) = 1;
6379 22 : TREE_STATIC (decl) = 1;
6380 22 : DECL_IGNORED_P (decl) = 1;
6381 :
6382 : #if TARGET_MACHO
6383 : if (TARGET_MACHO)
6384 : {
6385 : switch_to_section (darwin_sections[picbase_thunk_section]);
6386 : fputs ("\t.weak_definition\t", asm_out_file);
6387 : assemble_name (asm_out_file, name);
6388 : fputs ("\n\t.private_extern\t", asm_out_file);
6389 : assemble_name (asm_out_file, name);
6390 : putc ('\n', asm_out_file);
6391 : ASM_OUTPUT_LABEL (asm_out_file, name);
6392 : DECL_WEAK (decl) = 1;
6393 : }
6394 : else
6395 : #endif
6396 22 : if (USE_HIDDEN_LINKONCE)
6397 : {
6398 22 : cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6399 :
6400 22 : targetm.asm_out.unique_section (decl, 0);
6401 22 : switch_to_section (get_named_section (decl, NULL, 0));
6402 :
6403 22 : targetm.asm_out.globalize_label (asm_out_file, name);
6404 22 : fputs ("\t.hidden\t", asm_out_file);
6405 22 : assemble_name (asm_out_file, name);
6406 22 : putc ('\n', asm_out_file);
6407 22 : ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6408 : }
6409 : else
6410 : {
6411 : switch_to_section (text_section);
6412 22 : ASM_OUTPUT_LABEL (asm_out_file, name);
6413 : }
6414 :
6415 22 : DECL_INITIAL (decl) = make_node (BLOCK);
6416 22 : current_function_decl = decl;
6417 22 : allocate_struct_function (decl, false);
6418 22 : init_function_start (decl);
6419 : /* We're about to hide the function body from callees of final_* by
6420 : emitting it directly; tell them we're a thunk, if they care. */
6421 22 : cfun->is_thunk = true;
6422 22 : first_function_block_is_cold = false;
6423 : /* Make sure unwind info is emitted for the thunk if needed. */
6424 22 : final_start_function (emit_barrier (), asm_out_file, 1);
6425 :
6426 22 : output_indirect_thunk (regno);
6427 :
6428 22 : final_end_function ();
6429 22 : init_insn_lengths ();
6430 22 : free_after_compilation (cfun);
6431 22 : set_cfun (NULL);
6432 22 : current_function_decl = NULL;
6433 22 : }
6434 :
/* Bit mask indexed by integer register number; ix86_code_end emits a
   pc thunk for every register whose bit is set.  */
static int pic_labels_used = 0;
6436 :
6437 : /* Fills in the label name that should be used for a pc thunk for
6438 : the given register. */
6439 :
6440 : static void
6441 37452 : get_pc_thunk_name (char name[32], unsigned int regno)
6442 : {
6443 37452 : gcc_assert (!TARGET_64BIT);
6444 :
6445 37452 : if (USE_HIDDEN_LINKONCE)
6446 37452 : sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6447 : else
6448 37452 : ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6449 37452 : }
6450 :
6451 :
6452 : /* This function generates code for -fpic that loads %ebx with
6453 : the return address of the caller and then returns. */
6454 :
6455 : static void
6456 232630 : ix86_code_end (void)
6457 : {
6458 232630 : rtx xops[2];
6459 232630 : unsigned int regno;
6460 :
6461 232630 : if (indirect_return_needed)
6462 6 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6463 : INVALID_REGNUM, true);
6464 232630 : if (indirect_return_via_cx)
6465 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6466 : CX_REG, true);
6467 232630 : if (indirect_thunk_needed)
6468 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6469 : INVALID_REGNUM, false);
6470 :
6471 2093670 : for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6472 : {
6473 1861040 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6474 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6475 : regno, false);
6476 : }
6477 :
6478 3954710 : for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6479 : {
6480 3722080 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6481 0 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6482 : regno, false);
6483 : }
6484 :
6485 2093670 : for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6486 : {
6487 1861040 : char name[32];
6488 1861040 : tree decl;
6489 :
6490 1861040 : if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6491 16 : output_indirect_thunk_function (indirect_thunk_prefix_none,
6492 : regno, false);
6493 :
6494 1861040 : if (!(pic_labels_used & (1 << regno)))
6495 1857456 : continue;
6496 :
6497 3584 : get_pc_thunk_name (name, regno);
6498 :
6499 3584 : decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6500 : get_identifier (name),
6501 : build_function_type_list (void_type_node, NULL_TREE));
6502 3584 : DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6503 : NULL_TREE, void_type_node);
6504 3584 : TREE_PUBLIC (decl) = 1;
6505 3584 : TREE_STATIC (decl) = 1;
6506 3584 : DECL_IGNORED_P (decl) = 1;
6507 :
6508 : #if TARGET_MACHO
6509 : if (TARGET_MACHO)
6510 : {
6511 : switch_to_section (darwin_sections[picbase_thunk_section]);
6512 : fputs ("\t.weak_definition\t", asm_out_file);
6513 : assemble_name (asm_out_file, name);
6514 : fputs ("\n\t.private_extern\t", asm_out_file);
6515 : assemble_name (asm_out_file, name);
6516 : putc ('\n', asm_out_file);
6517 : ASM_OUTPUT_LABEL (asm_out_file, name);
6518 : DECL_WEAK (decl) = 1;
6519 : }
6520 : else
6521 : #endif
6522 3584 : if (USE_HIDDEN_LINKONCE)
6523 : {
6524 3584 : cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6525 :
6526 3584 : targetm.asm_out.unique_section (decl, 0);
6527 3584 : switch_to_section (get_named_section (decl, NULL, 0));
6528 :
6529 3584 : targetm.asm_out.globalize_label (asm_out_file, name);
6530 3584 : fputs ("\t.hidden\t", asm_out_file);
6531 3584 : assemble_name (asm_out_file, name);
6532 3584 : putc ('\n', asm_out_file);
6533 3584 : ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6534 : }
6535 : else
6536 : {
6537 : switch_to_section (text_section);
6538 3584 : ASM_OUTPUT_LABEL (asm_out_file, name);
6539 : }
6540 :
6541 3584 : DECL_INITIAL (decl) = make_node (BLOCK);
6542 3584 : current_function_decl = decl;
6543 3584 : allocate_struct_function (decl, false);
6544 3584 : init_function_start (decl);
6545 : /* We're about to hide the function body from callees of final_* by
6546 : emitting it directly; tell them we're a thunk, if they care. */
6547 3584 : cfun->is_thunk = true;
6548 3584 : first_function_block_is_cold = false;
6549 : /* Make sure unwind info is emitted for the thunk if needed. */
6550 3584 : final_start_function (emit_barrier (), asm_out_file, 1);
6551 :
6552 : /* Pad stack IP move with 4 instructions (two NOPs count
6553 : as one instruction). */
6554 3584 : if (TARGET_PAD_SHORT_FUNCTION)
6555 : {
6556 : int i = 8;
6557 :
6558 0 : while (i--)
6559 0 : fputs ("\tnop\n", asm_out_file);
6560 : }
6561 :
6562 7168 : xops[0] = gen_rtx_REG (Pmode, regno);
6563 7168 : xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6564 3584 : output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6565 3584 : fputs ("\tret\n", asm_out_file);
6566 3584 : final_end_function ();
6567 3584 : init_insn_lengths ();
6568 3584 : free_after_compilation (cfun);
6569 3584 : set_cfun (NULL);
6570 3584 : current_function_decl = NULL;
6571 : }
6572 :
6573 232630 : if (flag_split_stack)
6574 4710 : file_end_indicate_split_stack ();
6575 232630 : }
6576 :
6577 : /* Emit code for the SET_GOT patterns. */
6578 :
6579 : const char *
6580 33868 : output_set_got (rtx dest, rtx label)
6581 : {
6582 33868 : rtx xops[3];
6583 :
6584 33868 : xops[0] = dest;
6585 :
6586 33868 : if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
6587 : {
6588 : /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6589 : xops[2] = gen_rtx_MEM (Pmode,
6590 : gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6591 : output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6592 :
6593 : /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6594 : Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6595 : an unadorned address. */
6596 : xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6597 : SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6598 : output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6599 : return "";
6600 : }
6601 :
6602 67736 : xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6603 :
6604 33868 : if (flag_pic)
6605 : {
6606 33868 : char name[32];
6607 33868 : get_pc_thunk_name (name, REGNO (dest));
6608 33868 : pic_labels_used |= 1 << REGNO (dest);
6609 :
6610 67736 : xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6611 33868 : xops[2] = gen_rtx_MEM (QImode, xops[2]);
6612 33868 : output_asm_insn ("%!call\t%X2", xops);
6613 :
6614 : #if TARGET_MACHO
6615 : /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6616 : This is what will be referenced by the Mach-O PIC subsystem. */
6617 : if (machopic_should_output_picbase_label () || !label)
6618 : ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6619 :
6620 : /* When we are restoring the pic base at the site of a nonlocal label,
6621 : and we decided to emit the pic base above, we will still output a
6622 : local label used for calculating the correction offset (even though
6623 : the offset will be 0 in that case). */
6624 : if (label)
6625 : targetm.asm_out.internal_label (asm_out_file, "L",
6626 : CODE_LABEL_NUMBER (label));
6627 : #endif
6628 : }
6629 : else
6630 : {
6631 0 : if (TARGET_MACHO)
6632 : /* We don't need a pic base, we're not producing pic. */
6633 : gcc_unreachable ();
6634 :
6635 0 : xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6636 0 : output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6637 0 : targetm.asm_out.internal_label (asm_out_file, "L",
6638 0 : CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6639 : }
6640 :
6641 33868 : if (!TARGET_MACHO)
6642 33868 : output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6643 :
6644 33868 : return "";
6645 : }
6646 :
6647 : /* Generate an "push" pattern for input ARG. */
6648 :
6649 : rtx
6650 1885996 : gen_push (rtx arg, bool ppx_p)
6651 : {
6652 1885996 : struct machine_function *m = cfun->machine;
6653 :
6654 1885996 : if (m->fs.cfa_reg == stack_pointer_rtx)
6655 1610498 : m->fs.cfa_offset += UNITS_PER_WORD;
6656 1885996 : m->fs.sp_offset += UNITS_PER_WORD;
6657 :
6658 1885996 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6659 31 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6660 :
6661 1885996 : rtx stack = gen_rtx_MEM (word_mode,
6662 1885996 : gen_rtx_PRE_DEC (Pmode,
6663 : stack_pointer_rtx));
6664 3771904 : return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6665 : }
6666 :
6667 : rtx
6668 23 : gen_pushfl (void)
6669 : {
6670 23 : struct machine_function *m = cfun->machine;
6671 23 : rtx flags, mem;
6672 :
6673 23 : if (m->fs.cfa_reg == stack_pointer_rtx)
6674 0 : m->fs.cfa_offset += UNITS_PER_WORD;
6675 23 : m->fs.sp_offset += UNITS_PER_WORD;
6676 :
6677 23 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6678 :
6679 23 : mem = gen_rtx_MEM (word_mode,
6680 23 : gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6681 :
6682 23 : return gen_pushfl2 (word_mode, mem, flags);
6683 : }
6684 :
6685 : /* Generate an "pop" pattern for input ARG. */
6686 :
6687 : rtx
6688 1468989 : gen_pop (rtx arg, bool ppx_p)
6689 : {
6690 1468989 : if (REG_P (arg) && GET_MODE (arg) != word_mode)
6691 27 : arg = gen_rtx_REG (word_mode, REGNO (arg));
6692 :
6693 1468989 : rtx stack = gen_rtx_MEM (word_mode,
6694 1468989 : gen_rtx_POST_INC (Pmode,
6695 : stack_pointer_rtx));
6696 :
6697 2937890 : return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6698 : }
6699 :
6700 : rtx
6701 21 : gen_popfl (void)
6702 : {
6703 21 : rtx flags, mem;
6704 :
6705 21 : flags = gen_rtx_REG (CCmode, FLAGS_REG);
6706 :
6707 21 : mem = gen_rtx_MEM (word_mode,
6708 21 : gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6709 :
6710 21 : return gen_popfl1 (word_mode, flags, mem);
6711 : }
6712 :
6713 : /* Generate a "push2" pattern for input ARG. */
6714 : rtx
6715 19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6716 : {
6717 19 : struct machine_function *m = cfun->machine;
6718 19 : const int offset = UNITS_PER_WORD * 2;
6719 :
6720 19 : if (m->fs.cfa_reg == stack_pointer_rtx)
6721 14 : m->fs.cfa_offset += offset;
6722 19 : m->fs.sp_offset += offset;
6723 :
6724 19 : if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6725 0 : reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6726 :
6727 19 : if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6728 0 : reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6729 :
6730 19 : return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6731 4 : : gen_push2_di (mem, reg1, reg2);
6732 : }
6733 :
6734 : /* Return >= 0 if there is an unused call-clobbered register available
6735 : for the entire function. */
6736 :
6737 : static unsigned int
6738 0 : ix86_select_alt_pic_regnum (void)
6739 : {
6740 0 : if (ix86_use_pseudo_pic_reg ())
6741 : return INVALID_REGNUM;
6742 :
6743 0 : if (crtl->is_leaf
6744 0 : && !crtl->profile
6745 0 : && !ix86_current_function_calls_tls_descriptor)
6746 : {
6747 0 : int i, drap;
6748 : /* Can't use the same register for both PIC and DRAP. */
6749 0 : if (crtl->drap_reg)
6750 0 : drap = REGNO (crtl->drap_reg);
6751 : else
6752 : drap = -1;
6753 0 : for (i = 2; i >= 0; --i)
6754 0 : if (i != drap && !df_regs_ever_live_p (i))
6755 : return i;
6756 : }
6757 :
6758 : return INVALID_REGNUM;
6759 : }
6760 :
6761 : /* Return true if REGNO is used by the epilogue. */
6762 :
6763 : bool
6764 1665528998 : ix86_epilogue_uses (int regno)
6765 : {
6766 : /* If there are no caller-saved registers, we preserve all registers,
6767 : except for MMX and x87 registers which aren't supported when saving
6768 : and restoring registers. Don't explicitly save SP register since
6769 : it is always preserved. */
6770 1665528998 : return (epilogue_completed
6771 263492814 : && (cfun->machine->call_saved_registers
6772 263492814 : == TYPE_NO_CALLER_SAVED_REGISTERS)
6773 27140 : && !fixed_regs[regno]
6774 4857 : && !STACK_REGNO_P (regno)
6775 1665533855 : && !MMX_REGNO_P (regno));
6776 : }
6777 :
6778 : /* Return nonzero if register REGNO can be used as a scratch register
6779 : in peephole2. */
6780 :
6781 : static bool
6782 1224157 : ix86_hard_regno_scratch_ok (unsigned int regno)
6783 : {
6784 : /* If there are no caller-saved registers, we can't use any register
6785 : as a scratch register after epilogue and use REGNO as scratch
6786 : register only if it has been used before to avoid saving and
6787 : restoring it. */
6788 1224157 : return ((cfun->machine->call_saved_registers
6789 1224157 : != TYPE_NO_CALLER_SAVED_REGISTERS)
6790 1224157 : || (!epilogue_completed
6791 0 : && df_regs_ever_live_p (regno)));
6792 : }
6793 :
6794 : /* Return TRUE if we need to save REGNO. */
6795 :
6796 : bool
6797 353851150 : ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6798 : {
6799 353851150 : rtx reg;
6800 :
6801 : /* Save and restore DRAP register between prologue and epilogue so
6802 : that stack pointer can be restored. */
6803 353851150 : if (crtl->drap_reg
6804 2287026 : && regno == REGNO (crtl->drap_reg)
6805 353906813 : && !cfun->machine->no_drap_save_restore)
6806 : return true;
6807 :
6808 353795487 : switch (cfun->machine->call_saved_registers)
6809 : {
6810 : case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6811 : break;
6812 :
6813 57152 : case TYPE_NO_CALLER_SAVED_REGISTERS:
6814 : /* If there are no caller-saved registers, we preserve all
6815 : registers, except for MMX and x87 registers which aren't
6816 : supported when saving and restoring registers. Don't
6817 : explicitly save SP register since it is always preserved.
6818 :
6819 : Don't preserve registers used for function return value. */
6820 57152 : reg = crtl->return_rtx;
6821 57152 : if (reg)
6822 : {
6823 768 : unsigned int i = REGNO (reg);
6824 768 : unsigned int nregs = REG_NREGS (reg);
6825 1522 : while (nregs-- > 0)
6826 768 : if ((i + nregs) == regno)
6827 : return false;
6828 : }
6829 :
6830 57138 : return (df_regs_ever_live_p (regno)
6831 6930 : && !fixed_regs[regno]
6832 5962 : && !STACK_REGNO_P (regno)
6833 5962 : && !MMX_REGNO_P (regno)
6834 63100 : && (regno != HARD_FRAME_POINTER_REGNUM
6835 249 : || !frame_pointer_needed));
6836 :
6837 18192 : case TYPE_NO_CALLEE_SAVED_REGISTERS:
6838 18192 : case TYPE_PRESERVE_NONE:
6839 18192 : if (regno != HARD_FRAME_POINTER_REGNUM)
6840 : return false;
6841 : break;
6842 : }
6843 :
6844 387694417 : if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6845 10762684 : && pic_offset_table_rtx)
6846 : {
6847 385422 : if (ix86_use_pseudo_pic_reg ())
6848 : {
6849 : /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6850 : _mcount in prologue. */
6851 385422 : if (!TARGET_64BIT && flag_pic && crtl->profile)
6852 : return true;
6853 : }
6854 0 : else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6855 0 : || crtl->profile
6856 0 : || crtl->calls_eh_return
6857 0 : || crtl->uses_const_pool
6858 0 : || cfun->has_nonlocal_label)
6859 0 : return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6860 : }
6861 :
6862 353720712 : if (crtl->calls_eh_return && maybe_eh_return)
6863 : {
6864 : unsigned i;
6865 13237 : for (i = 0; ; i++)
6866 : {
6867 20181 : unsigned test = EH_RETURN_DATA_REGNO (i);
6868 13671 : if (test == INVALID_REGNUM)
6869 : break;
6870 13671 : if (test == regno)
6871 : return true;
6872 13237 : }
6873 : }
6874 :
6875 353720278 : if (ignore_outlined && cfun->machine->call_ms2sysv)
6876 : {
6877 2641728 : unsigned count = cfun->machine->call_ms2sysv_extra_regs
6878 : + xlogue_layout::MIN_REGS;
6879 2641728 : if (xlogue_layout::is_stub_managed_reg (regno, count))
6880 : return false;
6881 : }
6882 :
6883 353220409 : return (df_regs_ever_live_p (regno)
6884 372634325 : && !call_used_or_fixed_reg_p (regno)
6885 372003584 : && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6886 : }
6887 :
6888 : /* Return number of saved general prupose registers. */
6889 :
6890 : static int
6891 8168022 : ix86_nsaved_regs (void)
6892 : {
6893 8168022 : int nregs = 0;
6894 8168022 : int regno;
6895 :
6896 759626046 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6897 751458024 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6898 8221039 : nregs ++;
6899 8168022 : return nregs;
6900 : }
6901 :
6902 : /* Return number of saved SSE registers. */
6903 :
6904 : static int
6905 8202986 : ix86_nsaved_sseregs (void)
6906 : {
6907 8202986 : int nregs = 0;
6908 8202986 : int regno;
6909 :
6910 7400915 : if (!TARGET_64BIT_MS_ABI
6911 8202986 : && (cfun->machine->call_saved_registers
6912 7977385 : != TYPE_NO_CALLER_SAVED_REGISTERS))
6913 : return 0;
6914 21049434 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6915 20823096 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6916 1897045 : nregs ++;
6917 : return nregs;
6918 : }
6919 :
6920 : /* Given FROM and TO register numbers, say whether this elimination is
6921 : allowed. If stack alignment is needed, we can only replace argument
6922 : pointer with hard frame pointer, or replace frame pointer with stack
6923 : pointer. Otherwise, frame pointer elimination is automatically
6924 : handled and all other eliminations are valid. */
6925 :
6926 : static bool
6927 48372279 : ix86_can_eliminate (const int from, const int to)
6928 : {
6929 48372279 : if (stack_realign_fp)
6930 1704480 : return ((from == ARG_POINTER_REGNUM
6931 1704480 : && to == HARD_FRAME_POINTER_REGNUM)
6932 1704480 : || (from == FRAME_POINTER_REGNUM
6933 1704480 : && to == STACK_POINTER_REGNUM));
6934 : else
6935 86831734 : return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6936 : }
6937 :
6938 : /* Return the offset between two registers, one to be eliminated, and the other
6939 : its replacement, at the start of a routine. */
6940 :
6941 : HOST_WIDE_INT
6942 141280883 : ix86_initial_elimination_offset (int from, int to)
6943 : {
6944 141280883 : struct ix86_frame &frame = cfun->machine->frame;
6945 :
6946 141280883 : if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6947 10418894 : return frame.hard_frame_pointer_offset;
6948 130861989 : else if (from == FRAME_POINTER_REGNUM
6949 130861989 : && to == HARD_FRAME_POINTER_REGNUM)
6950 8132732 : return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6951 : else
6952 : {
6953 122729257 : gcc_assert (to == STACK_POINTER_REGNUM);
6954 :
6955 122729257 : if (from == ARG_POINTER_REGNUM)
6956 114596525 : return frame.stack_pointer_offset;
6957 :
6958 8132732 : gcc_assert (from == FRAME_POINTER_REGNUM);
6959 8132732 : return frame.stack_pointer_offset - frame.frame_pointer_offset;
6960 : }
6961 : }
6962 :
6963 : /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6964 : void
6965 0 : warn_once_call_ms2sysv_xlogues (const char *feature)
6966 : {
6967 0 : static bool warned_once = false;
6968 0 : if (!warned_once)
6969 : {
6970 0 : warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6971 : feature);
6972 0 : warned_once = true;
6973 : }
6974 0 : }
6975 :
6976 : /* Return the probing interval for -fstack-clash-protection. */
6977 :
6978 : static HOST_WIDE_INT
6979 494 : get_probe_interval (void)
6980 : {
6981 341 : if (flag_stack_clash_protection)
6982 412 : return (HOST_WIDE_INT_1U
6983 412 : << param_stack_clash_protection_probe_interval);
6984 : else
6985 : return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6986 : }
6987 :
6988 : /* When using -fsplit-stack, the allocation routines set a field in
6989 : the TCB to the bottom of the stack plus this much space, measured
6990 : in bytes. */
6991 :
6992 : #define SPLIT_STACK_AVAILABLE 256
6993 :
6994 : /* Return true if push2/pop2 can be generated. */
6995 :
6996 : static bool
6997 8168675 : ix86_can_use_push2pop2 (void)
6998 : {
6999 : /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
7000 8168675 : unsigned int incoming_stack_boundary
7001 8168675 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7002 8168675 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7003 8168675 : return incoming_stack_boundary % 128 == 0;
7004 : }
7005 :
7006 : /* Helper function to determine whether push2/pop2 can be used in prologue or
7007 : epilogue for register save/restore. */
7008 : static bool
7009 8168022 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
7010 : {
7011 8168022 : if (!ix86_can_use_push2pop2 ())
7012 : return false;
7013 8132111 : int aligned = cfun->machine->fs.sp_offset % 16 == 0;
7014 8132111 : return TARGET_APX_PUSH2POP2
7015 2839 : && !cfun->machine->frame.save_regs_using_mov
7016 2827 : && cfun->machine->func_type == TYPE_NORMAL
7017 8134930 : && (nregs + aligned) >= 3;
7018 : }
7019 :
7020 : /* Check if push/pop should be used to save/restore registers. */
7021 : static bool
7022 8893921 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
7023 : {
7024 3196071 : return ((!to_allocate && cfun->machine->frame.nregs <= 1)
7025 5946382 : || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7026 : /* If static stack checking is enabled and done with probes,
7027 : the registers need to be saved before allocating the frame. */
7028 5945721 : || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7029 : /* If stack clash probing needs a loop, then it needs a
7030 : scratch register. But the returned register is only guaranteed
7031 : to be safe to use after register saves are complete. So if
7032 : stack clash protections are enabled and the allocated frame is
7033 : larger than the probe interval, then use pushes to save
7034 : callee saved registers. */
7035 14839568 : || (flag_stack_clash_protection
7036 341 : && !ix86_target_stack_probe ()
7037 341 : && to_allocate > get_probe_interval ()));
7038 : }
7039 :
7040 : /* Fill structure ix86_frame about frame of currently computed function. */
7041 :
7042 : static void
7043 8168022 : ix86_compute_frame_layout (void)
7044 : {
7045 8168022 : struct ix86_frame *frame = &cfun->machine->frame;
7046 8168022 : struct machine_function *m = cfun->machine;
7047 8168022 : unsigned HOST_WIDE_INT stack_alignment_needed;
7048 8168022 : HOST_WIDE_INT offset;
7049 8168022 : unsigned HOST_WIDE_INT preferred_alignment;
7050 8168022 : HOST_WIDE_INT size = ix86_get_frame_size ();
7051 8168022 : HOST_WIDE_INT to_allocate;
7052 :
7053 : /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
7054 : * ms_abi functions that call a sysv function. We now need to prune away
7055 : * cases where it should be disabled. */
7056 8168022 : if (TARGET_64BIT && m->call_ms2sysv)
7057 : {
7058 35225 : gcc_assert (TARGET_64BIT_MS_ABI);
7059 35225 : gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
7060 35225 : gcc_assert (!TARGET_SEH);
7061 35225 : gcc_assert (TARGET_SSE);
7062 35225 : gcc_assert (!ix86_using_red_zone ());
7063 :
7064 35225 : if (crtl->calls_eh_return)
7065 : {
7066 0 : gcc_assert (!reload_completed);
7067 0 : m->call_ms2sysv = false;
7068 0 : warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
7069 : }
7070 :
7071 35225 : else if (ix86_static_chain_on_stack)
7072 : {
7073 0 : gcc_assert (!reload_completed);
7074 0 : m->call_ms2sysv = false;
7075 0 : warn_once_call_ms2sysv_xlogues ("static call chains");
7076 : }
7077 :
7078 : /* Finally, compute which registers the stub will manage. */
7079 : else
7080 : {
7081 35225 : unsigned count = xlogue_layout::count_stub_managed_regs ();
7082 35225 : m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
7083 35225 : m->call_ms2sysv_pad_in = 0;
7084 : }
7085 : }
7086 :
7087 8168022 : frame->nregs = ix86_nsaved_regs ();
7088 8168022 : frame->nsseregs = ix86_nsaved_sseregs ();
7089 :
7090 : /* 64-bit MS ABI seems to require stack alignment to be always 16,
7091 : except for function prologues, leaf functions and when the default
7092 : incoming stack boundary is overridden at command line or via
7093 : force_align_arg_pointer attribute.
7094 :
7095 : Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
7096 : at call sites, including profile function calls.
7097 :
7098 : For APX push2/pop2, the stack also requires 128b alignment. */
7099 8168022 : if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
7100 67 : && crtl->preferred_stack_boundary < 128)
7101 8168087 : || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
7102 225599 : && crtl->preferred_stack_boundary < 128)
7103 0 : && (!crtl->is_leaf || cfun->calls_alloca != 0
7104 0 : || ix86_current_function_calls_tls_descriptor
7105 0 : || (TARGET_MACHO && crtl->profile)
7106 0 : || ix86_incoming_stack_boundary < 128)))
7107 : {
7108 2 : crtl->preferred_stack_boundary = 128;
7109 2 : if (crtl->stack_alignment_needed < 128)
7110 1 : crtl->stack_alignment_needed = 128;
7111 : }
7112 :
7113 8168022 : stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7114 8168022 : preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7115 :
7116 8168022 : gcc_assert (!size || stack_alignment_needed);
7117 8970064 : gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7118 8168022 : gcc_assert (preferred_alignment <= stack_alignment_needed);
7119 :
7120 : /* The only ABI saving SSE regs should be 64-bit ms_abi or with
7121 : no_caller_saved_registers attribute. */
7122 8168022 : gcc_assert (TARGET_64BIT
7123 : || (cfun->machine->call_saved_registers
7124 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7125 : || !frame->nsseregs);
7126 8168022 : if (TARGET_64BIT && m->call_ms2sysv)
7127 : {
7128 35225 : gcc_assert (stack_alignment_needed >= 16);
7129 35225 : gcc_assert ((cfun->machine->call_saved_registers
7130 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7131 : || !frame->nsseregs);
7132 : }
7133 :
7134 : /* For SEH we have to limit the amount of code movement into the prologue.
7135 : At present we do this via a BLOCKAGE, at which point there's very little
7136 : scheduling that can be done, which means that there's very little point
7137 : in doing anything except PUSHs. */
7138 8168022 : if (TARGET_SEH)
7139 : m->use_fast_prologue_epilogue = false;
7140 8168022 : else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7141 : {
7142 7832372 : int count = frame->nregs;
7143 7832372 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
7144 :
7145 : /* The fast prologue uses move instead of push to save registers. This
7146 : is significantly longer, but also executes faster as modern hardware
7147 : can execute the moves in parallel, but can't do that for push/pop.
7148 :
7149 : Be careful about choosing what prologue to emit: When function takes
7150 : many instructions to execute we may use slow version as well as in
7151 : case function is known to be outside hot spot (this is known with
7152 : feedback only). Weight the size of function by number of registers
7153 : to save as it is cheap to use one or two push instructions but very
7154 : slow to use many of them.
7155 :
7156 : Calling this hook multiple times with the same frame requirements
7157 : must produce the same layout, since the RA might otherwise be
7158 : unable to reach a fixed point or might fail its final sanity checks.
7159 : This means that once we've assumed that a function does or doesn't
7160 : have a particular size, we have to stick to that assumption
7161 : regardless of how the function has changed since. */
7162 7832372 : if (count)
7163 2612226 : count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7164 7832372 : if (node->frequency < NODE_FREQUENCY_NORMAL
7165 7137991 : || (flag_branch_probabilities
7166 1031 : && node->frequency < NODE_FREQUENCY_HOT))
7167 694691 : m->use_fast_prologue_epilogue = false;
7168 : else
7169 : {
7170 7137681 : if (count != frame->expensive_count)
7171 : {
7172 286452 : frame->expensive_count = count;
7173 286452 : frame->expensive_p = expensive_function_p (count);
7174 : }
7175 7137681 : m->use_fast_prologue_epilogue = !frame->expensive_p;
7176 : }
7177 : }
7178 :
7179 8168022 : frame->save_regs_using_mov
7180 8168022 : = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7181 :
7182 : /* Skip return address and error code in exception handler. */
7183 8168022 : offset = INCOMING_FRAME_SP_OFFSET;
7184 :
7185 : /* Skip pushed static chain. */
7186 8168022 : if (ix86_static_chain_on_stack)
7187 0 : offset += UNITS_PER_WORD;
7188 :
7189 : /* Skip saved base pointer. */
7190 8168022 : if (frame_pointer_needed)
7191 2769745 : offset += UNITS_PER_WORD;
7192 8168022 : frame->hfp_save_offset = offset;
7193 :
7194 : /* The traditional frame pointer location is at the top of the frame. */
7195 8168022 : frame->hard_frame_pointer_offset = offset;
7196 :
7197 : /* Register save area */
7198 8168022 : offset += frame->nregs * UNITS_PER_WORD;
7199 8168022 : frame->reg_save_offset = offset;
7200 :
7201 : /* Calculate the size of the va-arg area (not including padding, if any). */
7202 8168022 : frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7203 :
7204 : /* Also adjust stack_realign_offset for the largest alignment of
7205 : stack slot actually used. */
7206 8168022 : if (stack_realign_fp
7207 7861480 : || (cfun->machine->max_used_stack_alignment != 0
7208 138 : && (offset % cfun->machine->max_used_stack_alignment) != 0))
7209 : {
7210 : /* We may need a 16-byte aligned stack for the remainder of the
7211 : register save area, but the stack frame for the local function
7212 : may require a greater alignment if using AVX/2/512. In order
7213 : to avoid wasting space, we first calculate the space needed for
7214 : the rest of the register saves, add that to the stack pointer,
7215 : and then realign the stack to the boundary of the start of the
7216 : frame for the local function. */
7217 306611 : HOST_WIDE_INT space_needed = 0;
7218 306611 : HOST_WIDE_INT sse_reg_space_needed = 0;
7219 :
7220 306611 : if (TARGET_64BIT)
7221 : {
7222 304826 : if (m->call_ms2sysv)
7223 : {
7224 6415 : m->call_ms2sysv_pad_in = 0;
7225 6415 : space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7226 : }
7227 :
7228 298411 : else if (frame->nsseregs)
7229 : /* The only ABI that has saved SSE registers (Win64) also has a
7230 : 16-byte aligned default stack. However, many programs violate
7231 : the ABI, and Wine64 forces stack realignment to compensate. */
7232 6447 : space_needed = frame->nsseregs * 16;
7233 :
7234 304826 : sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7235 :
7236 : /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7237 : rounding to be pedantic. */
7238 304826 : space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7239 : }
7240 : else
7241 1785 : space_needed = frame->va_arg_size;
7242 :
7243 : /* Record the allocation size required prior to the realignment AND. */
7244 306611 : frame->stack_realign_allocate = space_needed;
7245 :
7246 : /* The re-aligned stack starts at frame->stack_realign_offset. Values
7247 : before this point are not directly comparable with values below
7248 : this point. Use sp_valid_at to determine if the stack pointer is
7249 : valid for a given offset, fp_valid_at for the frame pointer, or
7250 : choose_baseaddr to have a base register chosen for you.
7251 :
7252 : Note that the result of (frame->stack_realign_offset
7253 : & (stack_alignment_needed - 1)) may not equal zero. */
7254 306611 : offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7255 306611 : frame->stack_realign_offset = offset - space_needed;
7256 306611 : frame->sse_reg_save_offset = frame->stack_realign_offset
7257 306611 : + sse_reg_space_needed;
7258 306611 : }
7259 : else
7260 : {
7261 7861411 : frame->stack_realign_offset = offset;
7262 :
7263 7861411 : if (TARGET_64BIT && m->call_ms2sysv)
7264 : {
7265 28810 : m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7266 28810 : offset += xlogue_layout::get_instance ().get_stack_space_used ();
7267 : }
7268 :
7269 : /* Align and set SSE register save area. */
7270 7832601 : else if (frame->nsseregs)
7271 : {
7272 : /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7273 : required and the DRAP re-alignment boundary is at least 16 bytes,
7274 : then we want the SSE register save area properly aligned. */
7275 183247 : if (ix86_incoming_stack_boundary >= 128
7276 6400 : || (stack_realign_drap && stack_alignment_needed >= 16))
7277 183247 : offset = ROUND_UP (offset, 16);
7278 183247 : offset += frame->nsseregs * 16;
7279 : }
7280 7861411 : frame->sse_reg_save_offset = offset;
7281 7861411 : offset += frame->va_arg_size;
7282 : }
7283 :
7284 : /* Align start of frame for local function. When a function call
7285 : is removed, it may become a leaf function. But if argument may
7286 : be passed on stack, we need to align the stack when there is no
7287 : tail call. */
7288 8168022 : if (m->call_ms2sysv
7289 8132797 : || frame->va_arg_size != 0
7290 8053775 : || size != 0
7291 4389342 : || !crtl->is_leaf
7292 2048772 : || (!crtl->tail_call_emit
7293 1729163 : && cfun->machine->outgoing_args_on_stack)
7294 2048722 : || cfun->calls_alloca
7295 10215035 : || ix86_current_function_calls_tls_descriptor)
7296 6121421 : offset = ROUND_UP (offset, stack_alignment_needed);
7297 :
7298 : /* Frame pointer points here. */
7299 8168022 : frame->frame_pointer_offset = offset;
7300 :
7301 8168022 : offset += size;
7302 :
7303 : /* Add outgoing arguments area. Can be skipped if we eliminated
7304 : all the function calls as dead code.
7305 : Skipping is however impossible when function calls alloca. Alloca
7306 : expander assumes that last crtl->outgoing_args_size
7307 : of stack frame are unused. */
7308 8168022 : if (ACCUMULATE_OUTGOING_ARGS
7309 8786138 : && (!crtl->is_leaf || cfun->calls_alloca
7310 391912 : || ix86_current_function_calls_tls_descriptor))
7311 : {
7312 226204 : offset += crtl->outgoing_args_size;
7313 226204 : frame->outgoing_arguments_size = crtl->outgoing_args_size;
7314 : }
7315 : else
7316 7941818 : frame->outgoing_arguments_size = 0;
7317 :
7318 : /* Align stack boundary. Only needed if we're calling another function
7319 : or using alloca. */
7320 2760961 : if (!crtl->is_leaf || cfun->calls_alloca
7321 10925575 : || ix86_current_function_calls_tls_descriptor)
7322 5412265 : offset = ROUND_UP (offset, preferred_alignment);
7323 :
7324 : /* We've reached end of stack frame. */
7325 8168022 : frame->stack_pointer_offset = offset;
7326 :
7327 : /* Size prologue needs to allocate. */
7328 8168022 : to_allocate = offset - frame->sse_reg_save_offset;
7329 :
7330 8168022 : if (save_regs_using_push_pop (to_allocate))
7331 2574647 : frame->save_regs_using_mov = false;
7332 :
7333 8168022 : if (ix86_using_red_zone ()
7334 7140190 : && crtl->sp_is_unchanging
7335 6497358 : && crtl->is_leaf
7336 2661536 : && !cfun->machine->asm_redzone_clobber_seen
7337 2661523 : && !ix86_pc_thunk_call_expanded
7338 10829545 : && !ix86_current_function_calls_tls_descriptor)
7339 : {
7340 2661508 : frame->red_zone_size = to_allocate;
7341 2661508 : if (frame->save_regs_using_mov)
7342 139945 : frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7343 2661508 : if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7344 102371 : frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7345 : }
7346 : else
7347 5506514 : frame->red_zone_size = 0;
7348 8168022 : frame->stack_pointer_offset -= frame->red_zone_size;
7349 :
7350 : /* The SEH frame pointer location is near the bottom of the frame.
7351 : This is enforced by the fact that the difference between the
7352 : stack pointer and the frame pointer is limited to 240 bytes in
7353 : the unwind data structure. */
7354 8168022 : if (TARGET_SEH)
7355 : {
7356 : /* Force the frame pointer to point at or below the lowest register save
7357 : area, see the SEH code in config/i386/winnt.cc for the rationale. */
7358 : frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7359 :
7360 : /* If we can leave the frame pointer where it is, do so; however return
7361 : the establisher frame for __builtin_frame_address (0) or else if the
7362 : frame overflows the SEH maximum frame size.
7363 :
7364 : Note that the value returned by __builtin_frame_address (0) is quite
7365 : constrained, because setjmp is piggybacked on the SEH machinery with
7366 : recent versions of MinGW:
7367 :
7368 : # elif defined(__SEH__)
7369 : # if defined(__aarch64__) || defined(_ARM64_)
7370 : # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7371 : # elif (__MINGW_GCC_VERSION < 40702)
7372 : # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7373 : # else
7374 : # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7375 : # endif
7376 :
7377 : and the second argument passed to _setjmp, if not null, is forwarded
7378 : to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7379 : built an ExceptionRecord on the fly describing the setjmp buffer). */
7380 : const HOST_WIDE_INT diff
7381 : = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7382 : if (diff <= 255 && !crtl->accesses_prior_frames)
7383 : {
7384 : /* The resulting diff will be a multiple of 16 lower than 255,
7385 : i.e. at most 240 as required by the unwind data structure. */
7386 : frame->hard_frame_pointer_offset += (diff & 15);
7387 : }
7388 : else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7389 : {
7390 : /* Ideally we'd determine what portion of the local stack frame
7391 : (within the constraint of the lowest 240) is most heavily used.
7392 : But without that complication, simply bias the frame pointer
7393 : by 128 bytes so as to maximize the amount of the local stack
7394 : frame that is addressable with 8-bit offsets. */
7395 : frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7396 : }
7397 : else
7398 : frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7399 : }
7400 8168022 : }
7401 :
7402 : /* This is semi-inlined memory_address_length, but simplified
7403 : since we know that we're always dealing with reg+offset, and
7404 : to avoid having to create and discard all that rtl.

   REGNO is the hard register number of the base register and OFFSET
   the displacement applied to it.  The value returned is 0 or 1 for a
   zero OFFSET, 1 for an OFFSET in [-128, 127] and 4 otherwise, plus 1
   when REGNO is SP or R12.  */
7405 :
7406 : static inline int
7407 1011764 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7408 : {
   /* Default length, used when OFFSET is non-zero and outside
      [-128, 127].  */
7409 1011764 : int len = 4;
7410 :
7411 0 : if (offset == 0)
7412 : {
7413 : /* EBP and R13 cannot be encoded without an offset. */
7414 0 : len = (regno == BP_REG || regno == R13_REG);
7415 : }
7416 1003563 : else if (IN_RANGE (offset, -128, 127))
7417 635913 : len = 1;
7418 :
7419 : /* ESP and R12 must be encoded with a SIB byte. */
7420 0 : if (regno == SP_REG || regno == R12_REG)
7421 0 : len++;
7422 :
7423 1011764 : return len;
7424 : }
7425 :
7426 : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7427 : the frame save area. The register is saved at CFA - CFA_OFFSET.

   Returns false when the stack pointer has been realigned
   (fs.sp_realigned) and CFA_OFFSET is no greater than
   fs.sp_realigned_offset; otherwise returns fs.sp_valid.  */
7428 :
7429 : static bool
7430 3496623 : sp_valid_at (HOST_WIDE_INT cfa_offset)
7431 : {
7432 3496623 : const struct machine_frame_state &fs = cfun->machine->fs;
7433 3496623 : if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7434 : {
7435 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7436 46356 : gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7437 : return false;
7438 : }
7439 3450267 : return fs.sp_valid;
7440 : }
7441 :
7442 : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7443 : the frame save area. The register is saved at CFA - CFA_OFFSET.

   Returns false when the stack pointer has been realigned
   (fs.sp_realigned) and CFA_OFFSET is greater than
   fs.sp_realigned_fp_last; otherwise returns fs.fp_valid.  */
7444 :
7445 : static inline bool
7446 1365359 : fp_valid_at (HOST_WIDE_INT cfa_offset)
7447 : {
7448 1365359 : const struct machine_frame_state &fs = cfun->machine->fs;
7449 1365359 : if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7450 : {
7451 : /* Validate that the cfa_offset isn't in a "no-man's land". */
7452 28328 : gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7453 : return false;
7454 : }
7455 1337031 : return fs.fp_valid;
7456 : }
7457 :
7458 : /* Choose a base register based upon alignment requested, speed and/or
7459 : size.

   CFA_OFFSET identifies the slot to address (CFA - CFA_OFFSET).  The
   candidates are the hard frame pointer, the DRAP register and the
   stack pointer.  BASE_REG and BASE_OFFSET receive the selected
   register and the offset from it; both are left untouched when no
   candidate qualifies.  A non-zero ALIGN_REQESTED filters out the
   candidates whose recorded alignment is smaller than it.  If ALIGN
   is non-null, *ALIGN receives the alignment recorded for the
   register found in BASE_REG.  */
7460 :
7461 : static void
7462 1365359 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7463 : HOST_WIDE_INT &base_offset,
7464 : unsigned int align_reqested, unsigned int *align)
7465 : {
7466 1365359 : const struct machine_function *m = cfun->machine;
7467 1365359 : unsigned int hfp_align;
7468 1365359 : unsigned int drap_align;
7469 1365359 : unsigned int sp_align;
7470 1365359 : bool hfp_ok = fp_valid_at (cfa_offset);
7471 1365359 : bool drap_ok = m->fs.drap_valid;
7472 1365359 : bool sp_ok = sp_valid_at (cfa_offset);
7473 :
   /* Until proven otherwise, every candidate is only known to have
      the incoming stack alignment.  */
7474 1365359 : hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7475 :
7476 : /* Filter out any registers that don't meet the requested alignment
7477 : criteria. */
7478 1365359 : if (align_reqested)
7479 : {
7480 967435 : if (m->fs.realigned)
7481 28160 : hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7482 : /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7483 : notes (which we would need to use a realigned stack pointer),
7484 : so disable on SEH targets. */
7485 939275 : else if (m->fs.sp_realigned)
7486 28328 : sp_align = crtl->stack_alignment_needed;
7487 :
7488 967435 : hfp_ok = hfp_ok && hfp_align >= align_reqested;
7489 967435 : drap_ok = drap_ok && drap_align >= align_reqested;
7490 967435 : sp_ok = sp_ok && sp_align >= align_reqested;
7491 : }
7492 :
7493 1365359 : if (m->use_fast_prologue_epilogue)
7494 : {
7495 : /* Choose the base register most likely to allow the most scheduling
7496 : opportunities. Generally FP is valid throughout the function,
7497 : while DRAP must be reloaded within the epilogue. But choose either
7498 : over the SP due to increased encoding size. */
7499 :
7500 648977 : if (hfp_ok)
7501 : {
7502 117842 : base_reg = hard_frame_pointer_rtx;
7503 117842 : base_offset = m->fs.fp_offset - cfa_offset;
7504 : }
7505 531135 : else if (drap_ok)
7506 : {
7507 0 : base_reg = crtl->drap_reg;
7508 0 : base_offset = 0 - cfa_offset;
7509 : }
7510 531135 : else if (sp_ok)
7511 : {
7512 531135 : base_reg = stack_pointer_rtx;
7513 531135 : base_offset = m->fs.sp_offset - cfa_offset;
7514 : }
7515 : }
7516 : else
7517 : {
7518 716382 : HOST_WIDE_INT toffset;
   /* LEN starts above anything choose_baseaddr_len returns, so the
      first usable candidate always wins the comparison.  */
7519 716382 : int len = 16, tlen;
7520 :
7521 : /* Choose the base register with the smallest address encoding.
7522 : With a tie, choose FP > DRAP > SP. */
7523 716382 : if (sp_ok)
7524 : {
7525 699064 : base_reg = stack_pointer_rtx;
7526 699064 : base_offset = m->fs.sp_offset - cfa_offset;
7527 1389927 : len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7528 : }
7529 716382 : if (drap_ok)
7530 : {
7531 0 : toffset = 0 - cfa_offset;
7532 0 : tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7533 0 : if (tlen <= len)
7534 : {
7535 0 : base_reg = crtl->drap_reg;
7536 0 : base_offset = toffset;
7537 0 : len = tlen;
7538 : }
7539 : }
7540 716382 : if (hfp_ok)
7541 : {
7542 312700 : toffset = m->fs.fp_offset - cfa_offset;
7543 312700 : tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7544 312700 : if (tlen <= len)
7545 : {
7546 222030 : base_reg = hard_frame_pointer_rtx;
7547 222030 : base_offset = toffset;
7548 : }
7549 : }
7550 : }
7551 :
7552 : /* Set the align return value. */
7553 1365359 : if (align)
7554 : {
7555 967435 : if (base_reg == stack_pointer_rtx)
7556 685730 : *align = sp_align;
7557 281705 : else if (base_reg == crtl->drap_reg)
7558 0 : *align = drap_align;
7559 281705 : else if (base_reg == hard_frame_pointer_rtx)
7560 281705 : *align = hfp_align;
7561 : }
7562 1365359 : }
7563 :
7564 : /* Return an RTX that points to CFA_OFFSET within the stack frame and
7565 : the alignment of address. If ALIGN is non-null, it should point to
7566 : an alignment value (in bits) that is preferred or zero and will
7567 : receive the alignment of the base register that was selected,
7568 : irrespective of whether or not CFA_OFFSET is a multiple of that
7569 : alignment value. If it is possible for the base register offset to be
7570 : non-immediate then SCRATCH_REGNO should specify a scratch register to
7571 : use.
7572 :
7573 : The valid base registers are taken from CFUN->MACHINE->FS. */
7574 :
7575 : static rtx
7576 1365359 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7577 : unsigned int scratch_regno = INVALID_REGNUM)
7578 : {
7579 1365359 : rtx base_reg = NULL;
7580 1365359 : HOST_WIDE_INT base_offset = 0;
7581 :
7582 : /* If a specific alignment is requested, try to get a base register
7583 : with that alignment first. */
7584 1365359 : if (align && *align)
7585 967435 : choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7586 :
   /* Otherwise, or if no register met the requested alignment, retry
      without any alignment constraint.  */
7587 1365359 : if (!base_reg)
7588 397924 : choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7589 :
7590 1365359 : gcc_assert (base_reg != NULL);
7591 :
7592 1365359 : rtx base_offset_rtx = GEN_INT (base_offset);
7593 :
   /* An offset that is not a valid immediate operand is first loaded
      into the scratch register, which the caller must have supplied.  */
7594 1416751 : if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7595 : {
7596 1 : gcc_assert (scratch_regno != INVALID_REGNUM);
7597 :
7598 1 : rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7599 1 : emit_move_insn (scratch_reg, base_offset_rtx);
7600 :
7601 1 : return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7602 : }
7603 :
7604 1416750 : return plus_constant (Pmode, base_reg, base_offset);
7605 : }
7606 :
7607 : /* Emit code to save registers in the prologue.

   The general registers selected by ix86_save_reg are pushed from the
   highest register number down and every push is marked frame
   related.  When TARGET_APX_PUSH2POP2 is enabled,
   ix86_can_use_push2pop2 () holds and the function type is
   TYPE_NORMAL, registers are pushed two at a time with push2
   instead.  */
7608 :
7609 : static void
7610 429611 : ix86_emit_save_regs (void)
7611 : {
7612 429611 : int regno;
7613 429611 : rtx_insn *insn;
7614 429611 : bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
7615 :
7616 429611 : if (!TARGET_APX_PUSH2POP2
7617 90 : || !ix86_can_use_push2pop2 ()
7618 429699 : || cfun->machine->func_type != TYPE_NORMAL)
7619 : {
   /* Plain case: one push per saved register.  */
7620 39945732 : for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7621 39516208 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7622 : {
7623 1202179 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7624 : use_ppx));
7625 1202179 : RTX_FRAME_RELATED_P (insn) = 1;
7626 : }
7627 : }
7628 : else
7629 : {
   /* REGNO_LIST collects up to two registers for the next push2;
      LOADED_REGNUM counts how many are pending.  ALIGNED records
      whether the tracked stack pointer offset is a multiple of 16.  */
7630 87 : int regno_list[2];
7631 87 : regno_list[0] = regno_list[1] = -1;
7632 87 : int loaded_regnum = 0;
7633 87 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7634 :
7635 8091 : for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7636 8004 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7637 : {
7638 127 : if (aligned)
7639 : {
7640 45 : regno_list[loaded_regnum++] = regno;
7641 45 : if (loaded_regnum == 2)
7642 : {
7643 19 : gcc_assert (regno_list[0] != -1
7644 : && regno_list[1] != -1
7645 : && regno_list[0] != regno_list[1]);
7646 19 : const int offset = UNITS_PER_WORD * 2;
7647 19 : rtx mem = gen_rtx_MEM (TImode,
7648 19 : gen_rtx_PRE_DEC (Pmode,
7649 : stack_pointer_rtx));
7650 19 : insn = emit_insn (gen_push2 (mem,
7651 : gen_rtx_REG (word_mode,
7652 : regno_list[0]),
7653 : gen_rtx_REG (word_mode,
7654 : regno_list[1]),
7655 : use_ppx));
7656 19 : RTX_FRAME_RELATED_P (insn) = 1;
   /* Describe the push2 for the unwind info as a three element
      sequence: the stack pointer adjustment in slot 0 followed by
      the two register saves.  */
7657 19 : rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7658 :
7659 57 : for (int i = 0; i < 2; i++)
7660 : {
7661 76 : rtx dwarf_reg = gen_rtx_REG (word_mode,
7662 38 : regno_list[i]);
7663 38 : rtx sp_offset = plus_constant (Pmode,
7664 : stack_pointer_rtx,
7665 38 : + UNITS_PER_WORD
7666 38 : * (1 - i));
7667 38 : rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7668 : sp_offset),
7669 : dwarf_reg);
7670 38 : RTX_FRAME_RELATED_P (tmp) = 1;
7671 38 : XVECEXP (dwarf, 0, i + 1) = tmp;
7672 : }
7673 19 : rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7674 : plus_constant (Pmode,
7675 : stack_pointer_rtx,
7676 : -offset));
7677 19 : RTX_FRAME_RELATED_P (sp_tmp) = 1;
7678 19 : XVECEXP (dwarf, 0, 0) = sp_tmp;
7679 19 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7680 :
   /* The pair has been emitted; start collecting the next one.  */
7681 19 : loaded_regnum = 0;
7682 19 : regno_list[0] = regno_list[1] = -1;
7683 : }
7684 : }
7685 : else
7686 : {
   /* The stack pointer offset is not a multiple of 16 yet: emit a
      single push and treat the stack as aligned from here on.  */
7687 82 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7688 : use_ppx));
7689 82 : RTX_FRAME_RELATED_P (insn) = 1;
7690 82 : aligned = true;
7691 : }
7692 : }
   /* A register left without a partner is saved with a plain push.  */
7693 87 : if (loaded_regnum == 1)
7694 : {
7695 7 : insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
7696 7 : regno_list[0]),
7697 : use_ppx));
7698 7 : RTX_FRAME_RELATED_P (insn) = 1;
7699 : }
7700 : }
7701 429611 : }
7702 :
7703 : /* Emit a single register save at CFA - CFA_OFFSET.

   REGNO is stored in mode MODE through the base address picked by
   choose_baseaddr.  The store is marked frame related and, depending
   on the frame state, gets a REG_CFA_DEF_CFA, REG_CFA_EXPRESSION or
   REG_CFA_OFFSET note describing the save location.  */
7704 :
7705 : static void
7706 613352 : ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7707 : HOST_WIDE_INT cfa_offset)
7708 : {
7709 613352 : struct machine_function *m = cfun->machine;
7710 613352 : rtx reg = gen_rtx_REG (mode, regno);
7711 613352 : rtx mem, addr, base, insn;
7712 613352 : unsigned int align = GET_MODE_ALIGNMENT (mode);
7713 :
7714 613352 : addr = choose_baseaddr (cfa_offset, &align);
7715 613352 : mem = gen_frame_mem (mode, addr);
7716 :
7717 : /* The location alignment depends upon the base register. */
7718 613352 : align = MIN (GET_MODE_ALIGNMENT (mode), align);
7719 613352 : gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7720 613352 : set_mem_align (mem, align);
7721 :
7722 613352 : insn = emit_insn (gen_rtx_SET (mem, reg));
7723 613352 : RTX_FRAME_RELATED_P (insn) = 1;
7724 :
   /* Extract the base register from the address, which is either a
      bare register or a PLUS whose first operand is the register.  */
7725 613352 : base = addr;
7726 613352 : if (GET_CODE (base) == PLUS)
7727 601467 : base = XEXP (base, 0);
7728 613352 : gcc_checking_assert (REG_P (base));
7729 :
7730 : /* When saving registers into a re-aligned local stack frame, avoid
7731 : any tricky guessing by dwarf2out. */
7732 613352 : if (m->fs.realigned)
7733 : {
7734 12800 : gcc_checking_assert (stack_realign_drap);
7735 :
7736 12800 : if (regno == REGNO (crtl->drap_reg))
7737 : {
7738 : /* A bit of a hack. We force the DRAP register to be saved in
7739 : the re-aligned stack frame, which provides us with a copy
7740 : of the CFA that will last past the prologue. Install it. */
7741 0 : gcc_checking_assert (cfun->machine->fs.fp_valid);
7742 0 : addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7743 0 : cfun->machine->fs.fp_offset - cfa_offset);
7744 0 : mem = gen_rtx_MEM (mode, addr);
7745 0 : add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7746 : }
7747 : else
7748 : {
7749 : /* The frame pointer is a stable reference within the
7750 : aligned frame. Use it. */
7751 12800 : gcc_checking_assert (cfun->machine->fs.fp_valid);
7752 12800 : addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7753 12800 : cfun->machine->fs.fp_offset - cfa_offset);
7754 12800 : mem = gen_rtx_MEM (mode, addr);
7755 12800 : add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7756 : }
7757 : }
7758 :
   /* A save addressed through a realigned stack pointer is described
      with the actual memory expression.  */
7759 600552 : else if (base == stack_pointer_rtx && m->fs.sp_realigned
7760 12881 : && cfa_offset >= m->fs.sp_realigned_offset)
7761 : {
7762 12881 : gcc_checking_assert (stack_realign_fp);
7763 12881 : add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7764 : }
7765 :
7766 : /* The memory may not be relative to the current CFA register,
7767 : which means that we may need to generate a new pattern for
7768 : use by the unwind info. */
7769 587671 : else if (base != m->fs.cfa_reg)
7770 : {
7771 45097 : addr = plus_constant (Pmode, m->fs.cfa_reg,
7772 45097 : m->fs.cfa_offset - cfa_offset);
7773 45097 : mem = gen_rtx_MEM (mode, addr);
7774 45097 : add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7775 : }
7776 613352 : }
7777 :
7778 : /* Emit code to save registers using MOV insns.
7779 : First register is stored at CFA - CFA_OFFSET.

   The general registers selected by ix86_save_reg are visited in
   increasing register number order and each one is assigned the next
   word-sized slot below the previous one.  */
7780 : static void
7781 44786 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7782 : {
7783 44786 : unsigned int regno;
7784 :
7785 4165098 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7786 4120312 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7787 : {
7788 : /* Skip registers, already processed by shrink wrap separate. */
7789 191076 : if (!cfun->machine->reg_is_wrapped_separately[regno])
7790 85173 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
   /* The slot is consumed even when the save itself was skipped, so
      the offsets of the remaining registers do not change.  */
7791 205883 : cfa_offset -= UNITS_PER_WORD;
7792 : }
7793 44786 : }
7794 :
7795 : /* Emit code to save SSE registers using MOV insns.
7796 : First register is stored at CFA - CFA_OFFSET.

   The SSE registers selected by ix86_save_reg are visited in
   increasing register number order; each is saved in V4SFmode and
   CFA_OFFSET is lowered by the size of that mode after every save.  */
7797 : static void
7798 33363 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7799 : {
7800 33363 : unsigned int regno;
7801 :
7802 3102759 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7803 3069396 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7804 : {
7805 333657 : ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7806 333657 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
7807 : }
7808 33363 : }
7809 :
/* Chain of REG_CFA_RESTORE notes that have been queued and not yet
   attached to an insn; NULL when nothing is pending.  */
7810 : static GTY(()) rtx queued_cfa_restores;
7811 :
7812 : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7813 : manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7814 : Don't add the note if the previously saved value will be left untouched
7815 : within stack red-zone till return, as unwinders can find the same value
7816 : in the register and on the stack.

   A null INSN requests queuing: the note is prepended to
   queued_cfa_restores instead of being attached to an insn.  */
7817 :
7818 : static void
7819 2290280 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7820 : {
   /* Unless the function is shrink wrapped, no note is needed for a
      slot that lies within the red zone.  */
7821 2290280 : if (!crtl->shrink_wrapped
7822 2271276 : && cfa_offset <= cfun->machine->fs.red_zone_offset)
7823 : return;
7824 :
7825 771533 : if (insn)
7826 : {
7827 360729 : add_reg_note (insn, REG_CFA_RESTORE, reg);
7828 360729 : RTX_FRAME_RELATED_P (insn) = 1;
7829 : }
7830 : else
7831 410804 : queued_cfa_restores
7832 410804 : = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7833 : }
7834 :
7835 : /* Add queued REG_CFA_RESTORE notes if any to INSN.

   The whole queue is spliced in front of INSN's existing notes, INSN
   is marked frame related and the queue is emptied.  Nothing is done
   when the queue is empty.  */
7836 :
7837 : static void
7838 2550682 : ix86_add_queued_cfa_restore_notes (rtx insn)
7839 : {
7840 2550682 : rtx last;
7841 2550682 : if (!queued_cfa_restores)
7842 : return;
   /* Walk to the last queued note so that INSN's current notes can be
      chained after it.  */
7843 410804 : for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7844 : ;
7845 53053 : XEXP (last, 1) = REG_NOTES (insn);
7846 53053 : REG_NOTES (insn) = queued_cfa_restores;
7847 53053 : queued_cfa_restores = NULL_RTX;
7848 53053 : RTX_FRAME_RELATED_P (insn) = 1;
7849 : }
7850 :
7851 : /* Expand prologue or epilogue stack adjustment.
7852 : The pattern exists to put a dependency on all ebp-based memory accesses.
7853 : STYLE should be negative if instructions should be marked as frame related,
7854 : zero if %r11 register is live and cannot be freely used and positive
7855 : otherwise.

   The insn emitted computes DEST = SRC + OFFSET and is returned.  When
   SET_CFA is true, SRC must be the current CFA register; the tracked
   CFA is moved to DEST and a REG_CFA_ADJUST_CFA note is attached.
   When DEST is the stack pointer, the stack pointer state recorded in
   CFUN->MACHINE->FS is updated as well.  */
7856 :
7857 : static rtx
7858 1583422 : pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7859 : int style, bool set_cfa)
7860 : {
7861 1583422 : struct machine_function *m = cfun->machine;
7862 1583422 : rtx addend = offset;
7863 1583422 : rtx insn;
7864 1583422 : bool add_frame_related_expr = false;
7865 :
   /* An OFFSET that is not a valid immediate operand has to be loaded
      into a register first.  */
7866 1801950 : if (!x86_64_immediate_operand (offset, Pmode))
7867 : {
7868 : /* r11 is used by indirect sibcall return as well, set before the
7869 : epilogue and used after the epilogue. */
7870 199 : if (style)
7871 174 : addend = gen_rtx_REG (Pmode, R11_REG);
7872 : else
7873 : {
7874 25 : gcc_assert (src != hard_frame_pointer_rtx
7875 : && dest != hard_frame_pointer_rtx);
7876 : addend = hard_frame_pointer_rtx;
7877 : }
7878 199 : emit_insn (gen_rtx_SET (addend, offset));
7879 199 : if (style < 0)
7880 88 : add_frame_related_expr = true;
7881 : }
7882 :
7883 : /* Shrink wrap separate may insert prologue between TEST and JMP. In order
7884 : not to affect EFlags, emit add without reg clobbering. */
7885 1583422 : if (crtl->shrink_wrapped_separate)
7886 94670 : insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
7887 94670 : (Pmode, dest, src, addend));
7888 : else
7889 1488752 : insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7890 1488752 : (Pmode, dest, src, addend));
7891 :
   /* For a non-negative STYLE, attach any queued REG_CFA_RESTORE notes
      to this insn.  */
7892 1583422 : if (style >= 0)
7893 698301 : ix86_add_queued_cfa_restore_notes (insn);
7894 :
7895 1583422 : if (set_cfa)
7896 : {
7897 1221797 : rtx r;
7898 :
7899 1221797 : gcc_assert (m->fs.cfa_reg == src);
7900 1221797 : m->fs.cfa_offset += INTVAL (offset);
7901 1221797 : m->fs.cfa_reg = dest;
7902 :
7903 1417957 : r = gen_rtx_PLUS (Pmode, src, offset);
7904 1221797 : r = gen_rtx_SET (dest, r);
7905 1221797 : add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7906 1221797 : RTX_FRAME_RELATED_P (insn) = 1;
7907 : }
7908 361625 : else if (style < 0)
7909 : {
7910 295048 : RTX_FRAME_RELATED_P (insn) = 1;
   /* The addend went through a register above, so spell out the
      constant form of the adjustment in a note.  */
7911 295048 : if (add_frame_related_expr)
7912 : {
7913 20 : rtx r = gen_rtx_PLUS (Pmode, src, offset);
7914 20 : r = gen_rtx_SET (dest, r);
7915 20 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7916 : }
7917 : }
7918 :
7919 1583422 : if (dest == stack_pointer_rtx)
7920 : {
   /* Start from the current stack pointer state; it is replaced
      below when SRC is the frame pointer or the DRAP register.  */
7921 1583422 : HOST_WIDE_INT ooffset = m->fs.sp_offset;
7922 1583422 : bool valid = m->fs.sp_valid;
7923 1583422 : bool realigned = m->fs.sp_realigned;
7924 :
7925 1583422 : if (src == hard_frame_pointer_rtx)
7926 : {
7927 29749 : valid = m->fs.fp_valid;
7928 29749 : realigned = false;
7929 29749 : ooffset = m->fs.fp_offset;
7930 : }
7931 1553673 : else if (src == crtl->drap_reg)
7932 : {
7933 0 : valid = m->fs.drap_valid;
7934 0 : realigned = false;
7935 0 : ooffset = 0;
7936 : }
7937 : else
7938 : {
7939 : /* Else there are two possibilities: SP itself, which we set
7940 : up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7941 : taken care of this by hand along the eh_return path. */
7942 1553673 : gcc_checking_assert (src == stack_pointer_rtx
7943 : || offset == const0_rtx);
7944 : }
7945 :
7946 1583422 : m->fs.sp_offset = ooffset - INTVAL (offset);
7947 1583422 : m->fs.sp_valid = valid;
7948 1583422 : m->fs.sp_realigned = realigned;
7949 : }
7950 1583422 : return insn;
7951 : }
7952 :
7953 : /* Find an available register to be used as dynamic realign argument
7954 : pointer register. Such a register will be written in prologue and
7955 : used at the beginning of the body, so it must not be
7956 : 1. parameter passing register.
7957 : 2. GOT pointer.
7958 : We reuse static-chain register if it is available. Otherwise, we
7959 : use DI for i386 and R13 for x86-64. We chose R13 since it has
7960 : shorter encoding.
7961 :
7962 : Return: the regno of chosen register. */
7963 :
7964 : static unsigned int
7965 7295 : find_drap_reg (void)
7966 : {
7967 7295 : tree decl = cfun->decl;
7968 :
7969 : /* Always use callee-saved register if there are no caller-saved
7970 : registers. */
7971 7295 : if (TARGET_64BIT)
7972 : {
7973 : /* In preserve_none functions, any register can be used for DRAP,
7974 : except AX, R12–R15, DI, SI (argument registers), SP, and BP. */
7975 7010 : if (cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
7976 : return R11_REG;
7977 :
7978 : /* Use R13 for nested function or function need static chain.
7979 : Since function with tail call may use any caller-saved
7980 : registers in epilogue, DRAP must not use caller-saved
7981 : register in such case. */
7982 7009 : if (DECL_STATIC_CHAIN (decl)
7983 6967 : || (cfun->machine->call_saved_registers
7984 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7985 13976 : || crtl->tail_call_emit)
7986 191 : return R13_REG;
7987 :
   /* Otherwise R10 is used.  */
7988 : return R10_REG;
7989 : }
7990 : else
7991 : {
7992 : /* Use DI for nested function or function need static chain.
7993 : Since function with tail call may use any caller-saved
7994 : registers in epilogue, DRAP must not use caller-saved
7995 : register in such case. */
7996 285 : if (DECL_STATIC_CHAIN (decl)
7997 285 : || (cfun->machine->call_saved_registers
7998 285 : == TYPE_NO_CALLER_SAVED_REGISTERS)
7999 285 : || crtl->tail_call_emit
8000 550 : || crtl->calls_eh_return)
8001 : return DI_REG;
8002 :
8003 : /* Reuse static chain register if it isn't used for parameter
8004 : passing. */
8005 265 : if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
8006 : {
8007 265 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
8008 265 : if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
8009 : return CX_REG;
8010 : }
8011 0 : return DI_REG;
8012 : }
8013 : }
8014 :
8015 : /* Return minimum incoming stack alignment.

   A starting value is picked from the function type, the user
   specified boundary or the default, and is then adjusted for the
   force_align_arg_pointer attribute, crtl->parm_stack_boundary and
   the special case of main.  When SIBCALL is true, the
   ix86_force_align_arg_pointer case below is not considered.  */
8016 :
8017 : static unsigned int
8018 1615608 : ix86_minimum_incoming_stack_boundary (bool sibcall)
8019 : {
8020 1615608 : unsigned int incoming_stack_boundary;
8021 :
8022 : /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
8023 1615608 : if (cfun->machine->func_type != TYPE_NORMAL)
8024 120 : incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
8025 : /* Prefer the one specified at command line. */
8026 1615488 : else if (ix86_user_incoming_stack_boundary)
8027 : incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8028 : /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
8029 : if -mstackrealign is used, it isn't used for sibcall check and
8030 : estimated stack alignment is 128bit. */
8031 1615466 : else if (!sibcall
8032 1482124 : && ix86_force_align_arg_pointer
8033 4572 : && crtl->stack_alignment_estimated == 128)
8034 596 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8035 : else
8036 1614870 : incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8037 :
8038 : /* Incoming stack alignment can be changed on individual functions
8039 : via force_align_arg_pointer attribute. We use the smallest
8040 : incoming stack boundary. */
8041 1615608 : if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8042 3230610 : && lookup_attribute ("force_align_arg_pointer",
8043 1615002 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8044 5708 : incoming_stack_boundary = MIN_STACK_BOUNDARY;
8045 :
8046 : /* The incoming stack frame has to be aligned at least at
8047 : parm_stack_boundary. */
8048 1615608 : if (incoming_stack_boundary < crtl->parm_stack_boundary)
8049 : incoming_stack_boundary = crtl->parm_stack_boundary;
8050 :
8051 : /* Stack at entrance of main is aligned by runtime. We use the
8052 : smallest incoming stack boundary. */
8053 1615608 : if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8054 140777 : && DECL_NAME (current_function_decl)
8055 140777 : && MAIN_NAME_P (DECL_NAME (current_function_decl))
8056 1618082 : && DECL_FILE_SCOPE_P (current_function_decl))
8057 2474 : incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8058 :
8059 1615608 : return incoming_stack_boundary;
8060 : }
8061 :
8062 : /* Update incoming stack boundary and estimated stack alignment.

   Recomputes ix86_incoming_stack_boundary and raises
   crtl->stack_alignment_estimated or crtl->preferred_stack_boundary
   to 128 in the cases described below.  */
8063 :
8064 : static void
8065 1482261 : ix86_update_stack_boundary (void)
8066 : {
8067 1482261 : ix86_incoming_stack_boundary
8068 1482261 : = ix86_minimum_incoming_stack_boundary (false);
8069 :
8070 : /* x86_64 vararg needs 16byte stack alignment for register save area. */
8071 1482261 : if (TARGET_64BIT
8072 1355726 : && cfun->stdarg
8073 21336 : && crtl->stack_alignment_estimated < 128)
8074 10152 : crtl->stack_alignment_estimated = 128;
8075 :
8076 : /* __tls_get_addr needs to be called with 16-byte aligned stack. */
8077 1482261 : if (ix86_tls_descriptor_calls_expanded_in_cfun
8078 1073 : && crtl->preferred_stack_boundary < 128)
8079 745 : crtl->preferred_stack_boundary = 128;
8080 :
8081 : /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
8082 : are 32 bits, but if force_align_arg_pointer is specified, it should
8083 : prefer 128 bits for a backward-compatibility reason, which is also
8084 : what the doc suggests. */
8085 1482261 : if (lookup_attribute ("force_align_arg_pointer",
8086 1482261 : TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
8087 1482261 : && crtl->preferred_stack_boundary < 128)
8088 4 : crtl->preferred_stack_boundary = 128;
8089 1482261 : }
8090 :
8091 : /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8092 : needed or an rtx for DRAP otherwise.

   When a DRAP is used, the hard register chosen by find_drap_reg is
   recorded in crtl->drap_reg and a copy of it into a new register is
   emitted at function entry; that new register is the value
   returned.  */
8093 :
8094 : static rtx
8095 1586119 : ix86_get_drap_rtx (void)
8096 : {
8097 : /* We must use DRAP if there are outgoing arguments on stack or
8098 : the stack pointer register is clobbered by asm statement and
8099 : ACCUMULATE_OUTGOING_ARGS is false. */
8100 1586119 : if (ix86_force_drap
8101 1586119 : || ((cfun->machine->outgoing_args_on_stack
8102 1254088 : || crtl->sp_is_clobbered_by_asm)
8103 330086 : && !ACCUMULATE_OUTGOING_ARGS))
8104 309891 : crtl->need_drap = true;
8105 :
8106 1586119 : if (stack_realign_drap)
8107 : {
8108 : /* Assign DRAP to vDRAP and return vDRAP */
8109 7295 : unsigned int regno = find_drap_reg ();
8110 7295 : rtx drap_vreg;
8111 7295 : rtx arg_ptr;
8112 7295 : rtx_insn *seq, *insn;
8113 :
8114 7580 : arg_ptr = gen_rtx_REG (Pmode, regno);
8115 7295 : crtl->drap_reg = arg_ptr;
8116 :
   /* Build the copy in a separate sequence so that it can be
      inserted at the function entry below.  */
8117 7295 : start_sequence ();
8118 7295 : drap_vreg = copy_to_reg (arg_ptr);
8119 7295 : seq = end_sequence ();
8120 :
8121 7295 : insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
   /* When not optimizing, additionally mark the copy as frame
      related with a REG_CFA_SET_VDRAP note.  */
8122 7295 : if (!optimize)
8123 : {
8124 1896 : add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8125 1896 : RTX_FRAME_RELATED_P (insn) = 1;
8126 : }
8127 7295 : return drap_vreg;
8128 : }
8129 : else
8130 : return NULL;
8131 : }
8132 :
8133 : /* Handle the TARGET_INTERNAL_ARG_POINTER hook.
   Always returns virtual_incoming_args_rtx.  */
8134 :
8135 : static rtx
8136 1482262 : ix86_internal_arg_pointer (void)
8137 : {
8138 1482262 : return virtual_incoming_args_rtx;
8139 : }
8140 :
/* Describes a scratch register handed out by
   get_scratch_register_on_entry.  */
8141 : struct scratch_reg {
   /* The scratch register itself, in Pmode.  */
8142 : rtx reg;
   /* True if the register was pushed on the stack when it was
      handed out.  */
8143 : bool saved;
8144 : };
8145 :
8146 : /* Return a short-lived scratch register for use on function entry.
8147 : In 32-bit mode, it is valid only after the registers are saved
8148 : in the prologue. This register must be released by means of
8149 : release_scratch_register_on_entry once it is dead.

   The result is delivered through SR: SR->reg is the register and
   SR->saved tells whether it had to be pushed on the stack here
   because no free register was found.  */
8150 :
8151 : static void
8152 25 : get_scratch_register_on_entry (struct scratch_reg *sr)
8153 : {
8154 25 : int regno;
8155 :
8156 25 : sr->saved = false;
8157 :
8158 25 : if (TARGET_64BIT)
8159 : {
8160 : /* We always use R11 in 64-bit mode. */
8161 : regno = R11_REG;
8162 : }
8163 : else
8164 : {
8165 0 : tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8166 0 : bool fastcall_p
8167 0 : = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8168 0 : bool thiscall_p
8169 0 : = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8170 0 : bool static_chain_p = DECL_STATIC_CHAIN (decl);
8171 0 : int regparm = ix86_function_regparm (fntype, decl);
8172 0 : int drap_regno
8173 0 : = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8174 :
8175 : /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8176 : for the static chain register. */
8177 0 : if ((regparm < 1 || (fastcall_p && !static_chain_p))
8178 0 : && drap_regno != AX_REG)
8179 : regno = AX_REG;
8180 : /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
8181 : for the static chain register. */
8182 0 : else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
8183 : regno = AX_REG;
8184 0 : else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
8185 : regno = DX_REG;
8186 : /* ecx is the static chain register. */
8187 0 : else if (regparm < 3 && !fastcall_p && !thiscall_p
8188 0 : && !static_chain_p
8189 0 : && drap_regno != CX_REG)
8190 : regno = CX_REG;
8191 0 : else if (ix86_save_reg (BX_REG, true, false))
8192 : regno = BX_REG;
8193 : /* esi is the static chain register. */
8194 0 : else if (!(regparm == 3 && static_chain_p)
8195 0 : && ix86_save_reg (SI_REG, true, false))
8196 : regno = SI_REG;
8197 0 : else if (ix86_save_reg (DI_REG, true, false))
8198 : regno = DI_REG;
8199 : else
8200 : {
   /* No free register was found: take AX, or DX when AX is the DRAP
      register, and preserve its value with the push emitted below.  */
8201 0 : regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8202 0 : sr->saved = true;
8203 : }
8204 : }
8205 :
8206 25 : sr->reg = gen_rtx_REG (Pmode, regno);
8207 25 : if (sr->saved)
8208 : {
8209 0 : rtx_insn *insn = emit_insn (gen_push (sr->reg));
8210 0 : RTX_FRAME_RELATED_P (insn) = 1;
8211 : }
8212 25 : }
8213 :
8214 : /* Release a scratch register obtained from the preceding function.
8215 :
8216 : If RELEASE_VIA_POP is true, we just pop the register off the stack
8217 : to release it. This is what non-Linux systems use with -fstack-check.
8218 :
8219 : Otherwise we use OFFSET to locate the saved register and the
8220 : allocated stack space becomes part of the local frame and is
8221 : deallocated by the epilogue. */
8222 :
8223 : static void
8224 25 : release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8225 : bool release_via_pop)
8226 : {
8227 25 : if (sr->saved)
8228 : {
8229 0 : if (release_via_pop)
8230 : {
8231 0 : struct machine_function *m = cfun->machine;
8232 0 : rtx x, insn = emit_insn (gen_pop (sr->reg));
8233 :
8234 : /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
8235 0 : RTX_FRAME_RELATED_P (insn) = 1;
8236 0 : x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8237 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
8238 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8239 0 : m->fs.sp_offset -= UNITS_PER_WORD;
8240 : }
8241 : else
8242 : {
8243 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8244 0 : x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8245 0 : emit_insn (x);
8246 : }
8247 : }
8248 25 : }
8249 :
8250 : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8251 :
8252 : If INT_REGISTERS_SAVED is true, then integer registers have already been
8253 : pushed on the stack. This is asserted on the path that emits a run-time probing loop.
8254 :
8255 : If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
8256 : beyond SIZE bytes; that extra space is released again before returning.
8257 :
8258 : This assumes no knowledge of the current probing state, i.e. it is never
8259 : allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8260 : a suitable probe. */
8261 :
8262 : static void
8263 126 : ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8264 : const bool int_registers_saved,
8265 : const bool protection_area)
8266 : {
8267 126 : struct machine_function *m = cfun->machine;
8268 :
8269 : /* If this function does not statically allocate stack space, then
8270 : no probes are needed. */
8271 126 : if (!size)
8272 : {
8273 : /* However, the allocation of space via pushes for register
8274 : saves could be viewed as allocating space, but without the
8275 : need to probe. */
8276 43 : if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8277 23 : dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8278 : else
8279 20 : dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8280 43 : return;
8281 : }
8282 :
8283 : /* If we are a noreturn function, then we have to consider the
8284 : possibility that we're called via a jump rather than a call.
8285 :
8286 : Thus we don't have the implicit probe generated by saving the
8287 : return address into the stack at the call. Thus, the stack
8288 : pointer could be anywhere in the guard page. The safe thing
8289 : to do is emit a probe now.
8290 :
8291 : The probe can be avoided if we have already emitted any callee
8292 : register saves into the stack or have a frame pointer (which will
8293 : have been saved as well). Those saves will function as implicit
8294 : probes.
8295 :
8296 : ?!? This should be revamped to work like aarch64 and s390 where
8297 : we track the offset from the most recent probe. Normally that
8298 : offset would be zero. For a noreturn function we would reset
8299 : it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8300 : we just probe when we cross PROBE_INTERVAL. */
8301 83 : if (TREE_THIS_VOLATILE (cfun->decl)
8302 15 : && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8303 : {
8304 : /* We can safely use any register here since we're just going to push
8305 : its value and immediately pop it back. But we do try and avoid
8306 : argument passing registers so as not to introduce dependencies in
8307 : the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8308 15 : rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8309 15 : rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
8310 15 : rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
8311 15 : m->fs.sp_offset -= UNITS_PER_WORD;
8312 15 : if (m->fs.cfa_reg == stack_pointer_rtx)
8313 : {
8314 15 : m->fs.cfa_offset -= UNITS_PER_WORD;
8315 15 : rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8316 15 : x = gen_rtx_SET (stack_pointer_rtx, x);
8317 15 : add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8318 15 : RTX_FRAME_RELATED_P (insn_push) = 1;
8319 15 : x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8320 15 : x = gen_rtx_SET (stack_pointer_rtx, x);
8321 15 : add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8322 15 : RTX_FRAME_RELATED_P (insn_pop) = 1;
8323 : }
8324 15 : emit_insn (gen_blockage ());
8325 : }
8326 :
8327 83 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
8328 83 : const int dope = 4 * UNITS_PER_WORD;
8329 :
8330 : /* If there is a protection area, take it into account in the size. */
8331 83 : if (protection_area)
8332 24 : size += probe_interval + dope;
8333 :
8334 : /* If we allocate less than the size of the guard statically,
8335 : then no probing is necessary, but we do need to allocate
8336 : the stack. */
8337 59 : else if (size < (1 << param_stack_clash_protection_guard_size))
8338 : {
8339 38 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8340 : GEN_INT (-size), -1,
8341 38 : m->fs.cfa_reg == stack_pointer_rtx);
8342 38 : dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8343 38 : return;
8344 : }
8345 :
8346 : /* We're allocating a large enough stack frame that we need to
8347 : emit probes. Either emit them inline or in a loop depending
8348 : on the size. */
8349 45 : if (size <= 4 * probe_interval)
8350 : {
8351 : HOST_WIDE_INT i;
8352 47 : for (i = probe_interval; i <= size; i += probe_interval)
8353 : {
8354 : /* Allocate PROBE_INTERVAL bytes. */
8355 27 : rtx insn
8356 27 : = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8357 : GEN_INT (-probe_interval), -1,
8358 27 : m->fs.cfa_reg == stack_pointer_rtx);
8359 27 : add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8360 :
8361 : /* And probe at *sp. */
8362 27 : emit_stack_probe (stack_pointer_rtx);
8363 27 : emit_insn (gen_blockage ());
8364 : }
8365 :
8366 : /* We need to allocate space for the residual (never positive: the loop
8367 : above allocated at most SIZE bytes), but we do not need to probe the residual... */
8368 20 : HOST_WIDE_INT residual = (i - probe_interval - size);
8369 20 : if (residual)
8370 : {
8371 20 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8372 : GEN_INT (residual), -1,
8373 20 : m->fs.cfa_reg == stack_pointer_rtx);
8374 :
8375 : /* ...except if there is a protection area to maintain. */
8376 20 : if (protection_area)
8377 11 : emit_stack_probe (stack_pointer_rtx);
8378 : }
8379 :
8380 20 : dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8381 : }
8382 : else
8383 : {
8384 : /* We expect the GP registers to be saved when probes are used
8385 : as the probing sequences might need a scratch register and
8386 : the routine to allocate one assumes the integer registers
8387 : have already been saved. */
8388 25 : gcc_assert (int_registers_saved);
8389 :
8390 25 : struct scratch_reg sr;
8391 25 : get_scratch_register_on_entry (&sr);
8392 :
8393 : /* If we needed to save a register, then account for any space
8394 : that was pushed (we are not going to pop the register when
8395 : we do the restore). */
8396 25 : if (sr.saved)
8397 0 : size -= UNITS_PER_WORD;
8398 :
8399 : /* Step 1: round SIZE down to a multiple of the interval. The mask assumes PROBE_INTERVAL is a power of two -- TODO confirm against get_probe_interval. */
8400 25 : HOST_WIDE_INT rounded_size = size & -probe_interval;
8401 :
8402 : /* Step 2: compute final value of the loop counter. Use lea if
8403 : possible. */
8404 25 : rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8405 25 : rtx insn;
8406 25 : if (address_no_seg_operand (addr, Pmode))
8407 13 : insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8408 : else
8409 : {
8410 12 : emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8411 12 : insn = emit_insn (gen_rtx_SET (sr.reg,
8412 : gen_rtx_PLUS (Pmode, sr.reg,
8413 : stack_pointer_rtx)));
8414 : }
8415 25 : if (m->fs.cfa_reg == stack_pointer_rtx)
8416 : {
8417 22 : add_reg_note (insn, REG_CFA_DEF_CFA,
8418 22 : plus_constant (Pmode, sr.reg,
8419 22 : m->fs.cfa_offset + rounded_size));
8420 22 : RTX_FRAME_RELATED_P (insn) = 1;
8421 : }
8422 :
8423 : /* Step 3: the loop. The offset bookkeeping below treats it as moving SP by ROUNDED_SIZE in total. */
8424 25 : rtx size_rtx = GEN_INT (rounded_size);
8425 25 : insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
8426 : size_rtx));
8427 25 : if (m->fs.cfa_reg == stack_pointer_rtx)
8428 : {
8429 22 : m->fs.cfa_offset += rounded_size;
8430 22 : add_reg_note (insn, REG_CFA_DEF_CFA,
8431 22 : plus_constant (Pmode, stack_pointer_rtx,
8432 22 : m->fs.cfa_offset));
8433 22 : RTX_FRAME_RELATED_P (insn) = 1;
8434 : }
8435 25 : m->fs.sp_offset += rounded_size;
8436 25 : emit_insn (gen_blockage ());
8437 :
8438 : /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8439 : is equal to ROUNDED_SIZE. */
8440 :
8441 25 : if (size != rounded_size)
8442 : {
8443 25 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8444 : GEN_INT (rounded_size - size), -1,
8445 25 : m->fs.cfa_reg == stack_pointer_rtx);
8446 :
8447 25 : if (protection_area)
8448 13 : emit_stack_probe (stack_pointer_rtx);
8449 : }
8450 :
8451 25 : dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8452 :
8453 : /* This does not deallocate the space reserved for the scratch
8454 : register. That will be deallocated in the epilogue. */
8455 25 : release_scratch_register_on_entry (&sr, size, false);
8456 : }
8457 :
8458 : /* Adjust back to account for the protection area. */
8459 45 : if (protection_area)
8460 24 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8461 24 : GEN_INT (probe_interval + dope), -1,
8462 24 : m->fs.cfa_reg == stack_pointer_rtx);
8463 :
8464 : /* Make sure nothing is scheduled before we are done. */
8465 45 : emit_insn (gen_blockage ());
8466 : }
8467 :
8468 : /* Adjust the stack pointer up to REG while probing it. */
8469 :
8470 : const char *
8471 25 : output_adjust_stack_and_probe (rtx reg)
8472 : {
8473 25 : static int labelno = 0;
8474 25 : char loop_lab[32];
8475 25 : rtx xops[2];
8476 :
8477 25 : ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8478 :
8479 : /* Loop. */
8480 25 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8481 :
8482 : /* SP = SP + PROBE_INTERVAL. */
8483 25 : xops[0] = stack_pointer_rtx;
8484 37 : xops[1] = GEN_INT (get_probe_interval ());
8485 25 : output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8486 :
8487 : /* Probe at SP. */
8488 25 : xops[1] = const0_rtx;
8489 25 : output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);
8490 :
8491 : /* Test if SP == LAST_ADDR. */
8492 25 : xops[0] = stack_pointer_rtx;
8493 25 : xops[1] = reg;
8494 25 : output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8495 :
8496 : /* Branch. */
8497 25 : fputs ("\tjne\t", asm_out_file);
8498 25 : assemble_name_raw (asm_out_file, loop_lab);
8499 25 : fputc ('\n', asm_out_file);
8500 :
8501 25 : return "";
8502 : }
8503 :
8504 : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8505 : inclusive. These are offsets from the current stack pointer.
8506 :
8507 : INT_REGISTERS_SAVED is true if integer registers have already been
8508 : pushed on the stack. */
8509 :
8510 : static void
8511 0 : ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8512 : const bool int_registers_saved)
8513 : {
8514 0 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
8515 :
8516 : /* See if we have a constant small number of probes to generate. If so,
8517 : that's the easy case. The run-time loop is made up of 6 insns in the
8518 : generic case while the compile-time loop is made up of n insns for n #
8519 : of intervals. */
8520 0 : if (size <= 6 * probe_interval)
8521 : {
8522 : HOST_WIDE_INT i;
8523 :
8524 : /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8525 : it exceeds SIZE. If only one probe is needed, this will not
8526 : generate any code. Then probe at FIRST + SIZE. */
8527 0 : for (i = probe_interval; i < size; i += probe_interval)
8528 0 : emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8529 0 : -(first + i)));
8530 :
8531 0 : emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8532 0 : -(first + size)));
8533 : }
8534 :
8535 : /* Otherwise, do the same as above, but in a loop. Note that we must be
8536 : extra careful with variables wrapping around because we might be at
8537 : the very top (or the very bottom) of the address space and we have
8538 : to be able to handle this case properly; in particular, we use an
8539 : equality test for the loop condition. */
8540 : else
8541 : {
8542 : /* We expect the GP registers to be saved when probes are used
8543 : as the probing sequences might need a scratch register and
8544 : the routine to allocate one assumes the integer registers
8545 : have already been saved. */
8546 0 : gcc_assert (int_registers_saved);
8547 :
8548 0 : HOST_WIDE_INT rounded_size, last;
8549 0 : struct scratch_reg sr;
8550 :
8551 0 : get_scratch_register_on_entry (&sr);
8552 :
8553 :
8554 : /* Step 1: round SIZE to the previous multiple of the interval. */
8555 :
8556 0 : rounded_size = ROUND_DOWN (size, probe_interval);
8557 :
8558 :
8559 : /* Step 2: compute initial and final value of the loop counter. */
8560 :
8561 : /* TEST_OFFSET = FIRST. */
8562 0 : emit_move_insn (sr.reg, GEN_INT (-first));
8563 :
8564 : /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8565 0 : last = first + rounded_size;
8566 :
8567 :
8568 : /* Step 3: the loop
8569 :
8570 : do
8571 : {
8572 : TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8573 : probe at TEST_ADDR
8574 : }
8575 : while (TEST_ADDR != LAST_ADDR)
8576 :
8577 : probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8578 : until it is equal to ROUNDED_SIZE. */
8579 :
8580 0 : emit_insn
8581 0 : (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8582 :
8583 :
8584 : /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8585 : that SIZE is equal to ROUNDED_SIZE. */
8586 :
8587 0 : if (size != rounded_size)
8588 0 : emit_stack_probe (plus_constant (Pmode,
8589 0 : gen_rtx_PLUS (Pmode,
8590 : stack_pointer_rtx,
8591 : sr.reg),
8592 0 : rounded_size - size));
8593 :
8594 0 : release_scratch_register_on_entry (&sr, size, true);
8595 : }
8596 :
8597 : /* Make sure nothing is scheduled before we are done. */
8598 0 : emit_insn (gen_blockage ());
8599 0 : }
8600 :
8601 : /* Probe a range of stack addresses from REG to END, inclusive. These are
8602 : offsets from the current stack pointer. */
8603 :
8604 : const char *
8605 0 : output_probe_stack_range (rtx reg, rtx end)
8606 : {
8607 0 : static int labelno = 0;
8608 0 : char loop_lab[32];
8609 0 : rtx xops[3];
8610 :
8611 0 : ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8612 :
8613 : /* Loop. */
8614 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8615 :
8616 : /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8617 0 : xops[0] = reg;
8618 0 : xops[1] = GEN_INT (get_probe_interval ());
8619 0 : output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8620 :
8621 : /* Probe at TEST_ADDR. */
8622 0 : xops[0] = stack_pointer_rtx;
8623 0 : xops[1] = reg;
8624 0 : xops[2] = const0_rtx;
8625 0 : output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);
8626 :
8627 : /* Test if TEST_ADDR == LAST_ADDR. */
8628 0 : xops[0] = reg;
8629 0 : xops[1] = end;
8630 0 : output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8631 :
8632 : /* Branch. */
8633 0 : fputs ("\tjne\t", asm_out_file);
8634 0 : assemble_name_raw (asm_out_file, loop_lab);
8635 0 : fputc ('\n', asm_out_file);
8636 :
8637 0 : return "";
8638 : }
8639 :
8640 : /* Data passed to ix86_update_stack_alignment through the DATA argument of note_stores. */
8641 : struct stack_access_data
8642 : {
8643 : /* The stack access register; a MEM is considered only if its address mentions it. */
8644 : const_rtx reg;
8645 : /* Pointer to the stack alignment, which is raised when a MEM with a larger MEM_ALIGN is found. */
8646 : unsigned int *stack_alignment;
8647 : };
8648 :
8649 : /* Return true if OP references an argument passed on stack. */
8650 :
8651 : static bool
8652 135312 : ix86_argument_passed_on_stack_p (const_rtx op)
8653 : {
8654 135312 : tree mem_expr = MEM_EXPR (op);
8655 135312 : if (mem_expr)
8656 : {
8657 133440 : tree var = get_base_address (mem_expr);
8658 133440 : return TREE_CODE (var) == PARM_DECL;
8659 : }
8660 : return false;
8661 : }
8662 :
8663 : /* Update the maximum stack slot alignment from memory alignment in PAT. */
8664 :
8665 : static void
8666 168807 : ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
8667 : {
8668 : /* This insn may reference stack slot. Update the maximum stack slot
8669 : alignment if the memory is referenced by the stack access register. */
8670 168807 : stack_access_data *p = (stack_access_data *) data;
8671 :
8672 168807 : subrtx_iterator::array_type array;
8673 705888 : FOR_EACH_SUBRTX (iter, array, pat, ALL)
8674 : {
8675 565743 : auto op = *iter;
8676 565743 : if (MEM_P (op))
8677 : {
8678 : /* NB: Ignore arguments passed on stack since caller is
8679 : responsible to align the outgoing stack for arguments
8680 : passed on stack. */
8681 165565 : if (reg_mentioned_p (p->reg, XEXP (op, 0))
8682 165565 : && !ix86_argument_passed_on_stack_p (op))
8683 : {
8684 28662 : unsigned int alignment = MEM_ALIGN (op);
8685 :
8686 28662 : if (alignment > *p->stack_alignment)
8687 28581 : *p->stack_alignment = alignment;
8688 : break;
8689 : }
8690 : else
8691 136903 : iter.skip_subrtxes ();
8692 : }
8693 : }
8694 168807 : }
8695 :
8696 : /* Helper function for ix86_find_all_reg_uses. */
8697 :
8698 : static void
8699 45391300 : ix86_find_all_reg_uses_1 (HARD_REG_SET ®set,
8700 : rtx set, unsigned int regno,
8701 : auto_bitmap &worklist)
8702 : {
8703 45391300 : rtx dest = SET_DEST (set);
8704 :
8705 45391300 : if (!REG_P (dest))
8706 41119175 : return;
8707 :
8708 : /* Reject non-Pmode modes. */
8709 34368672 : if (GET_MODE (dest) != Pmode)
8710 : return;
8711 :
8712 18201057 : unsigned int dst_regno = REGNO (dest);
8713 :
8714 18201057 : if (TEST_HARD_REG_BIT (regset, dst_regno))
8715 : return;
8716 :
8717 4272125 : const_rtx src = SET_SRC (set);
8718 :
8719 4272125 : subrtx_iterator::array_type array;
8720 8494657 : FOR_EACH_SUBRTX (iter, array, src, ALL)
8721 : {
8722 5494699 : auto op = *iter;
8723 :
8724 5494699 : if (MEM_P (op))
8725 2979477 : iter.skip_subrtxes ();
8726 :
8727 5494699 : if (REG_P (op) && REGNO (op) == regno)
8728 : {
8729 : /* Add this register to register set. */
8730 1440603 : add_to_hard_reg_set (®set, Pmode, dst_regno);
8731 1272167 : bitmap_set_bit (worklist, dst_regno);
8732 1272167 : break;
8733 : }
8734 : }
8735 4272125 : }
8736 :
8737 : /* Find all registers defined with register REGNO. */
8738 :
8739 : static void
8740 2291425 : ix86_find_all_reg_uses (HARD_REG_SET ®set,
8741 : unsigned int regno, auto_bitmap &worklist)
8742 : {
8743 2291425 : for (df_ref ref = DF_REG_USE_CHAIN (regno);
8744 81573571 : ref != NULL;
8745 79282146 : ref = DF_REF_NEXT_REG (ref))
8746 : {
8747 79282146 : if (DF_REF_IS_ARTIFICIAL (ref))
8748 16556898 : continue;
8749 :
8750 62725248 : rtx_insn *insn = DF_REF_INSN (ref);
8751 :
8752 62725248 : if (!NONJUMP_INSN_P (insn))
8753 17992908 : continue;
8754 :
8755 44732340 : unsigned int ref_regno = DF_REF_REGNO (ref);
8756 :
8757 44732340 : rtx set = single_set (insn);
8758 44732340 : if (set)
8759 : {
8760 43958863 : ix86_find_all_reg_uses_1 (regset, set,
8761 : ref_regno, worklist);
8762 43958863 : continue;
8763 : }
8764 :
8765 773477 : rtx pat = PATTERN (insn);
8766 773477 : if (GET_CODE (pat) != PARALLEL)
8767 124306 : continue;
8768 :
8769 2505771 : for (int i = 0; i < XVECLEN (pat, 0); i++)
8770 : {
8771 1856600 : rtx exp = XVECEXP (pat, 0, i);
8772 :
8773 1856600 : if (GET_CODE (exp) == SET)
8774 1432437 : ix86_find_all_reg_uses_1 (regset, exp,
8775 : ref_regno, worklist);
8776 : }
8777 : }
8778 2291425 : }
8779 :
8780 : /* Return true if the hard register REGNO used for a stack access is
8781 : defined in a basic block that dominates the block where it is used. */
8782 :
8783 : static bool
8784 39627 : ix86_access_stack_p (unsigned int regno, basic_block bb,
8785 : HARD_REG_SET &set_up_by_prologue,
8786 : HARD_REG_SET &prologue_used,
8787 : auto_bitmap reg_dominate_bbs_known[],
8788 : auto_bitmap reg_dominate_bbs[])
8789 : {
8790 39627 : if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
8791 10575 : return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);
8792 :
8793 29052 : bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);
8794 :
8795 : /* Get all BBs which set REGNO and dominate the current BB from all
8796 : DEFs of REGNO. */
8797 29052 : for (df_ref def = DF_REG_DEF_CHAIN (regno);
8798 1550310 : def;
8799 1521258 : def = DF_REF_NEXT_REG (def))
8800 1548762 : if (!DF_REF_IS_ARTIFICIAL (def)
8801 1546902 : && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
8802 1520044 : && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
8803 : {
8804 1518171 : basic_block set_bb = DF_REF_BB (def);
8805 1518171 : if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
8806 : {
8807 85618 : rtx_insn *insn = DF_REF_INSN (def);
8808 : /* Return true if INSN requires stack. */
8809 85618 : if (requires_stack_frame_p (insn, prologue_used,
8810 : set_up_by_prologue))
8811 : {
8812 27504 : bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
8813 27504 : return true;
8814 : }
8815 : }
8816 : }
8817 :
8818 : /* When we get here, REGNO used in the current BB doesn't access
8819 : stack. */
8820 : return false;
8821 : }
8822 :
8823 : /* Return true if OP isn't a memory operand with SYMBOLIC_CONST and
8824 : needs alignment > ALIGNMENT. */
8825 :
8826 : static bool
8827 27715514 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
8828 : {
8829 27715514 : bool need_alignment = MEM_ALIGN (op) > alignment;
8830 27715514 : tree mem_expr = MEM_EXPR (op);
8831 27715514 : if (!mem_expr)
8832 : return need_alignment;
8833 :
8834 22685841 : tree var = get_base_address (mem_expr);
8835 22685841 : if (!VAR_P (var) || !DECL_RTL_SET_P (var))
8836 : return need_alignment;
8837 :
8838 14375495 : rtx x = DECL_RTL (var);
8839 14375495 : if (!MEM_P (x))
8840 : return need_alignment;
8841 :
8842 14375492 : x = XEXP (x, 0);
8843 14375492 : return !SYMBOLIC_CONST (x) && need_alignment;
8844 : }
8845 :
8846 : /* Return true if SET needs alignment > ALIGNMENT. */
8847 :
8848 : static bool
8849 45390590 : ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
8850 : {
8851 45390590 : rtx dest = SET_DEST (set);
8852 :
8853 45390590 : if (MEM_P (dest))
8854 17168816 : return ix86_need_alignment_p_2 (dest, alignment);
8855 :
8856 28221774 : const_rtx src = SET_SRC (set);
8857 :
8858 28221774 : subrtx_iterator::array_type array;
8859 81708055 : FOR_EACH_SUBRTX (iter, array, src, ALL)
8860 : {
8861 64032979 : auto op = *iter;
8862 :
8863 64032979 : if (MEM_P (op))
8864 10546698 : return ix86_need_alignment_p_2 (op, alignment);
8865 : }
8866 :
8867 17675076 : return false;
8868 28221774 : }
8869 :
8870 : /* Return true if INSN needs alignment > ALIGNMENT. */
8871 :
8872 : static bool
8873 44732340 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
8874 : {
8875 44732340 : rtx set = single_set (insn);
8876 44732340 : if (set)
8877 43958863 : return ix86_need_alignment_p_1 (set, alignment);
8878 :
8879 773477 : rtx pat = PATTERN (insn);
8880 773477 : if (GET_CODE (pat) != PARALLEL)
8881 : return false;
8882 :
8883 2504072 : for (int i = 0; i < XVECLEN (pat, 0); i++)
8884 : {
8885 1855694 : rtx exp = XVECEXP (pat, 0, i);
8886 :
8887 1855694 : if (GET_CODE (exp) == SET
8888 1855694 : && ix86_need_alignment_p_1 (exp, alignment))
8889 : return true;
8890 : }
8891 :
8892 : return false;
8893 : }
8894 :
8895 : /* Set stack_frame_required to false if stack frame isn't required.
8896 : Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8897 : slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8898 :
8899 : static void
8900 1481410 : ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8901 : bool check_stack_slot)
8902 : {
8903 1481410 : HARD_REG_SET set_up_by_prologue, prologue_used;
8904 1481410 : basic_block bb;
8905 :
8906 5925640 : CLEAR_HARD_REG_SET (prologue_used);
8907 1481410 : CLEAR_HARD_REG_SET (set_up_by_prologue);
8908 1608055 : add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8909 1481410 : add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8910 1481410 : add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8911 : HARD_FRAME_POINTER_REGNUM);
8912 :
8913 1481410 : bool require_stack_frame = false;
8914 :
8915 15826711 : FOR_EACH_BB_FN (bb, cfun)
8916 : {
8917 14345301 : rtx_insn *insn;
8918 90173956 : FOR_BB_INSNS (bb, insn)
8919 83678730 : if (NONDEBUG_INSN_P (insn)
8920 83678730 : && requires_stack_frame_p (insn, prologue_used,
8921 : set_up_by_prologue))
8922 : {
8923 : require_stack_frame = true;
8924 : break;
8925 : }
8926 : }
8927 :
8928 1481410 : cfun->machine->stack_frame_required = require_stack_frame;
8929 :
8930 : /* Stop if we don't need to check stack slot. */
8931 1481410 : if (!check_stack_slot)
8932 787900 : return;
8933 :
8934 : /* The preferred stack alignment is the minimum stack alignment. */
8935 693510 : if (stack_alignment > crtl->preferred_stack_boundary)
8936 142938 : stack_alignment = crtl->preferred_stack_boundary;
8937 :
8938 : HARD_REG_SET stack_slot_access;
8939 693510 : CLEAR_HARD_REG_SET (stack_slot_access);
8940 :
8941 : /* Stack slot can be accessed by stack pointer, frame pointer or
8942 : registers defined by stack pointer or frame pointer. */
8943 693510 : auto_bitmap worklist;
8944 :
8945 753052 : add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
8946 693510 : bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
8947 :
8948 693510 : if (frame_pointer_needed)
8949 : {
8950 334773 : add_to_hard_reg_set (&stack_slot_access, Pmode,
8951 : HARD_FRAME_POINTER_REGNUM);
8952 325748 : bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
8953 : }
8954 :
8955 : /* Registers on HARD_STACK_SLOT_ACCESS always access stack. */
8956 693510 : HARD_REG_SET hard_stack_slot_access = stack_slot_access;
8957 :
8958 693510 : calculate_dominance_info (CDI_DOMINATORS);
8959 :
8960 2291425 : unsigned int regno;
8961 :
8962 2291425 : do
8963 : {
8964 2291425 : regno = bitmap_clear_first_set_bit (worklist);
8965 2291425 : ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
8966 : }
8967 2291425 : while (!bitmap_empty_p (worklist));
8968 :
8969 : hard_reg_set_iterator hrsi;
8970 : stack_access_data data;
8971 :
8972 128299350 : auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
8973 128299350 : auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];
8974 :
8975 693510 : data.stack_alignment = &stack_alignment;
8976 :
8977 2984935 : EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
8978 : {
8979 2291425 : for (df_ref ref = DF_REG_USE_CHAIN (regno);
8980 81573571 : ref != NULL;
8981 79282146 : ref = DF_REF_NEXT_REG (ref))
8982 : {
8983 79282146 : if (DF_REF_IS_ARTIFICIAL (ref))
8984 16556898 : continue;
8985 :
8986 62725248 : rtx_insn *insn = DF_REF_INSN (ref);
8987 :
8988 62725248 : if (!NONJUMP_INSN_P (insn))
8989 17992908 : continue;
8990 :
8991 : /* Call ix86_access_stack_p only if INSN needs alignment >
8992 : STACK_ALIGNMENT. */
8993 44732340 : if (ix86_need_alignment_p (insn, stack_alignment)
8994 44732340 : && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
8995 39627 : || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
8996 : set_up_by_prologue,
8997 : prologue_used,
8998 : reg_dominate_bbs_known,
8999 : reg_dominate_bbs)))
9000 : {
9001 : /* Update stack alignment if REGNO is used for stack
9002 : access. */
9003 162255 : data.reg = DF_REF_REG (ref);
9004 162255 : note_stores (insn, ix86_update_stack_alignment, &data);
9005 : }
9006 : }
9007 : }
9008 :
9009 693510 : free_dominance_info (CDI_DOMINATORS);
9010 129686370 : }
9011 :
9012 : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
9013 : will guide prologue/epilogue to be generated in correct form. */
9014 :
9015 : static void
9016 3436762 : ix86_finalize_stack_frame_flags (void)
9017 : {
9018 : /* Check if stack realign is really needed after reload, and
9019 : stores result in cfun */
9020 3436762 : unsigned int incoming_stack_boundary
9021 3436762 : = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9022 3436762 : ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9023 3436762 : unsigned int stack_alignment
9024 1179863 : = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
9025 4616625 : ? crtl->max_used_stack_slot_alignment
9026 3436762 : : crtl->stack_alignment_needed);
9027 3436762 : unsigned int stack_realign
9028 3436762 : = (incoming_stack_boundary < stack_alignment);
9029 3436762 : bool recompute_frame_layout_p = false;
9030 :
9031 3436762 : if (crtl->stack_realign_finalized)
9032 : {
9033 : /* After stack_realign_needed is finalized, we can't no longer
9034 : change it. */
9035 1955352 : gcc_assert (crtl->stack_realign_needed == stack_realign);
9036 1955352 : return;
9037 : }
9038 :
9039 : /* It is always safe to compute max_used_stack_alignment. We
9040 : compute it only if 128-bit aligned load/store may be generated
9041 : on misaligned stack slot which will lead to segfault. */
9042 2962820 : bool check_stack_slot
9043 1481410 : = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
9044 1481410 : ix86_find_max_used_stack_alignment (stack_alignment,
9045 : check_stack_slot);
9046 :
9047 : /* If the only reason for frame_pointer_needed is that we conservatively
9048 : assumed stack realignment might be needed or -fno-omit-frame-pointer
9049 : is used, but in the end nothing that needed the stack alignment had
9050 : been spilled nor stack access, clear frame_pointer_needed and say we
9051 : don't need stack realignment.
9052 :
9053 : When vector register is used for piecewise move and store, we don't
9054 : increase stack_alignment_needed as there is no register spill for
9055 : piecewise move and store. Since stack_realign_needed is set to true
9056 : by checking stack_alignment_estimated which is updated by pseudo
9057 : vector register usage, we also need to check stack_realign_needed to
9058 : eliminate frame pointer. */
9059 1481410 : if ((stack_realign
9060 1415168 : || (!flag_omit_frame_pointer && optimize)
9061 1404918 : || crtl->stack_realign_needed)
9062 77151 : && frame_pointer_needed
9063 77151 : && crtl->is_leaf
9064 52686 : && crtl->sp_is_unchanging
9065 52634 : && !ix86_current_function_calls_tls_descriptor
9066 52634 : && !crtl->accesses_prior_frames
9067 52634 : && !cfun->calls_alloca
9068 52634 : && !crtl->calls_eh_return
9069 : /* See ira_setup_eliminable_regset for the rationale. */
9070 52634 : && !(STACK_CHECK_MOVING_SP
9071 52634 : && flag_stack_check
9072 0 : && flag_exceptions
9073 0 : && cfun->can_throw_non_call_exceptions)
9074 52634 : && !ix86_frame_pointer_required ()
9075 52633 : && ix86_get_frame_size () == 0
9076 34964 : && ix86_nsaved_sseregs () == 0
9077 1516374 : && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9078 : {
9079 34964 : if (cfun->machine->stack_frame_required)
9080 : {
9081 : /* Stack frame is required. If stack alignment needed is less
9082 : than incoming stack boundary, don't realign stack. */
9083 287 : stack_realign = incoming_stack_boundary < stack_alignment;
9084 287 : if (!stack_realign)
9085 : {
9086 287 : crtl->max_used_stack_slot_alignment
9087 287 : = incoming_stack_boundary;
9088 287 : crtl->stack_alignment_needed
9089 287 : = incoming_stack_boundary;
9090 : /* Also update preferred_stack_boundary for leaf
9091 : functions. */
9092 287 : crtl->preferred_stack_boundary
9093 287 : = incoming_stack_boundary;
9094 : }
9095 : }
9096 : else
9097 : {
9098 : /* If drap has been set, but it actually isn't live at the
9099 : start of the function, there is no reason to set it up. */
9100 34677 : if (crtl->drap_reg)
9101 : {
9102 35 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9103 70 : if (! REGNO_REG_SET_P (DF_LR_IN (bb),
9104 : REGNO (crtl->drap_reg)))
9105 : {
9106 35 : crtl->drap_reg = NULL_RTX;
9107 35 : crtl->need_drap = false;
9108 : }
9109 : }
9110 : else
9111 34642 : cfun->machine->no_drap_save_restore = true;
9112 :
9113 34677 : frame_pointer_needed = false;
9114 34677 : stack_realign = false;
9115 34677 : crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9116 34677 : crtl->stack_alignment_needed = incoming_stack_boundary;
9117 34677 : crtl->stack_alignment_estimated = incoming_stack_boundary;
9118 34677 : if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9119 1 : crtl->preferred_stack_boundary = incoming_stack_boundary;
9120 34677 : df_finish_pass (true);
9121 34677 : df_scan_alloc (NULL);
9122 34677 : df_scan_blocks ();
9123 34677 : df_compute_regs_ever_live (true);
9124 34677 : df_analyze ();
9125 :
9126 34677 : if (flag_var_tracking)
9127 : {
9128 : /* Since frame pointer is no longer available, replace it with
9129 : stack pointer - UNITS_PER_WORD in debug insns. */
9130 134 : df_ref ref, next;
9131 134 : for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
9132 134 : ref; ref = next)
9133 : {
9134 0 : next = DF_REF_NEXT_REG (ref);
9135 0 : if (!DF_REF_INSN_INFO (ref))
9136 0 : continue;
9137 :
9138 : /* Make sure the next ref is for a different instruction,
9139 : so that we're not affected by the rescan. */
9140 0 : rtx_insn *insn = DF_REF_INSN (ref);
9141 0 : while (next && DF_REF_INSN (next) == insn)
9142 0 : next = DF_REF_NEXT_REG (next);
9143 :
9144 0 : if (DEBUG_INSN_P (insn))
9145 : {
9146 : bool changed = false;
9147 0 : for (; ref != next; ref = DF_REF_NEXT_REG (ref))
9148 : {
9149 0 : rtx *loc = DF_REF_LOC (ref);
9150 0 : if (*loc == hard_frame_pointer_rtx)
9151 : {
9152 0 : *loc = plus_constant (Pmode,
9153 : stack_pointer_rtx,
9154 0 : -UNITS_PER_WORD);
9155 0 : changed = true;
9156 : }
9157 : }
9158 0 : if (changed)
9159 0 : df_insn_rescan (insn);
9160 : }
9161 : }
9162 : }
9163 :
9164 : recompute_frame_layout_p = true;
9165 : }
9166 : }
9167 1446446 : else if (crtl->max_used_stack_slot_alignment >= 128
9168 657522 : && cfun->machine->stack_frame_required)
9169 : {
9170 : /* We don't need to realign stack. max_used_stack_alignment is
9171 : used to decide how stack frame should be aligned. This is
9172 : independent of any psABIs nor 32-bit vs 64-bit. */
9173 612452 : cfun->machine->max_used_stack_alignment
9174 612452 : = stack_alignment / BITS_PER_UNIT;
9175 : }
9176 :
9177 1481410 : if (crtl->stack_realign_needed != stack_realign)
9178 35197 : recompute_frame_layout_p = true;
9179 1481410 : crtl->stack_realign_needed = stack_realign;
9180 1481410 : crtl->stack_realign_finalized = true;
9181 1481410 : if (recompute_frame_layout_p)
9182 35290 : ix86_compute_frame_layout ();
9183 : }
9184 :
9185 : /* Delete the SET_GOT insn at the start of the block following the entry block if its destination is REG. Only the first non-note, non-debug insn of that block is examined, and only when it is a PARALLEL whose first element is a SET from an UNSPEC_SET_GOT. */
9186 :
9187 : static void
9188 0 : ix86_elim_entry_set_got (rtx reg)
9189 : {
9190 0 : basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
9191 0 : rtx_insn *c_insn = BB_HEAD (bb);
9192 0 : if (!NONDEBUG_INSN_P (c_insn))
9193 0 : c_insn = next_nonnote_nondebug_insn (c_insn);
9194 0 : if (c_insn && NONJUMP_INSN_P (c_insn))
9195 : {
9196 0 : rtx pat = PATTERN (c_insn);
9197 0 : if (GET_CODE (pat) == PARALLEL)
9198 : {
9199 0 : rtx set = XVECEXP (pat, 0, 0);
9200 0 : if (GET_CODE (set) == SET
9201 0 : && GET_CODE (SET_SRC (set)) == UNSPEC
9202 0 : && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
9203 0 : && REGNO (SET_DEST (set)) == REGNO (reg))
9204 0 : delete_insn (c_insn);
9205 : }
9206 : }
9207 0 : }
9208 : /* Return a SET that moves REG to (STORE true) or from (STORE false) the frame memory at FRAME_REG + OFFSET, accessed in REG's mode. */
9209 : static rtx
9210 193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
9211 : {
9212 193166 : rtx addr, mem;
9213 : /* ADDR is only computed, and only read, for a nonzero OFFSET; otherwise FRAME_REG itself is the address. */
9214 193166 : if (offset)
9215 184480 : addr = plus_constant (Pmode, frame_reg, offset);
9216 193166 : mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
9217 193166 : return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
9218 : }
9219 : /* Return a SET that loads REG from the frame memory at FRAME_REG + OFFSET. */
9220 : static inline rtx
9221 100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
9222 : {
9223 100333 : return gen_frame_set (reg, frame_reg, offset, false);
9224 : }
9225 : /* Return a SET that stores REG to the frame memory at FRAME_REG + OFFSET. */
9226 : static inline rtx
9227 92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
9228 : {
9229 92833 : return gen_frame_set (reg, frame_reg, offset, true);
9230 : }
9231 : /* Emit the ms2sysv register save for FRAME: load the save stub's base pointer into RAX, then emit one frame-related PARALLEL made of a USE of the stub symbol followed by a store of each register the stub saves. */
9232 : static void
9233 7045 : ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
9234 : {
9235 7045 : struct machine_function *m = cfun->machine;
9236 7045 : const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9237 7045 : + m->call_ms2sysv_extra_regs;
9238 7045 : rtvec v = rtvec_alloc (ncregs + 1);
9239 7045 : unsigned int align, i, vi = 0;
9240 7045 : rtx_insn *insn;
9241 7045 : rtx sym, addr;
9242 7045 : rtx rax = gen_rtx_REG (word_mode, AX_REG);
9243 7045 : const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9244 :
9245 : /* AL should only be live with sysv_abi. */
9246 7045 : gcc_assert (!ix86_eax_live_at_start_p ());
9247 7045 : gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
9248 :
9249 : /* Set up RAX as the stub's base pointer. We use stack_realign_offset
9250 : whether we've actually realigned the stack or not. */
9251 7045 : align = GET_MODE_ALIGNMENT (V4SFmode);
9252 7045 : addr = choose_baseaddr (frame.stack_realign_offset
9253 7045 : + xlogue.get_stub_ptr_offset (), &align, AX_REG);
9254 7045 : gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9255 :
9256 7045 : emit_insn (gen_rtx_SET (rax, addr));
9257 :
9258 : /* Get the stub symbol. */
9259 8327 : sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
9260 : : XLOGUE_STUB_SAVE);
9261 7045 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9262 : /* Then one store per saved register, addressed as RAX - r.offset; SSE registers are stored in V4SFmode, the others in word_mode. */
9263 99878 : for (i = 0; i < ncregs; ++i)
9264 : {
9265 92833 : const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9266 92833 : rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
9267 92833 : r.regno);
9268 92833 : RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
9269 : }
9270 : /* Every slot of V (the USE plus NCREGS stores) must have been filled. */
9271 7045 : gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
9272 :
9273 7045 : insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
9274 7045 : RTX_FRAME_RELATED_P (insn) = true;
9275 7045 : }
9276 :
9277 : /* Generate and return an insn body to AND X with Y, leaving the result in X. Asserts that X matches operands 0 and 1 and Y matches operand 2 of the AND pattern for X's mode. */
9278 :
9279 : static rtx_insn *
9280 31704 : gen_and2_insn (rtx x, rtx y)
9281 : {
9282 31704 : enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
9283 :
9284 31704 : gcc_assert (insn_operand_matches (icode, 0, x));
9285 31704 : gcc_assert (insn_operand_matches (icode, 1, x));
9286 31704 : gcc_assert (insn_operand_matches (icode, 2, y));
9287 :
9288 31704 : return GEN_FCN (icode) (x, x, y);
9289 : }
9290 :
9291 : /* Expand the prologue into a bunch of separate insns, tracking the resulting frame state in cfun->machine->fs. Nothing is emitted for naked functions. */
9292 :
9293 : void
9294 1526209 : ix86_expand_prologue (void)
9295 : {
9296 1526209 : struct machine_function *m = cfun->machine;
9297 1526209 : rtx insn, t;
9298 1526209 : HOST_WIDE_INT allocate;
9299 1526209 : bool int_registers_saved;
9300 1526209 : bool sse_registers_saved;
9301 1526209 : bool save_stub_call_needed;
9302 1526209 : rtx static_chain = NULL_RTX;
9303 :
9304 1526209 : ix86_last_zero_store_uid = 0;
9305 1526209 : if (ix86_function_naked (current_function_decl))
9306 : {
9307 74 : if (flag_stack_usage_info)
9308 0 : current_function_static_stack_size = 0;
9309 74 : return;
9310 : }
9311 : /* Settle frame_pointer_needed and stack_realign_needed before anything is emitted. */
9312 1526135 : ix86_finalize_stack_frame_flags ();
9313 :
9314 : /* DRAP should not coexist with stack_realign_fp */
9315 1526135 : gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9316 :
9317 1526135 : memset (&m->fs, 0, sizeof (m->fs));
9318 :
9319 : /* Initialize CFA state for before the prologue. */
9320 1526135 : m->fs.cfa_reg = stack_pointer_rtx;
9321 1526135 : m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9322 :
9323 : /* Track SP offset to the CFA. We continue tracking this after we've
9324 : swapped the CFA register away from SP. In the case of re-alignment
9325 : this is fudged; we're interested in offsets within the local frame. */
9326 1526135 : m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9327 1526135 : m->fs.sp_valid = true;
9328 1526135 : m->fs.sp_realigned = false;
9329 :
9330 1526135 : const struct ix86_frame &frame = cfun->machine->frame;
9331 :
9332 1526135 : if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9333 : {
9334 : /* We should have already generated an error for any use of
9335 : ms_hook on a nested function. */
9336 0 : gcc_checking_assert (!ix86_static_chain_on_stack);
9337 :
9338 : /* Check if profiling is active and we shall use profiling before
9339 : prologue variant. If so sorry. */
9340 0 : if (crtl->profile && flag_fentry != 0)
9341 0 : sorry ("%<ms_hook_prologue%> attribute is not compatible "
9342 : "with %<-mfentry%> for 32-bit");
9343 :
9344 : /* In ix86_asm_output_function_label we emitted:
9345 : 8b ff movl.s %edi,%edi
9346 : 55 push %ebp
9347 : 8b ec movl.s %esp,%ebp
9348 :
9349 : This matches the hookable function prologue in Win32 API
9350 : functions in Microsoft Windows XP Service Pack 2 and newer.
9351 : Wine uses this to enable Windows apps to hook the Win32 API
9352 : functions provided by Wine.
9353 :
9354 : What that means is that we've already set up the frame pointer. */
9355 :
9356 0 : if (frame_pointer_needed
9357 0 : && !(crtl->drap_reg && crtl->stack_realign_needed))
9358 : {
9359 0 : rtx push, mov;
9360 :
9361 : /* We've decided to use the frame pointer already set up.
9362 : Describe this to the unwinder by pretending that both
9363 : push and mov insns happen right here.
9364 :
9365 : Putting the unwind info here at the end of the ms_hook
9366 : is done so that we can make absolutely certain we get
9367 : the required byte sequence at the start of the function,
9368 : rather than relying on an assembler that can produce
9369 : the exact encoding required.
9370 :
9371 : However it does mean (in the unpatched case) that we have
9372 : a 1 insn window where the asynchronous unwind info is
9373 : incorrect. However, if we placed the unwind info at
9374 : its correct location we would have incorrect unwind info
9375 : in the patched case. Which is probably all moot since
9376 : I don't expect Wine generates dwarf2 unwind info for the
9377 : system libraries that use this feature. */
9378 :
9379 0 : insn = emit_insn (gen_blockage ());
9380 :
9381 0 : push = gen_push (hard_frame_pointer_rtx);
9382 0 : mov = gen_rtx_SET (hard_frame_pointer_rtx,
9383 : stack_pointer_rtx);
9384 0 : RTX_FRAME_RELATED_P (push) = 1;
9385 0 : RTX_FRAME_RELATED_P (mov) = 1;
9386 :
9387 0 : RTX_FRAME_RELATED_P (insn) = 1;
9388 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9389 : gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9390 :
9391 : /* Note that gen_push incremented m->fs.cfa_offset, even
9392 : though we didn't emit the push insn here. */
9393 0 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9394 0 : m->fs.fp_offset = m->fs.cfa_offset;
9395 0 : m->fs.fp_valid = true;
9396 0 : }
9397 : else
9398 : {
9399 : /* The frame pointer is not needed so pop %ebp again.
9400 : This leaves us with a pristine state. */
9401 0 : emit_insn (gen_pop (hard_frame_pointer_rtx));
9402 : }
9403 : }
9404 :
9405 : /* The first insn of a function that accepts its static chain on the
9406 : stack is to push the register that would be filled in by a direct
9407 : call. This insn will be skipped by the trampoline. */
9408 1526135 : else if (ix86_static_chain_on_stack)
9409 : {
9410 0 : static_chain = ix86_static_chain (cfun->decl, false);
9411 0 : insn = emit_insn (gen_push (static_chain));
9412 0 : emit_insn (gen_blockage ());
9413 :
9414 : /* We don't want to interpret this push insn as a register save,
9415 : only as a stack adjustment. The real copy of the register as
9416 : a save will be done later, if needed. */
9417 0 : t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
9418 0 : t = gen_rtx_SET (stack_pointer_rtx, t);
9419 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9420 0 : RTX_FRAME_RELATED_P (insn) = 1;
9421 : }
9422 :
9423 : /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9424 : DRAP is needed and stack realignment is really needed after reload. */
9425 1526135 : if (stack_realign_drap)
9426 : {
9427 7079 : int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9428 :
9429 : /* Can't use DRAP in interrupt function. */
9430 7079 : if (cfun->machine->func_type != TYPE_NORMAL)
9431 0 : sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
9432 : "in interrupt service routine. This may be worked "
9433 : "around by avoiding functions with aggregate return.");
9434 :
9435 : /* Only need to push the parameter pointer reg if it is not call-used, i.e. its value must be preserved for the caller. */
9436 7079 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9437 : {
9438 : /* Push arg pointer reg */
9439 137 : insn = emit_insn (gen_push (crtl->drap_reg));
9440 137 : RTX_FRAME_RELATED_P (insn) = 1;
9441 : }
9442 :
9443 : /* Grab the argument pointer. */
9444 7364 : t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
9445 7079 : insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9446 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9447 7079 : m->fs.cfa_reg = crtl->drap_reg;
9448 7079 : m->fs.cfa_offset = 0;
9449 :
9450 : /* Align the stack. */
9451 7079 : insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
9452 7079 : GEN_INT (-align_bytes)));
9453 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9454 :
9455 : /* Replicate the return address on the stack so that return
9456 : address can be reached via (argp - 1) slot. This is needed
9457 : to implement macro RETURN_ADDR_RTX and intrinsic function
9458 : expand_builtin_return_addr etc. */
9459 7649 : t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
9460 7079 : t = gen_frame_mem (word_mode, t);
9461 7079 : insn = emit_insn (gen_push (t));
9462 7079 : RTX_FRAME_RELATED_P (insn) = 1;
9463 :
9464 : /* For the purposes of frame and register save area addressing,
9465 : we've started over with a new frame. */
9466 7079 : m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9467 7079 : m->fs.realigned = true;
9468 :
9469 7079 : if (static_chain)
9470 : {
9471 : /* Replicate static chain on the stack so that static chain
9472 : can be reached via (argp - 2) slot. This is needed for
9473 : nested function with stack realignment. */
9474 0 : insn = emit_insn (gen_push (static_chain));
9475 0 : RTX_FRAME_RELATED_P (insn) = 1;
9476 : }
9477 : }
9478 : /* A register class with nothing to save counts as already saved; the ms2sysv stub save is pending only when call_ms2sysv is set. */
9479 1526135 : int_registers_saved = (frame.nregs == 0);
9480 1526135 : sse_registers_saved = (frame.nsseregs == 0);
9481 1526135 : save_stub_call_needed = (m->call_ms2sysv);
9482 1526135 : gcc_assert (sse_registers_saved || !save_stub_call_needed);
9483 :
9484 1526135 : if (frame_pointer_needed && !m->fs.fp_valid)
9485 : {
9486 : /* Note: AT&T enter does NOT have reversed args. Enter is probably
9487 : slower on all targets. Also sdb didn't like it. */
9488 480466 : insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9489 480466 : RTX_FRAME_RELATED_P (insn) = 1;
9490 :
9491 480466 : if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9492 : {
9493 480466 : insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9494 480466 : RTX_FRAME_RELATED_P (insn) = 1;
9495 :
9496 480466 : if (m->fs.cfa_reg == stack_pointer_rtx)
9497 473387 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9498 480466 : m->fs.fp_offset = m->fs.sp_offset;
9499 480466 : m->fs.fp_valid = true;
9500 : }
9501 : }
9502 :
9503 1526135 : if (!int_registers_saved)
9504 : {
9505 : /* If saving registers via PUSH, do so now. */
9506 474397 : if (!frame.save_regs_using_mov)
9507 : {
9508 429611 : ix86_emit_save_regs ();
9509 429611 : m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
9510 429611 : int_registers_saved = true;
9511 429611 : gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9512 : }
9513 :
9514 : /* When using red zone we may start register saving before allocating
9515 : the stack frame saving one cycle of the prologue. However, avoid
9516 : doing this if we have to probe the stack; at least on x86_64 the
9517 : stack probe can turn into a call that clobbers a red zone location. */
9518 44786 : else if (ix86_using_red_zone ()
9519 44786 : && (! TARGET_STACK_PROBE
9520 0 : || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9521 : {
9522 40312 : HOST_WIDE_INT allocate_offset;
9523 40312 : if (crtl->shrink_wrapped_separate)
9524 : {
9525 40256 : allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
9526 :
9527 : /* Adjust the total offset at the beginning of the function. */
9528 40256 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9529 : GEN_INT (allocate_offset), -1,
9530 40256 : m->fs.cfa_reg == stack_pointer_rtx);
9531 40256 : m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
9532 : }
9533 :
9534 40312 : ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9535 40312 : int_registers_saved = true;
9536 : }
9537 : }
9538 :
9539 1526135 : if (frame.red_zone_size != 0)
9540 140422 : cfun->machine->red_zone_used = true;
9541 :
9542 1526135 : if (stack_realign_fp)
9543 : {
9544 24625 : int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9545 24974 : gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9546 :
9547 : /* Record last valid frame pointer offset. */
9548 24625 : m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9549 :
9550 : /* The computation of the size of the re-aligned stack frame means
9551 : that we must allocate the size of the register save area before
9552 : performing the actual alignment. Otherwise we cannot guarantee
9553 : that there's enough storage above the realignment point. */
9554 24625 : allocate = frame.reg_save_offset - m->fs.sp_offset
9555 24625 : + frame.stack_realign_allocate;
9556 24625 : if (allocate)
9557 2691 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9558 : GEN_INT (-allocate), -1, false);
9559 :
9560 : /* Align the stack. */
9561 24625 : emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9562 24625 : m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9563 24625 : m->fs.sp_realigned_offset = m->fs.sp_offset
9564 24625 : - frame.stack_realign_allocate;
9565 : /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9566 : Beyond this point, stack access should be done via choose_baseaddr or
9567 : by using sp_valid_at and fp_valid_at to determine the correct base
9568 : register. Henceforth, any CFA offset should be thought of as logical
9569 : and not physical. */
9570 24625 : gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9571 24625 : gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9572 24625 : m->fs.sp_realigned = true;
9573 :
9574 : /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9575 : is needed to describe where a register is saved using a realigned
9576 : stack pointer, so we need to invalidate the stack pointer for that
9577 : target. */
9578 24625 : if (TARGET_SEH)
9579 : m->fs.sp_valid = false;
9580 :
9581 : /* If SP offset is non-immediate after allocation of the stack frame,
9582 : then emit SSE saves or stub call prior to allocating the rest of the
9583 : stack frame. This is less efficient for the out-of-line stub because
9584 : we can't combine allocations across the call barrier, but it's better
9585 : than using a scratch register. */
9586 24625 : else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9587 : - m->fs.sp_realigned_offset),
9588 24625 : Pmode))
9589 : {
9590 3 : if (!sse_registers_saved)
9591 : {
9592 1 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9593 1 : sse_registers_saved = true;
9594 : }
9595 2 : else if (save_stub_call_needed)
9596 : {
9597 1 : ix86_emit_outlined_ms2sysv_save (frame);
9598 1 : save_stub_call_needed = false;
9599 : }
9600 : }
9601 : }
9602 : /* Bytes still to be allocated to bring SP to its final offset. */
9603 1526135 : allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9604 :
9605 1526135 : if (flag_stack_usage_info)
9606 : {
9607 : /* We start to count from ARG_POINTER. */
9608 355 : HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9609 :
9610 : /* If it was realigned, take into account the fake frame. */
9611 355 : if (stack_realign_drap)
9612 : {
9613 1 : if (ix86_static_chain_on_stack)
9614 0 : stack_size += UNITS_PER_WORD;
9615 :
9616 1 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9617 0 : stack_size += UNITS_PER_WORD;
9618 :
9619 : /* This over-estimates by 1 minimal-stack-alignment-unit but
9620 : mitigates that by counting in the new return address slot. */
9621 1 : current_function_dynamic_stack_size
9622 1 : += crtl->stack_alignment_needed / BITS_PER_UNIT;
9623 : }
9624 :
9625 355 : current_function_static_stack_size = stack_size;
9626 : }
9627 :
9628 : /* On SEH target with very large frame size, allocate an area to save
9629 : SSE registers (as the very large allocation won't be described). */
9630 1526135 : if (TARGET_SEH
9631 : && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9632 : && !sse_registers_saved)
9633 : {
9634 : HOST_WIDE_INT sse_size
9635 : = frame.sse_reg_save_offset - frame.reg_save_offset;
9636 :
9637 : gcc_assert (int_registers_saved);
9638 :
9639 : /* No need to do stack checking as the area will be immediately
9640 : written. */
9641 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9642 : GEN_INT (-sse_size), -1,
9643 : m->fs.cfa_reg == stack_pointer_rtx);
9644 : allocate -= sse_size;
9645 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9646 : sse_registers_saved = true;
9647 : }
9648 :
9649 : /* If stack clash protection is requested, then probe the stack, unless it
9650 : is already probed on the target. */
9651 1526135 : if (allocate >= 0
9652 1526131 : && flag_stack_clash_protection
9653 1526233 : && !ix86_target_stack_probe ())
9654 : {
9655 98 : ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9656 98 : allocate = 0;
9657 : }
9658 :
9659 : /* The stack has already been decremented by the instruction calling us
9660 : so probe if the size is non-negative to preserve the protection area. */
9661 1526037 : else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9662 : {
9663 45 : const HOST_WIDE_INT probe_interval = get_probe_interval ();
9664 :
9665 45 : if (STACK_CHECK_MOVING_SP)
9666 : {
9667 45 : if (crtl->is_leaf
9668 18 : && !cfun->calls_alloca
9669 18 : && allocate <= probe_interval)
9670 : ;
9671 :
9672 : else
9673 : {
9674 28 : ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9675 28 : allocate = 0;
9676 : }
9677 : }
9678 :
9679 : else
9680 : {
9681 : HOST_WIDE_INT size = allocate;
9682 :
9683 : if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9684 : size = 0x80000000 - get_stack_check_protect () - 1;
9685 :
9686 : if (TARGET_STACK_PROBE)
9687 : {
9688 : if (crtl->is_leaf && !cfun->calls_alloca)
9689 : {
9690 : if (size > probe_interval)
9691 : ix86_emit_probe_stack_range (0, size, int_registers_saved);
9692 : }
9693 : else
9694 : ix86_emit_probe_stack_range (0,
9695 : size + get_stack_check_protect (),
9696 : int_registers_saved);
9697 : }
9698 : else
9699 : {
9700 : if (crtl->is_leaf && !cfun->calls_alloca)
9701 : {
9702 : if (size > probe_interval
9703 : && size > get_stack_check_protect ())
9704 : ix86_emit_probe_stack_range (get_stack_check_protect (),
9705 : (size
9706 : - get_stack_check_protect ()),
9707 : int_registers_saved);
9708 : }
9709 : else
9710 : ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9711 : int_registers_saved);
9712 : }
9713 : }
9714 : }
9715 : /* Perform the remaining allocation; ALLOCATE was zeroed above if the probing code already adjusted the stack. */
9716 1526131 : if (allocate == 0)
9717 : ;
9718 842085 : else if (!ix86_target_stack_probe ()
9719 842085 : || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9720 : {
9721 842040 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9722 : GEN_INT (-allocate), -1,
9723 842040 : m->fs.cfa_reg == stack_pointer_rtx);
9724 : }
9725 : else
9726 : {
9727 45 : rtx eax = gen_rtx_REG (Pmode, AX_REG);
9728 45 : rtx r10 = NULL;
9729 45 : const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9730 45 : bool eax_live = ix86_eax_live_at_start_p ();
9731 45 : bool r10_live = false;
9732 :
9733 45 : if (TARGET_64BIT)
9734 45 : r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9735 : /* AX is about to be overwritten with the allocation size, so push it first if it is live on entry; the push itself covers one word of the allocation. */
9736 45 : if (eax_live)
9737 : {
9738 0 : insn = emit_insn (gen_push (eax));
9739 0 : allocate -= UNITS_PER_WORD;
9740 : /* Note that SEH directives need to continue tracking the stack
9741 : pointer even after the frame pointer has been set up. */
9742 0 : if (sp_is_cfa_reg || TARGET_SEH)
9743 : {
9744 0 : if (sp_is_cfa_reg)
9745 0 : m->fs.cfa_offset += UNITS_PER_WORD;
9746 0 : RTX_FRAME_RELATED_P (insn) = 1;
9747 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9748 0 : gen_rtx_SET (stack_pointer_rtx,
9749 : plus_constant (Pmode,
9750 : stack_pointer_rtx,
9751 : -UNITS_PER_WORD)));
9752 : }
9753 : }
9754 : /* Likewise push R10 when it is live (a 64-bit function with a static chain). */
9755 45 : if (r10_live)
9756 : {
9757 0 : r10 = gen_rtx_REG (Pmode, R10_REG);
9758 0 : insn = emit_insn (gen_push (r10));
9759 0 : allocate -= UNITS_PER_WORD;
9760 0 : if (sp_is_cfa_reg || TARGET_SEH)
9761 : {
9762 0 : if (sp_is_cfa_reg)
9763 0 : m->fs.cfa_offset += UNITS_PER_WORD;
9764 0 : RTX_FRAME_RELATED_P (insn) = 1;
9765 0 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9766 0 : gen_rtx_SET (stack_pointer_rtx,
9767 : plus_constant (Pmode,
9768 : stack_pointer_rtx,
9769 : -UNITS_PER_WORD)));
9770 : }
9771 : }
9772 :
9773 45 : emit_move_insn (eax, GEN_INT (allocate));
9774 45 : emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9775 :
9776 : /* Use the fact that AX still contains ALLOCATE. */
9777 45 : insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9778 45 : (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9779 :
9780 45 : if (sp_is_cfa_reg || TARGET_SEH)
9781 : {
9782 37 : if (sp_is_cfa_reg)
9783 37 : m->fs.cfa_offset += allocate;
9784 37 : RTX_FRAME_RELATED_P (insn) = 1;
9785 37 : add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9786 37 : gen_rtx_SET (stack_pointer_rtx,
9787 : plus_constant (Pmode, stack_pointer_rtx,
9788 : -allocate)));
9789 : }
9790 45 : m->fs.sp_offset += allocate;
9791 :
9792 : /* Use stack_pointer_rtx for relative addressing so that code works for
9793 : realigned stack. But this means that we need a blockage to prevent
9794 : stores based on the frame pointer from being scheduled before. */
9795 45 : if (r10_live && eax_live)
9796 : {
9797 0 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9798 0 : emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9799 : gen_frame_mem (word_mode, t));
9800 0 : t = plus_constant (Pmode, t, UNITS_PER_WORD);
9801 0 : emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9802 : gen_frame_mem (word_mode, t));
9803 0 : emit_insn (gen_memory_blockage ());
9804 : }
9805 45 : else if (eax_live || r10_live)
9806 : {
9807 0 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9808 0 : emit_move_insn (gen_rtx_REG (word_mode,
9809 : (eax_live ? AX_REG : R10_REG)),
9810 : gen_frame_mem (word_mode, t));
9811 0 : emit_insn (gen_memory_blockage ());
9812 : }
9813 : }
9814 1526135 : gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9815 :
9816 : /* If we haven't already set up the frame pointer, do so now. */
9817 1526135 : if (frame_pointer_needed && !m->fs.fp_valid)
9818 : {
9819 0 : insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9820 0 : GEN_INT (frame.stack_pointer_offset
9821 : - frame.hard_frame_pointer_offset));
9822 0 : insn = emit_insn (insn);
9823 0 : RTX_FRAME_RELATED_P (insn) = 1;
9824 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9825 :
9826 0 : if (m->fs.cfa_reg == stack_pointer_rtx)
9827 0 : m->fs.cfa_reg = hard_frame_pointer_rtx;
9828 0 : m->fs.fp_offset = frame.hard_frame_pointer_offset;
9829 0 : m->fs.fp_valid = true;
9830 : }
9831 : /* Emit whichever register saves were not already done before the frame was allocated. */
9832 1526135 : if (!int_registers_saved)
9833 4474 : ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9834 1526135 : if (!sse_registers_saved)
9835 33362 : ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9836 1492773 : else if (save_stub_call_needed)
9837 7044 : ix86_emit_outlined_ms2sysv_save (frame);
9838 :
9839 : /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
9840 : in PROLOGUE. */
9841 1526135 : if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9842 : {
9843 0 : rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9844 0 : insn = emit_insn (gen_set_got (pic));
9845 0 : RTX_FRAME_RELATED_P (insn) = 1;
9846 0 : add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9847 0 : emit_insn (gen_prologue_use (pic));
9848 : /* Delete the already emitted SET_GOT if it exists and is allocated to
9849 : REAL_PIC_OFFSET_TABLE_REGNUM. */
9850 0 : ix86_elim_entry_set_got (pic);
9851 : }
9852 :
9853 1526135 : if (crtl->drap_reg && !crtl->stack_realign_needed)
9854 : {
9855 : /* vDRAP is set up, but after reload it turns out stack realignment
9856 : isn't necessary; here we emit prologue code to set up DRAP
9857 : without the stack realignment adjustment. */
9858 181 : t = choose_baseaddr (0, NULL);
9859 181 : emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9860 : }
9861 :
9862 : /* Prevent instructions from being scheduled into register save push
9863 : sequence when access to the redzone area is done through frame pointer.
9864 : The offset between the frame pointer and the stack pointer is calculated
9865 : relative to the value of the stack pointer at the end of the function
9866 : prologue, and moving instructions that access redzone area via frame
9867 : pointer inside push sequence violates this assumption. */
9868 1526135 : if (frame_pointer_needed && frame.red_zone_size)
9869 129454 : emit_insn (gen_memory_blockage ());
9870 :
9871 : /* SEH requires that the prologue end within 256 bytes of the start of
9872 : the function. Prevent instruction schedules that would extend that.
9873 : Further, prevent alloca modifications to the stack pointer from being
9874 : combined with prologue modifications. */
9875 : if (TARGET_SEH)
9876 : emit_insn (gen_prologue_use (stack_pointer_rtx));
9877 : }
9878 :
9879 : /* Emit code to restore REG using a POP or POPP insn. */
9880 :
9881 : static void
9882 1468934 : ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9883 : {
9884 1468934 : struct machine_function *m = cfun->machine;
9885 1468934 : rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9886 :
9887 1468934 : ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9888 1468934 : m->fs.sp_offset -= UNITS_PER_WORD;
9889 :
9890 1468934 : if (m->fs.cfa_reg == crtl->drap_reg
9891 1468934 : && REGNO (reg) == REGNO (crtl->drap_reg))
9892 : {
9893 : /* Previously we'd represented the CFA as an expression
9894 : like *(%ebp - 8). We've just popped that value from
9895 : the stack, which means we need to reset the CFA to
9896 : the drap register. This will remain until we restore
9897 : the stack pointer. */
9898 4032 : add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9899 4032 : RTX_FRAME_RELATED_P (insn) = 1;
9900 :
9901 : /* This means that the DRAP register is valid for addressing too. */
9902 4032 : m->fs.drap_valid = true;
9903 4032 : return;
9904 : }
9905 :
9906 1464902 : if (m->fs.cfa_reg == stack_pointer_rtx)
9907 : {
9908 1381669 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9909 1018408 : x = gen_rtx_SET (stack_pointer_rtx, x);
9910 1018408 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9911 1018408 : RTX_FRAME_RELATED_P (insn) = 1;
9912 :
9913 1200031 : m->fs.cfa_offset -= UNITS_PER_WORD;
9914 : }
9915 :
9916 : /* When the frame pointer is the CFA, and we pop it, we are
9917 : swapping back to the stack pointer as the CFA. This happens
9918 : for stack frames that don't allocate other data, so we assume
9919 : the stack pointer is now pointing at the return address, i.e.
9920 : the function entry state, which makes the offset be 1 word. */
9921 1464902 : if (reg == hard_frame_pointer_rtx)
9922 : {
9923 237572 : m->fs.fp_valid = false;
9924 237572 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9925 : {
9926 233527 : m->fs.cfa_reg = stack_pointer_rtx;
9927 233527 : m->fs.cfa_offset -= UNITS_PER_WORD;
9928 :
9929 233527 : add_reg_note (insn, REG_CFA_DEF_CFA,
9930 233527 : plus_constant (Pmode, stack_pointer_rtx,
9931 233527 : m->fs.cfa_offset));
9932 233527 : RTX_FRAME_RELATED_P (insn) = 1;
9933 : }
9934 : }
9935 : }
9936 :
9937 : /* Emit code to restore REG using a POP2 insn. */
9938 : static void
9939 19 : ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9940 : {
9941 19 : struct machine_function *m = cfun->machine;
9942 19 : const int offset = UNITS_PER_WORD * 2;
9943 19 : rtx_insn *insn;
9944 :
9945 19 : rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9946 : stack_pointer_rtx));
9947 :
9948 19 : if (ppx_p)
9949 15 : insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9950 : else
9951 4 : insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9952 :
9953 19 : RTX_FRAME_RELATED_P (insn) = 1;
9954 :
9955 19 : rtx dwarf = NULL_RTX;
9956 19 : dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9957 19 : dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9958 19 : REG_NOTES (insn) = dwarf;
9959 19 : m->fs.sp_offset -= offset;
9960 :
9961 19 : if (m->fs.cfa_reg == crtl->drap_reg
9962 19 : && (REGNO (reg1) == REGNO (crtl->drap_reg)
9963 3 : || REGNO (reg2) == REGNO (crtl->drap_reg)))
9964 : {
9965 : /* Previously we'd represented the CFA as an expression
9966 : like *(%ebp - 8). We've just popped that value from
9967 : the stack, which means we need to reset the CFA to
9968 : the drap register. This will remain until we restore
9969 : the stack pointer. */
9970 1 : add_reg_note (insn, REG_CFA_DEF_CFA,
9971 1 : REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9972 1 : RTX_FRAME_RELATED_P (insn) = 1;
9973 :
9974 : /* This means that the DRAP register is valid for addressing too. */
9975 1 : m->fs.drap_valid = true;
9976 1 : return;
9977 : }
9978 :
9979 18 : if (m->fs.cfa_reg == stack_pointer_rtx)
9980 : {
9981 14 : rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9982 14 : x = gen_rtx_SET (stack_pointer_rtx, x);
9983 14 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9984 14 : RTX_FRAME_RELATED_P (insn) = 1;
9985 :
9986 14 : m->fs.cfa_offset -= offset;
9987 : }
9988 :
9989 : /* When the frame pointer is the CFA, and we pop it, we are
9990 : swapping back to the stack pointer as the CFA. This happens
9991 : for stack frames that don't allocate other data, so we assume
9992 : the stack pointer is now pointing at the return address, i.e.
9993 : the function entry state, which makes the offset be 1 word. */
9994 18 : if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9995 : {
9996 0 : m->fs.fp_valid = false;
9997 0 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9998 : {
9999 0 : m->fs.cfa_reg = stack_pointer_rtx;
10000 0 : m->fs.cfa_offset -= offset;
10001 :
10002 0 : add_reg_note (insn, REG_CFA_DEF_CFA,
10003 0 : plus_constant (Pmode, stack_pointer_rtx,
10004 0 : m->fs.cfa_offset));
10005 0 : RTX_FRAME_RELATED_P (insn) = 1;
10006 : }
10007 : }
10008 : }
10009 :
10010 : /* Emit code to restore saved registers using POP insns. */
10011 :
10012 : static void
10013 1355562 : ix86_emit_restore_regs_using_pop (bool ppx_p)
10014 : {
10015 1355562 : unsigned int regno;
10016 :
10017 126067266 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10018 124711704 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
10019 1231041 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
10020 1355562 : }
10021 :
10022 : /* Emit code to restore saved registers using POP2 insns. */
10023 :
10024 : static void
10025 560 : ix86_emit_restore_regs_using_pop2 (void)
10026 : {
10027 560 : int regno;
10028 560 : int regno_list[2];
10029 560 : regno_list[0] = regno_list[1] = -1;
10030 560 : int loaded_regnum = 0;
10031 560 : bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
10032 :
10033 52080 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10034 51520 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
10035 : {
10036 127 : if (aligned)
10037 : {
10038 120 : regno_list[loaded_regnum++] = regno;
10039 120 : if (loaded_regnum == 2)
10040 : {
10041 19 : gcc_assert (regno_list[0] != -1
10042 : && regno_list[1] != -1
10043 : && regno_list[0] != regno_list[1]);
10044 :
10045 19 : ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
10046 : regno_list[0]),
10047 : gen_rtx_REG (word_mode,
10048 : regno_list[1]),
10049 19 : TARGET_APX_PPX);
10050 19 : loaded_regnum = 0;
10051 19 : regno_list[0] = regno_list[1] = -1;
10052 : }
10053 : }
10054 : else
10055 : {
10056 14 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
10057 7 : TARGET_APX_PPX);
10058 7 : aligned = true;
10059 : }
10060 : }
10061 :
10062 560 : if (loaded_regnum == 1)
10063 82 : ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
10064 82 : TARGET_APX_PPX);
10065 560 : }
10066 :
10067 : /* Emit code and notes for the LEAVE instruction. If insn is non-null,
10068 : omits the emit and only attaches the notes. */
10069 :
10070 : static void
10071 244201 : ix86_emit_leave (rtx_insn *insn)
10072 : {
10073 244201 : struct machine_function *m = cfun->machine;
10074 :
10075 244201 : if (!insn)
10076 243230 : insn = emit_insn (gen_leave (word_mode));
10077 :
10078 244201 : ix86_add_queued_cfa_restore_notes (insn);
10079 :
10080 244201 : gcc_assert (m->fs.fp_valid);
10081 244201 : m->fs.sp_valid = true;
10082 244201 : m->fs.sp_realigned = false;
10083 244201 : m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10084 244201 : m->fs.fp_valid = false;
10085 :
10086 244201 : if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10087 : {
10088 241059 : m->fs.cfa_reg = stack_pointer_rtx;
10089 241059 : m->fs.cfa_offset = m->fs.sp_offset;
10090 :
10091 241059 : add_reg_note (insn, REG_CFA_DEF_CFA,
10092 241059 : plus_constant (Pmode, stack_pointer_rtx,
10093 241059 : m->fs.sp_offset));
10094 241059 : RTX_FRAME_RELATED_P (insn) = 1;
10095 : }
10096 244201 : ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10097 : m->fs.fp_offset);
10098 244201 : }
10099 :
10100 : /* Emit code to restore saved registers using MOV insns.
10101 : First register is restored from CFA - CFA_OFFSET. */
10102 : static void
10103 96089 : ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10104 : bool maybe_eh_return)
10105 : {
10106 96089 : struct machine_function *m = cfun->machine;
10107 96089 : unsigned int regno;
10108 :
10109 8936277 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10110 8840188 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
10111 : {
10112 :
10113 : /* Skip registers, already processed by shrink wrap separate. */
10114 264802 : if (!cfun->machine->reg_is_wrapped_separately[regno])
10115 : {
10116 140537 : rtx reg = gen_rtx_REG (word_mode, regno);
10117 140537 : rtx mem;
10118 140537 : rtx_insn *insn;
10119 :
10120 140537 : mem = choose_baseaddr (cfa_offset, NULL);
10121 140537 : mem = gen_frame_mem (word_mode, mem);
10122 140537 : insn = emit_move_insn (reg, mem);
10123 :
10124 140537 : if (m->fs.cfa_reg == crtl->drap_reg
10125 140537 : && regno == REGNO (crtl->drap_reg))
10126 : {
10127 : /* Previously we'd represented the CFA as an expression
10128 : like *(%ebp - 8). We've just popped that value from
10129 : the stack, which means we need to reset the CFA to
10130 : the drap register. This will remain until we restore
10131 : the stack pointer. */
10132 3142 : add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10133 3142 : RTX_FRAME_RELATED_P (insn) = 1;
10134 :
10135 : /* DRAP register is valid for addressing. */
10136 3142 : m->fs.drap_valid = true;
10137 : }
10138 : else
10139 137395 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
10140 : }
10141 285366 : cfa_offset -= UNITS_PER_WORD;
10142 : }
10143 96089 : }
10144 :
10145 : /* Emit code to restore saved registers using MOV insns.
10146 : First register is restored from CFA - CFA_OFFSET. */
10147 : static void
10148 33939 : ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10149 : bool maybe_eh_return)
10150 : {
10151 33939 : unsigned int regno;
10152 :
10153 3156327 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10154 3122388 : if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
10155 : {
10156 339417 : rtx reg = gen_rtx_REG (V4SFmode, regno);
10157 339417 : rtx mem;
10158 339417 : unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
10159 :
10160 339417 : mem = choose_baseaddr (cfa_offset, &align);
10161 339417 : mem = gen_rtx_MEM (V4SFmode, mem);
10162 :
10163 : /* The location aligment depends upon the base register. */
10164 339417 : align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
10165 339417 : gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
10166 339417 : set_mem_align (mem, align);
10167 339417 : emit_insn (gen_rtx_SET (reg, mem));
10168 :
10169 339417 : ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
10170 :
10171 339417 : cfa_offset -= GET_MODE_SIZE (V4SFmode);
10172 : }
10173 33939 : }
10174 :
/* Emit the register restores of the epilogue through an out-of-line
   ms2sysv restore stub.

   FRAME is the frame layout of the current function.  RSI is loaded
   with the stub's base pointer and one PARALLEL is built that holds a
   USE of the stub symbol plus a frame load for each clobbered register.

   When USE_CALL is true the PARALLEL is emitted as an ordinary insn and,
   if the stack pointer is valid, the stack is then deallocated down to
   FRAME.stack_realign_offset.  When USE_CALL is false the PARALLEL also
   contains ret_rtx and the stack pointer restore, and is emitted as a
   jump insn.

   STYLE is only passed on to pro_epilogue_adjust_stack.  */
10175 : static void
10176 7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
10177 : bool use_call, int style)
10178 : {
10179 7621 : struct machine_function *m = cfun->machine;
10180 7621 : const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
10181 7621 : + m->call_ms2sysv_extra_regs;
10182 7621 : rtvec v;
10183 7621 : unsigned int elems_needed, align, i, vi = 0;
10184 7621 : rtx_insn *insn;
10185 7621 : rtx sym, tmp;
10186 7621 : rtx rsi = gen_rtx_REG (word_mode, SI_REG);
10187 7621 : rtx r10 = NULL_RTX;
10188 7621 : const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
10189 7621 : HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
10190 7621 : HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
10191 7621 : rtx rsi_frame_load = NULL_RTX;
10192 7621 : HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
10193 7621 : enum xlogue_stub stub;
10194 :
10195 7621 : gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
10196 :
10197 : /* If using a realigned stack, we should never start with padding. */
10198 7621 : gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
10199 :
10200 : /* Setup RSI as the stub's base pointer. */
10201 7621 : align = GET_MODE_ALIGNMENT (V4SFmode);
10202 7621 : tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
10203 7621 : gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
10204 :
10205 7621 : emit_insn (gen_rtx_SET (rsi, tmp));
10206 :
10207 : /* Get a symbol for the stub. */
10208 7621 : if (frame_pointer_needed)
10209 5955 : stub = use_call ? XLOGUE_STUB_RESTORE_HFP
10210 : : XLOGUE_STUB_RESTORE_HFP_TAIL;
10211 : else
10212 1666 : stub = use_call ? XLOGUE_STUB_RESTORE
10213 : : XLOGUE_STUB_RESTORE_TAIL;
10214 7621 : sym = xlogue.get_stub_rtx (stub);
10215 :
/* Size the PARALLEL: one frame load per clobbered register, plus the
   USE of the stub symbol when calling; when tail-jumping, ret_rtx, the
   USE and either three elements (SP set, RBP reload, memory clobber)
   with a frame pointer or one element (SP set from R10) without.  */
10216 7621 : elems_needed = ncregs;
10217 7621 : if (use_call)
10218 6498 : elems_needed += 1;
10219 : else
10220 1275 : elems_needed += frame_pointer_needed ? 5 : 3;
10221 7621 : v = rtvec_alloc (elems_needed);
10222 :
10223 : /* We call the epilogue stub when we need to pop incoming args or we are
10224 : doing a sibling call as the tail. Otherwise, we will emit a jmp to the
10225 : epilogue stub and it is the tail-call. */
10226 7621 : if (use_call)
10227 6498 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10228 : else
10229 : {
10230 1123 : RTVEC_ELT (v, vi++) = ret_rtx;
10231 1123 : RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
10232 1123 : if (frame_pointer_needed)
10233 : {
10234 971 : rtx rbp = gen_rtx_REG (DImode, BP_REG);
10235 971 : gcc_assert (m->fs.fp_valid);
10236 971 : gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
10237 :
/* Describe the stack pointer and frame pointer restore as part of the
   jump: SP = RBP + 8, RBP = *RBP, plus a clobber of memory.  */
10238 971 : tmp = plus_constant (DImode, rbp, 8);
10239 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
10240 971 : RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
10241 971 : tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10242 971 : RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
10243 : }
10244 : else
10245 : {
10246 : /* If no hard frame pointer, we set R10 to the SP restore value. */
10247 152 : gcc_assert (!m->fs.fp_valid);
10248 152 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10249 152 : gcc_assert (m->fs.sp_valid);
10250 :
10251 152 : r10 = gen_rtx_REG (DImode, R10_REG);
10252 152 : tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
10253 152 : emit_insn (gen_rtx_SET (r10, tmp));
10254 :
10255 152 : RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
10256 : }
10257 : }
10258 :
10259 : /* Generate frame load insns and restore notes. */
10260 107954 : for (i = 0; i < ncregs; ++i)
10261 : {
10262 100333 : const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
10263 100333 : machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
10264 100333 : rtx reg, frame_load;
10265 :
10266 100333 : reg = gen_rtx_REG (mode, r.regno);
10267 100333 : frame_load = gen_frame_load (reg, rsi, r.offset);
10268 :
10269 : /* Save RSI frame load insn & note to add last. */
10270 100333 : if (r.regno == SI_REG)
10271 : {
10272 7621 : gcc_assert (!rsi_frame_load);
10273 7621 : rsi_frame_load = frame_load;
10274 7621 : rsi_restore_offset = r.offset;
10275 : }
10276 : else
10277 : {
10278 92712 : RTVEC_ELT (v, vi++) = frame_load;
10279 92712 : ix86_add_cfa_restore_note (NULL, reg, r.offset);
10280 : }
10281 : }
10282 :
10283 : /* Add RSI frame load & restore note at the end. */
10284 7621 : gcc_assert (rsi_frame_load);
10285 7621 : gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
10286 7621 : RTVEC_ELT (v, vi++) = rsi_frame_load;
10287 7621 : ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
10288 : rsi_restore_offset);
10289 :
10290 : /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
10291 7621 : if (!use_call && !frame_pointer_needed)
10292 : {
10293 152 : gcc_assert (m->fs.sp_valid);
10294 152 : gcc_assert (!m->fs.sp_realigned);
10295 :
10296 : /* At this point, R10 should point to frame.stack_realign_offset. */
10297 152 : if (m->fs.cfa_reg == stack_pointer_rtx)
10298 152 : m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
10299 152 : m->fs.sp_offset = frame.stack_realign_offset;
10300 : }
10301 :
/* Every slot reserved above must have been filled exactly once.  */
10302 7621 : gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
10303 7621 : tmp = gen_rtx_PARALLEL (VOIDmode, v);
10304 7621 : if (use_call)
10305 6498 : insn = emit_insn (tmp);
10306 : else
10307 : {
10308 1123 : insn = emit_jump_insn (tmp);
10309 1123 : JUMP_LABEL (insn) = ret_rtx;
10310 :
/* With a frame pointer, ix86_emit_leave is given the jump insn so that
   it only attaches its notes and updates the frame state.  */
10311 1123 : if (frame_pointer_needed)
10312 971 : ix86_emit_leave (insn);
10313 : else
10314 : {
10315 : /* Need CFA adjust note. */
10316 152 : tmp = gen_rtx_SET (stack_pointer_rtx, r10);
10317 152 : add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
10318 : }
10319 : }
10320 :
10321 7621 : RTX_FRAME_RELATED_P (insn) = true;
10322 7621 : ix86_add_queued_cfa_restore_notes (insn);
10323 :
10324 : /* If we're not doing a tail-call, we need to adjust the stack. */
10325 7621 : if (use_call && m->fs.sp_valid)
10326 : {
10327 3706 : HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
10328 3706 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10329 : GEN_INT (dealloc), style,
10330 3706 : m->fs.cfa_reg == stack_pointer_rtx);
10331 : }
10332 7621 : }
10333 :
10334 : /* Restore function stack, frame, and registers. */
10335 :
10336 : void
10337 1650658 : ix86_expand_epilogue (int style)
10338 : {
10339 1650658 : struct machine_function *m = cfun->machine;
10340 1650658 : struct machine_frame_state frame_state_save = m->fs;
10341 1650658 : bool restore_regs_via_mov;
10342 1650658 : bool using_drap;
10343 1650658 : bool restore_stub_is_tail = false;
10344 :
10345 1650658 : if (ix86_function_naked (current_function_decl))
10346 : {
10347 : /* The program should not reach this point. */
10348 74 : emit_insn (gen_ud2 ());
10349 124568 : return;
10350 : }
10351 :
10352 1650584 : ix86_finalize_stack_frame_flags ();
10353 1650584 : const struct ix86_frame &frame = cfun->machine->frame;
10354 :
10355 1650584 : m->fs.sp_realigned = stack_realign_fp;
10356 31846 : m->fs.sp_valid = stack_realign_fp
10357 1625913 : || !frame_pointer_needed
10358 2107713 : || crtl->sp_is_unchanging;
10359 1650584 : gcc_assert (!m->fs.sp_valid
10360 : || m->fs.sp_offset == frame.stack_pointer_offset);
10361 :
10362 : /* The FP must be valid if the frame pointer is present. */
10363 1650584 : gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10364 1650584 : gcc_assert (!m->fs.fp_valid
10365 : || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10366 :
10367 : /* We must have *some* valid pointer to the stack frame. */
10368 1650584 : gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10369 :
10370 : /* The DRAP is never valid at this point. */
10371 1650584 : gcc_assert (!m->fs.drap_valid);
10372 :
10373 : /* See the comment about red zone and frame
10374 : pointer usage in ix86_expand_prologue. */
10375 1650584 : if (frame_pointer_needed && frame.red_zone_size)
10376 129487 : emit_insn (gen_memory_blockage ());
10377 :
10378 1650584 : using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10379 7175 : gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10380 :
10381 : /* Determine the CFA offset of the end of the red-zone. */
10382 1650584 : m->fs.red_zone_offset = 0;
10383 1650584 : if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10384 : {
10385 : /* The red-zone begins below return address and error code in
10386 : exception handler. */
10387 1473127 : m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
10388 :
10389 : /* When the register save area is in the aligned portion of
10390 : the stack, determine the maximum runtime displacement that
10391 : matches up with the aligned frame. */
10392 1473127 : if (stack_realign_drap)
10393 8620 : m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10394 4310 : + UNITS_PER_WORD);
10395 : }
10396 :
10397 1650584 : HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
10398 :
10399 : /* Special care must be taken for the normal return case of a function
10400 : using eh_return: the eax and edx registers are marked as saved, but
10401 : not restored along this path. Adjust the save location to match. */
10402 1650584 : if (crtl->calls_eh_return && style != 2)
10403 37 : reg_save_offset -= 2 * UNITS_PER_WORD;
10404 :
10405 : /* EH_RETURN requires the use of moves to function properly. */
10406 1650584 : if (crtl->calls_eh_return)
10407 : restore_regs_via_mov = true;
10408 : /* SEH requires the use of pops to identify the epilogue. */
10409 1650526 : else if (TARGET_SEH)
10410 : restore_regs_via_mov = false;
10411 : /* If we already save reg with pushp, don't use move at epilogue. */
10412 1650526 : else if (m->fs.apx_ppx_used)
10413 : restore_regs_via_mov = false;
10414 : /* If we're only restoring one register and sp cannot be used then
10415 : using a move instruction to restore the register since it's
10416 : less work than reloading sp and popping the register. */
10417 1650439 : else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
10418 : restore_regs_via_mov = true;
10419 1589592 : else if (crtl->shrink_wrapped_separate
10420 1537288 : || (TARGET_EPILOGUE_USING_MOVE
10421 56735 : && cfun->machine->use_fast_prologue_epilogue
10422 56679 : && (frame.nregs > 1
10423 56666 : || m->fs.sp_offset != reg_save_offset)))
10424 : restore_regs_via_mov = true;
10425 1537053 : else if (frame_pointer_needed
10426 418342 : && !frame.nregs
10427 323164 : && m->fs.sp_offset != reg_save_offset)
10428 : restore_regs_via_mov = true;
10429 1385823 : else if (frame_pointer_needed
10430 267112 : && TARGET_USE_LEAVE
10431 267037 : && cfun->machine->use_fast_prologue_epilogue
10432 210065 : && frame.nregs == 1)
10433 : restore_regs_via_mov = true;
10434 : else
10435 1650584 : restore_regs_via_mov = false;
10436 :
10437 1650584 : if (crtl->shrink_wrapped_separate)
10438 52335 : gcc_assert (restore_regs_via_mov);
10439 :
10440 1598249 : if (restore_regs_via_mov || frame.nsseregs)
10441 : {
10442 : /* Ensure that the entire register save area is addressable via
10443 : the stack pointer, if we will restore SSE regs via sp. */
10444 328047 : if (TARGET_64BIT
10445 315417 : && m->fs.sp_offset > 0x7fffffff
10446 23 : && sp_valid_at (frame.stack_realign_offset + 1)
10447 328069 : && (frame.nsseregs + frame.nregs) != 0)
10448 : {
10449 6 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10450 6 : GEN_INT (m->fs.sp_offset
10451 : - frame.sse_reg_save_offset),
10452 : style,
10453 6 : m->fs.cfa_reg == stack_pointer_rtx);
10454 : }
10455 : }
10456 :
10457 : /* If there are any SSE registers to restore, then we have to do it
10458 : via moves, since there's obviously no pop for SSE regs. */
10459 1650584 : if (frame.nsseregs)
10460 33939 : ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10461 : style == 2);
10462 :
10463 1650584 : if (m->call_ms2sysv)
10464 : {
10465 7621 : int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
10466 :
10467 : /* We cannot use a tail-call for the stub if:
10468 : 1. We have to pop incoming args,
10469 : 2. We have additional int regs to restore, or
10470 : 3. A sibling call will be the tail-call, or
10471 : 4. We are emitting an eh_return_internal epilogue.
10472 :
10473 : TODO: Item 4 has not yet been tested!
10474 :
10475 : If any of the above are true, we will call the stub rather than
10476 : jump to it. */
10477 7621 : restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
10478 7621 : ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
10479 : }
10480 :
10481 : /* If using out-of-line stub that is a tail-call, then...*/
10482 1650584 : if (m->call_ms2sysv && restore_stub_is_tail)
10483 : {
10484 : /* TODO: paranoid tests. (remove eventually) */
10485 1123 : gcc_assert (m->fs.sp_valid);
10486 1123 : gcc_assert (!m->fs.sp_realigned);
10487 1123 : gcc_assert (!m->fs.fp_valid);
10488 1123 : gcc_assert (!m->fs.realigned);
10489 1123 : gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
10490 1123 : gcc_assert (!crtl->drap_reg);
10491 1123 : gcc_assert (!frame.nregs);
10492 1123 : gcc_assert (!crtl->shrink_wrapped_separate);
10493 : }
10494 1649461 : else if (restore_regs_via_mov)
10495 : {
10496 293339 : rtx t;
10497 :
10498 293339 : if (frame.nregs)
10499 96089 : ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
10500 :
10501 : /* eh_return epilogues need %ecx added to the stack pointer. */
10502 293339 : if (style == 2)
10503 : {
10504 37 : rtx sa = EH_RETURN_STACKADJ_RTX;
10505 29 : rtx_insn *insn;
10506 :
10507 29 : gcc_assert (!crtl->shrink_wrapped_separate);
10508 :
10509 : /* Stack realignment doesn't work with eh_return. */
10510 29 : if (crtl->stack_realign_needed)
10511 0 : sorry ("Stack realignment not supported with "
10512 : "%<__builtin_eh_return%>");
10513 :
10514 : /* regparm nested functions don't work with eh_return. */
10515 29 : if (ix86_static_chain_on_stack)
10516 0 : sorry ("regparm nested function not supported with "
10517 : "%<__builtin_eh_return%>");
10518 :
10519 29 : if (frame_pointer_needed)
10520 : {
10521 35 : t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10522 43 : t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
10523 27 : emit_insn (gen_rtx_SET (sa, t));
10524 :
10525 : /* NB: eh_return epilogues must restore the frame pointer
10526 : in word_mode since the upper 32 bits of RBP register
10527 : can have any values. */
10528 27 : t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10529 27 : rtx frame_reg = gen_rtx_REG (word_mode,
10530 : HARD_FRAME_POINTER_REGNUM);
10531 27 : insn = emit_move_insn (frame_reg, t);
10532 :
10533 : /* Note that we use SA as a temporary CFA, as the return
10534 : address is at the proper place relative to it. We
10535 : pretend this happens at the FP restore insn because
10536 : prior to this insn the FP would be stored at the wrong
10537 : offset relative to SA, and after this insn we have no
10538 : other reasonable register to use for the CFA. We don't
10539 : bother resetting the CFA to the SP for the duration of
10540 : the return insn, unless the control flow instrumentation
10541 : is done. In this case the SP is used later and we have
10542 : to reset CFA to SP. */
10543 27 : add_reg_note (insn, REG_CFA_DEF_CFA,
10544 35 : plus_constant (Pmode, sa, UNITS_PER_WORD));
10545 27 : ix86_add_queued_cfa_restore_notes (insn);
10546 27 : add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10547 27 : RTX_FRAME_RELATED_P (insn) = 1;
10548 :
10549 27 : m->fs.cfa_reg = sa;
10550 27 : m->fs.cfa_offset = UNITS_PER_WORD;
10551 27 : m->fs.fp_valid = false;
10552 :
10553 27 : pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10554 : const0_rtx, style,
10555 27 : flag_cf_protection);
10556 : }
10557 : else
10558 : {
10559 2 : t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10560 2 : t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10561 2 : insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10562 2 : ix86_add_queued_cfa_restore_notes (insn);
10563 :
10564 2 : gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10565 2 : if (m->fs.cfa_offset != UNITS_PER_WORD)
10566 : {
10567 2 : m->fs.cfa_offset = UNITS_PER_WORD;
10568 2 : add_reg_note (insn, REG_CFA_DEF_CFA,
10569 2 : plus_constant (Pmode, stack_pointer_rtx,
10570 2 : UNITS_PER_WORD));
10571 2 : RTX_FRAME_RELATED_P (insn) = 1;
10572 : }
10573 : }
10574 29 : m->fs.sp_offset = UNITS_PER_WORD;
10575 29 : m->fs.sp_valid = true;
10576 29 : m->fs.sp_realigned = false;
10577 : }
10578 : }
10579 : else
10580 : {
10581 : /* SEH requires that the function end with (1) a stack adjustment
10582 : if necessary, (2) a sequence of pops, and (3) a return or
10583 : jump instruction. Prevent insns from the function body from
10584 : being scheduled into this sequence. */
10585 1356122 : if (TARGET_SEH)
10586 : {
10587 : /* Prevent a catch region from being adjacent to the standard
10588 : epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10589 : nor several other flags that would be interesting to test are
10590 : set up yet. */
10591 : if (flag_non_call_exceptions)
10592 : emit_insn (gen_nops (const1_rtx));
10593 : else
10594 : emit_insn (gen_blockage ());
10595 : }
10596 :
10597 : /* First step is to deallocate the stack frame so that we can
10598 : pop the registers. If the stack pointer was realigned, it needs
10599 : to be restored now. Also do it on SEH target for very large
10600 : frame as the emitted instructions aren't allowed by the ABI
10601 : in epilogues. */
10602 1356122 : if (!m->fs.sp_valid || m->fs.sp_realigned
10603 : || (TARGET_SEH
10604 : && (m->fs.sp_offset - reg_save_offset
10605 : >= SEH_MAX_FRAME_SIZE)))
10606 : {
10607 29737 : pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10608 29737 : GEN_INT (m->fs.fp_offset
10609 : - reg_save_offset),
10610 : style, false);
10611 : }
10612 1326385 : else if (m->fs.sp_offset != reg_save_offset)
10613 : {
10614 614759 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10615 : GEN_INT (m->fs.sp_offset
10616 : - reg_save_offset),
10617 : style,
10618 614759 : m->fs.cfa_reg == stack_pointer_rtx);
10619 : }
10620 :
10621 1356122 : if (TARGET_APX_PUSH2POP2
10622 563 : && ix86_can_use_push2pop2 ()
10623 1356683 : && m->func_type == TYPE_NORMAL)
10624 560 : ix86_emit_restore_regs_using_pop2 ();
10625 : else
10626 1355562 : ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10627 : }
10628 :
10629 : /* If we used a frame pointer and haven't already got rid of it,
10630 : then do so now. */
10631 1650584 : if (m->fs.fp_valid)
10632 : {
10633 : /* If the stack pointer is valid and pointing at the frame
10634 : pointer store address, then we only need a pop. */
10635 480802 : if (sp_valid_at (frame.hfp_save_offset)
10636 480802 : && m->fs.sp_offset == frame.hfp_save_offset)
10637 237560 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10638 : /* Leave results in shorter dependency chains on CPUs that are
10639 : able to grok it fast. */
10640 243242 : else if (TARGET_USE_LEAVE
10641 12 : || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10642 243254 : || !cfun->machine->use_fast_prologue_epilogue)
10643 243230 : ix86_emit_leave (NULL);
10644 : else
10645 : {
10646 12 : pro_epilogue_adjust_stack (stack_pointer_rtx,
10647 : hard_frame_pointer_rtx,
10648 12 : const0_rtx, style, !using_drap);
10649 12 : ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10650 : }
10651 : }
10652 :
10653 1650584 : if (using_drap)
10654 : {
10655 7175 : int param_ptr_offset = UNITS_PER_WORD;
10656 7175 : rtx_insn *insn;
10657 :
10658 7175 : gcc_assert (stack_realign_drap);
10659 :
10660 7175 : if (ix86_static_chain_on_stack)
10661 0 : param_ptr_offset += UNITS_PER_WORD;
10662 7175 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10663 232 : param_ptr_offset += UNITS_PER_WORD;
10664 :
10665 7480 : insn = emit_insn (gen_rtx_SET
10666 : (stack_pointer_rtx,
10667 : plus_constant (Pmode, crtl->drap_reg,
10668 : -param_ptr_offset)));
10669 7175 : m->fs.cfa_reg = stack_pointer_rtx;
10670 7175 : m->fs.cfa_offset = param_ptr_offset;
10671 7175 : m->fs.sp_offset = param_ptr_offset;
10672 7175 : m->fs.realigned = false;
10673 :
10674 7480 : add_reg_note (insn, REG_CFA_DEF_CFA,
10675 7175 : plus_constant (Pmode, stack_pointer_rtx,
10676 7175 : param_ptr_offset));
10677 7175 : RTX_FRAME_RELATED_P (insn) = 1;
10678 :
10679 7175 : if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10680 232 : ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10681 : }
10682 :
10683 : /* At this point the stack pointer must be valid, and we must have
10684 : restored all of the registers. We may not have deallocated the
10685 : entire stack frame. We've delayed this until now because it may
10686 : be possible to merge the local stack deallocation with the
10687 : deallocation forced by ix86_static_chain_on_stack. */
10688 1650584 : gcc_assert (m->fs.sp_valid);
10689 1650584 : gcc_assert (!m->fs.sp_realigned);
10690 1650584 : gcc_assert (!m->fs.fp_valid);
10691 1650584 : gcc_assert (!m->fs.realigned);
10692 1786289 : if (m->fs.sp_offset != UNITS_PER_WORD)
10693 : {
10694 50054 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10695 : GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10696 : style, true);
10697 : }
10698 : else
10699 1600530 : ix86_add_queued_cfa_restore_notes (get_last_insn ());
10700 :
10701 : /* Sibcall epilogues don't want a return instruction. */
10702 1650584 : if (style == 0)
10703 : {
10704 124420 : m->fs = frame_state_save;
10705 124420 : return;
10706 : }
10707 :
10708 1526164 : if (cfun->machine->func_type != TYPE_NORMAL)
10709 120 : emit_jump_insn (gen_interrupt_return ());
10710 1526044 : else if (crtl->args.pops_args && crtl->args.size)
10711 : {
10712 25996 : rtx popc = GEN_INT (crtl->args.pops_args);
10713 :
10714 : /* i386 can only pop 64K bytes. If asked to pop more, pop return
10715 : address, do explicit add, and jump indirectly to the caller. */
10716 :
10717 25996 : if (crtl->args.pops_args >= 65536)
10718 : {
10719 0 : rtx ecx = gen_rtx_REG (SImode, CX_REG);
10720 0 : rtx_insn *insn;
10721 :
10722 : /* There is no "pascal" calling convention in any 64bit ABI. */
10723 0 : gcc_assert (!TARGET_64BIT);
10724 :
10725 0 : insn = emit_insn (gen_pop (ecx));
10726 0 : m->fs.cfa_offset -= UNITS_PER_WORD;
10727 0 : m->fs.sp_offset -= UNITS_PER_WORD;
10728 :
10729 0 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10730 0 : x = gen_rtx_SET (stack_pointer_rtx, x);
10731 0 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10732 0 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10733 0 : RTX_FRAME_RELATED_P (insn) = 1;
10734 :
10735 0 : pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10736 : popc, -1, true);
10737 0 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10738 : }
10739 : else
10740 25996 : emit_jump_insn (gen_simple_return_pop_internal (popc));
10741 : }
10742 1500048 : else if (!m->call_ms2sysv || !restore_stub_is_tail)
10743 : {
10744 : /* In case of return from EH a simple return cannot be used
10745 : as a return address will be compared with a shadow stack
10746 : return address. Use indirect jump instead. */
10747 1498925 : if (style == 2 && flag_cf_protection)
10748 : {
10749 : /* Register used in indirect jump must be in word_mode. But
10750 : Pmode may not be the same as word_mode for x32. */
10751 17 : rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10752 17 : rtx_insn *insn;
10753 :
10754 17 : insn = emit_insn (gen_pop (ecx));
10755 17 : m->fs.cfa_offset -= UNITS_PER_WORD;
10756 17 : m->fs.sp_offset -= UNITS_PER_WORD;
10757 :
10758 33 : rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10759 17 : x = gen_rtx_SET (stack_pointer_rtx, x);
10760 17 : add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10761 17 : add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10762 17 : RTX_FRAME_RELATED_P (insn) = 1;
10763 :
10764 17 : emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10765 17 : }
10766 : else
10767 1498908 : emit_jump_insn (gen_simple_return_internal ());
10768 : }
10769 :
10770 : /* Restore the state back to the state from the prologue,
10771 : so that it's correct for the next epilogue. */
10772 1526164 : m->fs = frame_state_save;
10773 : }
10774 :
10775 : /* Reset from the function's potential modifications.
   When pic_offset_table_rtx is set and ix86_use_pseudo_pic_reg ()
   returns false, its register number is set back to
   REAL_PIC_OFFSET_TABLE_REGNUM.  Under TARGET_MACHO the trailing insns
   are inspected as well: a nop is written to FILE after a trailing
   label, and a ud2 is written when a non-thunk function contains no
   real insn at all.  */
10776 :
10777 : static void
10778 1486852 : ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10779 : {
10780 1486852 : if (pic_offset_table_rtx
10781 1486852 : && !ix86_use_pseudo_pic_reg ())
10782 0 : SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10783 :
10784 1486852 : if (TARGET_MACHO)
10785 : {
10786 : rtx_insn *insn = get_last_insn ();
10787 : rtx_insn *deleted_debug_label = NULL;
10788 :
10789 : /* Mach-O doesn't support labels at the end of objects, so if
10790 : it looks like we might want one, take special action.
10791 : First, collect any sequence of deleted debug labels.
   The walk goes backwards from the last insn and stops at the first
   insn that is not a note, or that is a NOTE_INSN_DELETED_LABEL.  */
10792 : while (insn
10793 : && NOTE_P (insn)
10794 : && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10795 : {
10796 : /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10797 : notes only, instead set their CODE_LABEL_NUMBER to -1,
10798 : otherwise there would be code generation differences
10799 : in between -g and -g0. */
10800 : if (NOTE_P (insn) && NOTE_KIND (insn)
10801 : == NOTE_INSN_DELETED_DEBUG_LABEL)
10802 : deleted_debug_label = insn;
10803 : insn = PREV_INSN (insn);
10804 : }
10805 :
10806 : /* If we have:
10807 : label:
10808 : barrier
10809 : then this needs to be detected, so skip past the barrier. */
10810 :
10811 : if (insn && BARRIER_P (insn))
10812 : insn = PREV_INSN (insn);
10813 :
10814 : /* Up to now we've only seen notes or barriers.  If INSN is NULL
   here the walk ran off the start of the insn chain, and only the
   deleted debug labels recorded above (if any) are renumbered.  */
10815 : if (insn)
10816 : {
10817 : if (LABEL_P (insn)
10818 : || (NOTE_P (insn)
10819 : && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10820 : /* Trailing label. */
10821 : fputs ("\tnop\n", file);
10822 : else if (cfun && ! cfun->is_thunk)
10823 : {
10824 : /* See if we have a completely empty function body, skipping
10825 : the special case of the picbase thunk emitted as asm. */
10826 : while (insn && ! INSN_P (insn))
10827 : insn = PREV_INSN (insn);
10828 : /* If we don't find any insns, we've got an empty function body;
10829 : I.e. completely empty - without a return or branch. This is
10830 : taken as the case where a function body has been removed
10831 : because it contains an inline __builtin_unreachable(). GCC
10832 : declares that reaching __builtin_unreachable() means UB so
10833 : we're not obliged to do anything special; however, we want
10834 : non-zero-sized function bodies. To meet this, and help the
10835 : user out, let's trap the case. */
10836 : if (insn == NULL)
10837 : fputs ("\tud2\n", file);
10838 : }
10839 : }
10840 : else if (deleted_debug_label)
10841 : for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10842 : if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10843 : CODE_LABEL_NUMBER (insn) = -1;
10844 : }
10845 1486852 : }
10846 :
10847 : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.
   Does nothing when cfun->machine->function_label_emitted is set;
   otherwise forwards FILE, PATCH_AREA_SIZE and RECORD_P unchanged to
   default_print_patchable_function_entry.  */
10848 :
10849 : void
10850 59 : ix86_print_patchable_function_entry (FILE *file,
10851 : unsigned HOST_WIDE_INT patch_area_size,
10852 : bool record_p)
10853 : {
10854 59 : if (cfun->machine->function_label_emitted)
10855 : {
10856 : /* NB: When ix86_print_patchable_function_entry is called after
10857 : function table has been emitted, we have inserted or queued
10858 : a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10859 : place. There is nothing to do here.
   NOTE(review): the guard tests function_label_emitted, so
   "function table" above presumably means the function label --
   confirm.  */
10860 : return;
10861 : }
10862 :
10863 8 : default_print_patchable_function_entry (file, patch_area_size,
10864 : record_p);
10865 : }
10866 :
10867 : /* Output patchable area. NB: default_print_patchable_function_entry
10868 : isn't available in i386.md.
   This wrapper passes PATCH_AREA_SIZE and RECORD_P through to
   default_print_patchable_function_entry, with asm_out_file as the
   output stream.  */
10869 :
10870 : void
10871 51 : ix86_output_patchable_area (unsigned int patch_area_size,
10872 : bool record_p)
10873 : {
10874 51 : default_print_patchable_function_entry (asm_out_file,
10875 : patch_area_size,
10876 : record_p);
10877 51 : }
10878 :
10879 : /* Return a scratch register to use in the split stack prologue. The
10880 : split stack prologue is used for -fsplit-stack. It is the first
10881 : instructions in the function, even before the regular prologue.
10882 : The scratch register can be any caller-saved register which is not
10883 : used for parameters or for the static chain.

   For TARGET_64BIT the result is always R11_REG.  Otherwise the choice
   depends on the fastcall/thiscall attributes of the current function's
   type, on ix86_function_regparm, and on DECL_STATIC_CHAIN:
     fastcall             -> AX_REG (unsupported with a static chain)
     thiscall             -> DX_REG, or AX_REG with a static chain
     regparm < 3          -> CX_REG, or DX_REG with a static chain
                             (unsupported with a static chain when
                             regparm >= 2)
     regparm >= 3         -> unsupported
   For the unsupported combinations sorry () is called and
   INVALID_REGNUM is returned.  */
10884 :
10885 : static unsigned int
10886 24610 : split_stack_prologue_scratch_regno (void)
10887 : {
10888 24610 : if (TARGET_64BIT)
10889 : return R11_REG;
10890 : else
10891 : {
10892 6943 : bool is_fastcall, is_thiscall;
10893 6943 : int regparm;
10894 :
10895 6943 : is_fastcall = (lookup_attribute ("fastcall",
10896 6943 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10897 : != NULL);
10898 6943 : is_thiscall = (lookup_attribute ("thiscall",
10899 6943 : TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10900 : != NULL);
10901 6943 : regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10902 :
10903 6943 : if (is_fastcall)
10904 : {
10905 0 : if (DECL_STATIC_CHAIN (cfun->decl))
10906 : {
10907 0 : sorry ("%<-fsplit-stack%> does not support fastcall with "
10908 : "nested function");
10909 0 : return INVALID_REGNUM;
10910 : }
10911 : return AX_REG;
10912 : }
10913 6943 : else if (is_thiscall)
10914 : {
10915 0 : if (!DECL_STATIC_CHAIN (cfun->decl))
10916 : return DX_REG;
10917 0 : return AX_REG;
10918 : }
10919 6943 : else if (regparm < 3)
10920 : {
10921 6943 : if (!DECL_STATIC_CHAIN (cfun->decl))
10922 : return CX_REG;
10923 : else
10924 : {
10925 459 : if (regparm >= 2)
10926 : {
10927 0 : sorry ("%<-fsplit-stack%> does not support 2 register "
10928 : "parameters for a nested function");
10929 0 : return INVALID_REGNUM;
10930 : }
10931 : return DX_REG;
10932 : }
10933 : }
10934 : else
10935 : {
10936 : /* FIXME: We could make this work by pushing a register
10937 : around the addition and comparison. */
10938 0 : sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10939 0 : return INVALID_REGNUM;
10940 : }
10941 : }
10942 : }
10943 :
10944 : /* A SYMBOL_REF for the function which allocates new stackspace for
10945 : -fsplit-stack.  Created on first use by
   ix86_expand_split_stack_prologue for the symbol __morestack.  */
10946 :
10947 : static GTY(()) rtx split_stack_fn;
10948 :
10949 : /* A SYMBOL_REF for the more stack function when using the large model.
   Created on first use by ix86_expand_split_stack_prologue for the
   symbol __morestack_large_model.  */
10950 :
10951 : static GTY(()) rtx split_stack_fn_large;
10952 :
10953 : /* Return location of the stack guard value in the TLS block.
   The result is a constant MEM in Pmode whose address is the integer
   TARGET_THREAD_SPLIT_STACK_OFFSET and whose address space is
   DEFAULT_TLS_SEG_REG.  Asserts that flag_split_stack is set; if
   TARGET_THREAD_SPLIT_STACK_OFFSET is not defined this function hits
   gcc_unreachable ().  */
10954 :
10955 : rtx
10956 260053 : ix86_split_stack_guard (void)
10957 : {
10958 260053 : int offset;
10959 260053 : addr_space_t as = DEFAULT_TLS_SEG_REG;
10960 260053 : rtx r;
10961 :
10962 260053 : gcc_assert (flag_split_stack);
10963 :
10964 : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10965 260053 : offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10966 : #else
10967 : gcc_unreachable ();
10968 : #endif
10969 :
10970 260053 : r = GEN_INT (offset);
10971 358058 : r = gen_const_mem (Pmode, r);
10972 260053 : set_mem_addr_space (r, as);
10973 :
10974 260053 : return r;
10975 : }
10976 :
10977 : /* Handle -fsplit-stack. These are the first instructions in the
10978 : function, even before the regular prologue.

   Outline of the emitted sequence:
     1. Compare the stack pointer (or, when the frame needs at least
        SPLIT_STACK_AVAILABLE bytes, a scratch register holding the
        stack pointer minus the frame size) against the guard returned
        by ix86_split_stack_guard and branch to LABEL when it is GEU.
     2. Otherwise pass the frame size and argument size to __morestack
        (or __morestack_large_model) and call it, followed by the
        split_stack_return insn.
     3. When cfun->machine->split_stack_varargs_pointer is set, load a
        scratch register with a pointer to the stack arguments on both
        paths.
   Nothing is emitted if a scratch register is required but
   split_stack_prologue_scratch_regno reports INVALID_REGNUM.  */
10979 :
10980 : void
10981 260043 : ix86_expand_split_stack_prologue (void)
10982 : {
10983 260043 : HOST_WIDE_INT allocate;
10984 260043 : unsigned HOST_WIDE_INT args_size;
10985 260043 : rtx_code_label *label;
10986 260043 : rtx limit, current, allocate_rtx, call_fusage;
10987 260043 : rtx_insn *call_insn;
10988 260043 : unsigned int scratch_regno = INVALID_REGNUM;
10989 260043 : rtx scratch_reg = NULL_RTX;
10990 260043 : rtx_code_label *varargs_label = NULL;
10991 260043 : rtx fn;
10992 :
10993 260043 : gcc_assert (flag_split_stack && reload_completed);
10994 :
10995 260043 : ix86_finalize_stack_frame_flags ();
10996 260043 : struct ix86_frame &frame = cfun->machine->frame;
10997 260043 : allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10998 :
10999 : /* This is the label we will branch to if we have enough stack
11000 : space. We expect the basic block reordering pass to reverse this
11001 : branch if optimizing, so that we branch in the unlikely case. */
11002 260043 : label = gen_label_rtx ();
11003 :
11004 : /* We need to compare the stack pointer minus the frame size with
11005 : the stack boundary in the TCB. The stack boundary always gives
11006 : us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11007 : can compare directly. Otherwise we need to do an addition. */
11008 :
11009 260043 : limit = ix86_split_stack_guard ();
11010 :
11011 260043 : if (allocate >= SPLIT_STACK_AVAILABLE
11012 235596 : || flag_force_indirect_call)
11013 : {
11014 24462 : scratch_regno = split_stack_prologue_scratch_regno ();
11015 24462 : if (scratch_regno == INVALID_REGNUM)
11016 0 : return;
11017 : }
11018 :
11019 260043 : if (allocate >= SPLIT_STACK_AVAILABLE)
11020 : {
11021 24447 : rtx offset;
11022 :
11023 : /* We need a scratch register to hold the stack pointer minus
11024 : the required frame size. Since this is the very start of the
11025 : function, the scratch register can be any caller-saved
11026 : register which is not used for parameters. */
11027 24447 : offset = GEN_INT (- allocate);
11028 :
11029 31336 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11030 24447 : if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11031 : {
11032 : /* We don't use gen_add in this case because it will
11033 : want to split to lea, but when not optimizing the insn
11034 : will not be split after this point. */
11035 31336 : emit_insn (gen_rtx_SET (scratch_reg,
11036 : gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11037 : offset)));
11038 : }
11039 : else
11040 : {
11041 0 : emit_move_insn (scratch_reg, offset);
11042 0 : emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
11043 : }
11044 : current = scratch_reg;
11045 : }
11046 : else
11047 235596 : current = stack_pointer_rtx;
11048 :
11049 260043 : ix86_expand_branch (GEU, current, limit, label);
11050 260043 : rtx_insn *jump_insn = get_last_insn ();
11051 260043 : JUMP_LABEL (jump_insn) = label;
11052 :
11053 : /* Mark the jump as very likely to be taken. */
11054 260043 : add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
11055 :
11056 260043 : if (split_stack_fn == NULL_RTX)
11057 : {
11058 5451 : split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11059 4347 : SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
11060 : }
11061 260043 : fn = split_stack_fn;
11062 :
11063 : /* Get more stack space. We pass in the desired stack space and the
11064 : size of the arguments to copy to the new stack. In 32-bit mode
11065 : we push the parameters; __morestack will return on a new stack
11066 : anyhow. In 64-bit mode we pass the parameters in r10 and
11067 : r11.  A negative crtl->args.size is passed as 0.  */
11068 260043 : allocate_rtx = GEN_INT (allocate);
11069 260043 : args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
11070 260043 : call_fusage = NULL_RTX;
11071 260043 : rtx pop = NULL_RTX;
11072 260043 : if (TARGET_64BIT)
11073 : {
11074 162038 : rtx reg10, reg11;
11075 :
11076 162038 : reg10 = gen_rtx_REG (DImode, R10_REG);
11077 162038 : reg11 = gen_rtx_REG (DImode, R11_REG);
11078 :
11079 : /* If this function uses a static chain, it will be in %r10.
11080 : Preserve it across the call to __morestack. */
11081 162038 : if (DECL_STATIC_CHAIN (cfun->decl))
11082 : {
11083 7505 : rtx rax;
11084 :
11085 7505 : rax = gen_rtx_REG (word_mode, AX_REG);
11086 7505 : emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11087 7505 : use_reg (&call_fusage, rax);
11088 : }
11089 :
11090 162038 : if (flag_force_indirect_call
11091 162023 : || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11092 : {
11093 16 : HOST_WIDE_INT argval;
11094 :
11095 16 : if (split_stack_fn_large == NULL_RTX)
11096 : {
11097 7 : split_stack_fn_large
11098 7 : = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11099 7 : SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
11100 : }
11101 :
11102 16 : fn = split_stack_fn_large;
11103 :
11104 16 : if (ix86_cmodel == CM_LARGE_PIC)
11105 : {
11106 3 : rtx_code_label *label;
11107 3 : rtx x;
11108 :
11109 3 : gcc_assert (Pmode == DImode);
11110 :
11111 3 : label = gen_label_rtx ();
11112 3 : emit_label (label);
11113 3 : LABEL_PRESERVE_P (label) = 1;
11114 3 : emit_insn (gen_set_rip_rex64 (reg10, label));
11115 3 : emit_insn (gen_set_got_offset_rex64 (reg11, label));
11116 3 : emit_insn (gen_add2_insn (reg10, reg11));
11117 3 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
11118 3 : x = gen_rtx_CONST (Pmode, x);
11119 3 : emit_move_insn (reg11, x);
11120 3 : x = gen_rtx_PLUS (Pmode, reg10, reg11);
11121 3 : x = gen_const_mem (Pmode, x);
11122 3 : fn = copy_to_suggested_reg (x, reg11, Pmode);
11123 : }
11124 13 : else if (ix86_cmodel == CM_LARGE)
11125 1 : fn = copy_to_suggested_reg (fn, reg11, Pmode);
11126 :
11127 : /* When using the large model we need to load the address
11128 : into a register, and we've run out of registers. So we
11129 : switch to a different calling convention, and we call a
11130 : different function: __morestack_large_model. We pass the
11131 : argument size in the upper 32 bits of r10 and pass the
11132 : frame size in the lower 32 bits.  This path is also taken
   when flag_force_indirect_call is set.  Both values are asserted
   to fit in 32 bits before they are packed.  */
11133 16 : gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
11134 16 : gcc_assert ((args_size & 0xffffffff) == args_size);
11135 :
11136 16 : argval = ((args_size << 16) << 16) + allocate;
11137 16 : emit_move_insn (reg10, GEN_INT (argval));
11138 16 : }
11139 : else
11140 : {
11141 162022 : emit_move_insn (reg10, allocate_rtx);
11142 162022 : emit_move_insn (reg11, GEN_INT (args_size));
11143 162022 : use_reg (&call_fusage, reg11);
11144 : }
11145 :
11146 162038 : use_reg (&call_fusage, reg10);
11147 : }
11148 : else
11149 : {
11150 98005 : if (flag_force_indirect_call && flag_pic)
11151 : {
11152 0 : rtx x;
11153 :
11154 0 : gcc_assert (Pmode == SImode);
11155 :
11156 0 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11157 :
11158 0 : emit_insn (gen_set_got (scratch_reg));
11159 0 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
11160 : UNSPEC_GOT);
11161 0 : x = gen_rtx_CONST (Pmode, x);
11162 0 : x = gen_rtx_PLUS (Pmode, scratch_reg, x);
11163 0 : x = gen_const_mem (Pmode, x);
11164 0 : fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
11165 : }
11166 :
11167 98005 : rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
11168 196010 : add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
11169 98005 : insn = emit_insn (gen_push (allocate_rtx));
11170 196010 : add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
11171 196010 : pop = GEN_INT (2 * UNITS_PER_WORD);
11172 : }
11173 :
11174 260043 : if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
11175 : {
11176 12 : scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
11177 :
11178 12 : if (GET_MODE (fn) != word_mode)
11179 0 : fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
11180 :
11181 12 : fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
11182 : }
11183 :
11184 260043 : call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11185 260043 : GEN_INT (UNITS_PER_WORD), constm1_rtx,
11186 : pop, false);
11187 260043 : add_function_usage_to (call_insn, call_fusage);
11188 260043 : if (!TARGET_64BIT)
11189 98005 : add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
11190 : /* Indicate that this function can't jump to non-local gotos. */
11191 260043 : make_reg_eh_region_note_nothrow_nononlocal (call_insn);
11192 :
11193 : /* In order to make call/return prediction work right, we now need
11194 : to execute a return instruction. See
11195 : libgcc/config/i386/morestack.S for the details on how this works.
11196 :
11197 : For flow purposes gcc must not see this as a return
11198 : instruction--we need control flow to continue at the subsequent
11199 : label. Therefore, we use an unspec. */
11200 260043 : gcc_assert (crtl->args.pops_args < 65536);
11201 260043 : rtx_insn *ret_insn
11202 260043 : = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11203 :
11204 260043 : if ((flag_cf_protection & CF_BRANCH))
11205 : {
11206 : /* Insert ENDBR since __morestack will jump back here via indirect
11207 : call. */
11208 21 : rtx cet_eb = gen_nop_endbr ();
11209 21 : emit_insn_after (cet_eb, ret_insn);
11210 : }
11211 :
11212 : /* If we are in 64-bit mode and this function uses a static chain,
11213 : we saved %r10 in %rax before calling __morestack. */
11214 260043 : if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11215 7505 : emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11216 : gen_rtx_REG (word_mode, AX_REG));
11217 :
11218 : /* If this function calls va_start, we need to store a pointer to
11219 : the arguments on the old stack, because they may not have been
11220 : all copied to the new stack. At this point the old stack can be
11221 : found at the frame pointer value used by __morestack, because
11222 : __morestack has set that up before calling back to us. Here we
11223 : store that pointer in a scratch register, and in
11224 : ix86_expand_prologue we store the scratch register in a stack
11225 : slot.
   NOTE(review): unlike the lookup near the top of this function, the
   result of split_stack_prologue_scratch_regno is not compared against
   INVALID_REGNUM here -- confirm that this cannot be reached after a
   sorry ().  */
11226 260043 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11227 : {
11228 12 : rtx frame_reg;
11229 12 : int words;
11230 :
11231 12 : scratch_regno = split_stack_prologue_scratch_regno ();
11232 16 : scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11233 16 : frame_reg = gen_rtx_REG (Pmode, BP_REG);
11234 :
11235 : /* 64-bit:
11236 : fp -> old fp value
11237 : return address within this function
11238 : return address of caller of this function
11239 : stack arguments
11240 : So we add three words to get to the stack arguments.
11241 :
11242 : 32-bit:
11243 : fp -> old fp value
11244 : return address within this function
11245 : first argument to __morestack
11246 : second argument to __morestack
11247 : return address of caller of this function
11248 : stack arguments
11249 : So we add five words to get to the stack arguments.
11250 : */
11251 12 : words = TARGET_64BIT ? 3 : 5;
11252 20 : emit_insn (gen_rtx_SET (scratch_reg,
11253 : plus_constant (Pmode, frame_reg,
11254 : words * UNITS_PER_WORD)));
11255 :
11256 12 : varargs_label = gen_label_rtx ();
11257 12 : emit_jump_insn (gen_jump (varargs_label));
11258 12 : JUMP_LABEL (get_last_insn ()) = varargs_label;
11259 :
11260 12 : emit_barrier ();
11261 : }
11262 :
11263 260043 : emit_label (label);
11264 260043 : LABEL_NUSES (label) = 1;
11265 :
11266 : /* If this function calls va_start, we now have to set the scratch
11267 : register for the case where we do not call __morestack. In this
11268 : case we need to set it based on the stack pointer. */
11269 260043 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11270 : {
11271 20 : emit_insn (gen_rtx_SET (scratch_reg,
11272 : plus_constant (Pmode, stack_pointer_rtx,
11273 : UNITS_PER_WORD)));
11274 :
11275 12 : emit_label (varargs_label);
11276 12 : LABEL_NUSES (varargs_label) = 1;
11277 : }
11278 : }
11279 :
11280 : /* We may have to tell the dataflow pass that the split stack prologue
11281 : is initializing a scratch register.
   When cfun->machine->split_stack_varargs_pointer is set, the register
   returned by split_stack_prologue_scratch_regno () is added to the
   REGS bitmap; otherwise REGS is left untouched.  */
11282 :
11283 : static void
11284 15865769 : ix86_live_on_entry (bitmap regs)
11285 : {
11286 15865769 : if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11287 : {
11288 124 : gcc_assert (flag_split_stack);
11289 124 : bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11290 : }
11291 15865769 : }
11292 :
11293 : /* Extract the parts of an RTL expression that is a valid memory address
11294 : for an instruction. Return false if the structure of the address is
11295 : grossly off.
   On success return true after storing the base, index, displacement,
   scale and segment found in ADDR into OUT->base, OUT->index,
   OUT->disp, OUT->scale and OUT->seg.  OUT is only written on the
   successful path.  */
11296 :
11297 : bool
11298 4333665097 : ix86_decompose_address (rtx addr, struct ix86_address *out)
11299 : {
11300 4333665097 : rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11301 4333665097 : rtx base_reg, index_reg;
11302 4333665097 : HOST_WIDE_INT scale = 1;
11303 4333665097 : rtx scale_rtx = NULL_RTX;
11304 4333665097 : rtx tmp;
11305 4333665097 : addr_space_t seg = ADDR_SPACE_GENERIC;
11306 :
11307 : /* Allow zero-extended SImode addresses,
11308 : they will be emitted with addr32 prefix.
   The wrapper is stripped first; a bare CONST_INT underneath is
   rejected.  */
11309 4333665097 : if (TARGET_64BIT && GET_MODE (addr) == DImode)
11310 : {
11311 2295024636 : if (GET_CODE (addr) == ZERO_EXTEND
11312 2183351 : && GET_MODE (XEXP (addr, 0)) == SImode)
11313 : {
11314 2090014 : addr = XEXP (addr, 0);
11315 2090014 : if (CONST_INT_P (addr))
11316 : return false;
11317 : }
11318 2292934622 : else if (GET_CODE (addr) == AND)
11319 : {
11320 2816511 : rtx mask = XEXP (addr, 1);
11321 2816511 : rtx shift_val;
11322 :
11323 2816511 : if (const_32bit_mask (mask, DImode)
11324 : /* For ASHIFT inside AND, combine will not generate
11325 : canonical zero-extend. Merge mask for AND and shift_count
11326 : to check if it is canonical zero-extend. */
11327 2816511 : || (CONST_INT_P (mask)
11328 1828862 : && GET_CODE (XEXP (addr, 0)) == ASHIFT
11329 141264 : && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
11330 138167 : && ((UINTVAL (mask)
11331 138167 : | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
11332 : == HOST_WIDE_INT_UC (0xffffffff))))
11333 : {
11334 81250 : addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
11335 81250 : if (addr == NULL_RTX)
11336 : return false;
11337 :
11338 81250 : if (CONST_INT_P (addr))
11339 : return false;
11340 : }
11341 : }
11342 : }
11343 :
11344 : /* Allow SImode subregs of DImode addresses,
11345 : they will be emitted with addr32 prefix. */
11346 4333665097 : if (TARGET_64BIT && GET_MODE (addr) == SImode)
11347 : {
11348 17359304 : if (SUBREG_P (addr)
11349 215218 : && GET_MODE (SUBREG_REG (addr)) == DImode)
11350 : {
11351 188320 : addr = SUBREG_REG (addr);
11352 188320 : if (CONST_INT_P (addr))
11353 : return false;
11354 : }
11355 : }
11356 :
11357 4333665097 : if (REG_P (addr))
11358 : base = addr;
11359 : else if (SUBREG_P (addr))
11360 : {
11361 456928 : if (REG_P (SUBREG_REG (addr)))
11362 : base = addr;
11363 : else
11364 : return false;
11365 : }
11366 : else if (GET_CODE (addr) == PLUS)
11367 : {
11368 : rtx addends[4], op;
11369 : int n = 0, i;
11370 :
11371 : op = addr;
11372 3161910574 : do
11373 : {
11374 3161910574 : if (n >= 4)
11375 639859762 : return false;
11376 3161905276 : addends[n++] = XEXP (op, 1);
11377 3161905276 : op = XEXP (op, 0);
11378 : }
11379 3161905276 : while (GET_CODE (op) == PLUS);
11380 3098442454 : if (n >= 4)
11381 : return false;
11382 3098435718 : addends[n] = op;
11383 :
11384 8082154146 : for (i = n; i >= 0; --i)
11385 : {
11386 5623566156 : op = addends[i];
11387 5623566156 : switch (GET_CODE (op))
11388 : {
11389 61246393 : case MULT:
11390 61246393 : if (index)
11391 : return false;
11392 61207080 : index = XEXP (op, 0);
11393 61207080 : scale_rtx = XEXP (op, 1);
11394 61207080 : break;
11395 :
11396 12775410 : case ASHIFT:
11397 12775410 : if (index)
11398 : return false;
11399 12702225 : index = XEXP (op, 0);
11400 12702225 : tmp = XEXP (op, 1);
11401 12702225 : if (!CONST_INT_P (tmp))
11402 : return false;
11403 12687634 : scale = INTVAL (tmp);
11404 12687634 : if ((unsigned HOST_WIDE_INT) scale > 3)
11405 : return false;
11406 12276811 : scale = 1 << scale;
11407 12276811 : break;
11408 :
11409 1073373 : case ZERO_EXTEND:
11410 1073373 : op = XEXP (op, 0);
11411 1073373 : if (GET_CODE (op) != UNSPEC)
11412 : return false;
11413 : /* FALLTHRU */
11414 :
11415 697639 : case UNSPEC:
11416 697639 : if (XINT (op, 1) == UNSPEC_TP
11417 689326 : && TARGET_TLS_DIRECT_SEG_REFS
11418 689326 : && seg == ADDR_SPACE_GENERIC)
11419 689326 : seg = DEFAULT_TLS_SEG_REG;
11420 : else
11421 : return false;
11422 : break;
11423 :
11424 514350 : case SUBREG:
11425 514350 : if (!REG_P (SUBREG_REG (op)))
11426 : return false;
11427 : /* FALLTHRU */
11428 :
11429 2528301838 : case REG:
11430 2528301838 : if (!base)
11431 : base = op;
11432 81377845 : else if (!index)
11433 : index = op;
11434 : else
11435 : return false;
11436 : break;
11437 :
11438 2382065820 : case CONST:
11439 2382065820 : case CONST_INT:
11440 2382065820 : case SYMBOL_REF:
11441 2382065820 : case LABEL_REF:
11442 2382065820 : if (disp)
11443 : return false;
11444 : disp = op;
11445 : break;
11446 :
11447 : default:
11448 : return false;
11449 : }
11450 : }
11451 : }
11452 : else if (GET_CODE (addr) == MULT)
11453 : {
11454 3712333 : index = XEXP (addr, 0); /* index*scale */
11455 3712333 : scale_rtx = XEXP (addr, 1);
11456 : }
11457 : else if (GET_CODE (addr) == ASHIFT)
11458 : {
11459 : /* We're called for lea too, which implements ashift on occasion.
   Only constant shift counts 0..3 are accepted; they become
   scale 1, 2, 4 or 8.  */
11460 3242888 : index = XEXP (addr, 0);
11461 3242888 : tmp = XEXP (addr, 1);
11462 3242888 : if (!CONST_INT_P (tmp))
11463 : return false;
11464 2856520 : scale = INTVAL (tmp);
11465 2856520 : if ((unsigned HOST_WIDE_INT) scale > 3)
11466 : return false;
11467 2118677 : scale = 1 << scale;
11468 : }
11469 : else
11470 : disp = addr; /* displacement */
11471 :
11472 2464419000 : if (index)
11473 : {
11474 151041430 : if (REG_P (index))
11475 : ;
11476 4004081 : else if (SUBREG_P (index)
11477 258770 : && REG_P (SUBREG_REG (index)))
11478 : ;
11479 : else
11480 : return false;
11481 : }
11482 :
11483 : /* Extract the integral value of scale.  A non-constant multiplier
   makes the address invalid.  */
11484 3688869700 : if (scale_rtx)
11485 : {
11486 56425320 : if (!CONST_INT_P (scale_rtx))
11487 : return false;
11488 55811753 : scale = INTVAL (scale_rtx);
11489 : }
11490 :
11491 3688256133 : base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
11492 3688256133 : index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
11493 :
11494 : /* Avoid useless 0 displacement. */
11495 3688256133 : if (disp == const0_rtx && (base || index))
11496 3688256133 : disp = NULL_RTX;
11497 :
11498 : /* If the unscaled index is the arg pointer, frame pointer or stack
   pointer, swap base and index so that it becomes the base.  */
11499 2686626244 : if (base_reg && index_reg && scale == 1
11500 3768150420 : && (REGNO (index_reg) == ARG_POINTER_REGNUM
11501 : || REGNO (index_reg) == FRAME_POINTER_REGNUM
11502 : || REGNO (index_reg) == SP_REG))
11503 : {
11504 : std::swap (base, index);
11505 : std::swap (base_reg, index_reg);
11506 : }
11507 :
11508 : /* Special case: rewrite index*1+disp into base+disp. */
11509 3688256133 : if (!base && index && scale == 1)
11510 52 : base = index, base_reg = index_reg, index = index_reg = NULL_RTX;
11511 :
11512 : /* Special case: %ebp cannot be encoded as a base without a displacement.
11513 : Similarly %r13.  The arg pointer and frame pointer are given an
   explicit zero displacement here as well.  */
11514 322887249 : if (!disp && base_reg
11515 4006820907 : && (REGNO (base_reg) == ARG_POINTER_REGNUM
11516 : || REGNO (base_reg) == FRAME_POINTER_REGNUM
11517 : || REGNO (base_reg) == BP_REG
11518 : || REGNO (base_reg) == R13_REG))
11519 : disp = const0_rtx;
11520 :
11521 : /* Special case: on K6, [%esi] makes the instruction vector decoded.
11522 : Avoid this by transforming to [%esi+0].
11523 : Reload calls address legitimization without cfun defined, so we need
11524 : to test cfun for being non-NULL. */
11525 0 : if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
11526 0 : && base_reg && !index_reg && !disp
11527 3688256133 : && REGNO (base_reg) == SI_REG)
11528 0 : disp = const0_rtx;
11529 :
11530 : /* Special case: encode reg+reg instead of reg*2. */
11531 3688256133 : if (!base && index && scale == 2)
11532 1001629837 : base = index, base_reg = index_reg, scale = 1;
11533 :
11534 : /* Special case: scaling cannot be encoded without base or displacement. */
11535 1001629837 : if (!base && !disp && index && scale != 1)
11536 3444671 : disp = const0_rtx;
11537 :
11538 3688256133 : out->base = base;
11539 3688256133 : out->index = index;
11540 3688256133 : out->disp = disp;
11541 3688256133 : out->scale = scale;
11542 3688256133 : out->seg = seg;
11543 :
11544 3688256133 : return true;
11545 : }
11546 :
11547 : /* Return cost of the memory address x.
11548 : For i386, it is better to use a complex address than let gcc copy
11549 : the address into a reg and make a new pseudo. But not if the address
11550 : requires two regs - that would mean more pseudos with longer
11551 : lifetimes. */
11552 : static int
11553 10828889 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
11554 : {
11555 10828889 : struct ix86_address parts;
11556 10828889 : int cost = 1;
11557 10828889 : int ok = ix86_decompose_address (x, &parts);
11558 :
11559 10828889 : gcc_assert (ok);
11560 :
11561 10828889 : if (parts.base && SUBREG_P (parts.base))
11562 492 : parts.base = SUBREG_REG (parts.base);
11563 10828889 : if (parts.index && SUBREG_P (parts.index))
11564 20 : parts.index = SUBREG_REG (parts.index);
11565 :
11566 : /* Attempt to minimize number of registers in the address by increasing
11567 : address cost for each used register. We don't increase address cost
11568 : for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
11569 : is not invariant itself it most likely means that base or index is not
11570 : invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11571 : which is not profitable for x86. */
11572 10828889 : if (parts.base
11573 9417416 : && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11574 19946779 : && (current_pass->type == GIMPLE_PASS
11575 2730054 : || !pic_offset_table_rtx
11576 126469 : || !REG_P (parts.base)
11577 126469 : || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11578 : cost++;
11579 :
11580 10828889 : if (parts.index
11581 5188702 : && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11582 16003572 : && (current_pass->type == GIMPLE_PASS
11583 649168 : || !pic_offset_table_rtx
11584 54782 : || !REG_P (parts.index)
11585 54782 : || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11586 5173408 : cost++;
11587 :
11588 : /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11589 : since its predecode logic can't detect the length of instructions
11590 : and it degenerates to vector decoded. Increase cost of such
11591 : addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11592 : to split such addresses or even refuse such addresses at all.
11593 :
11594 : Following addressing modes are affected:
11595 : [base+scale*index]
11596 : [scale*index+disp]
11597 : [base+index]
11598 :
11599 : The first and last case may be avoidable by explicitly coding the zero in
11600 : memory address, but I don't have an AMD-K6 machine handy to check this
11601 : theory. */
11602 :
11603 10828889 : if (TARGET_CPU_P (K6)
11604 0 : && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11605 0 : || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11606 0 : || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11607 0 : cost += 10;
11608 :
11609 10828889 : return cost;
11610 : }
11611 :
11612 : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
11613 :
11614 : bool
11615 1184687 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
11616 : unsigned int align,
11617 : enum by_pieces_operation op,
11618 : bool speed_p)
11619 : {
11620 : /* Return true when we are currently expanding memcpy/memset epilogue
11621 : with move_by_pieces or store_by_pieces. */
11622 1184687 : if (cfun->machine->by_pieces_in_use)
11623 : return true;
11624 :
11625 1182577 : return default_use_by_pieces_infrastructure_p (size, align, op,
11626 1182577 : speed_p);
11627 : }
11628 :
11629 : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11630 : this is used to form addresses to local data when -fPIC is in
11631 : use. */
11632 :
11633 : static bool
11634 0 : darwin_local_data_pic (rtx disp)
11635 : {
11636 0 : return (GET_CODE (disp) == UNSPEC
11637 0 : && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11638 : }
11639 :
11640 : /* True if the function symbol operand X should be loaded from GOT.
11641 : If CALL_P is true, X is a call operand.
11642 :
11643 : NB: -mno-direct-extern-access doesn't force load from GOT for
11644 : call.
11645 :
11646 : NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11647 : statements, since a PIC register could not be available at the
11648 : call site. */
11649 :
11650 : bool
11651 1852092944 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
11652 : {
11653 96329732 : return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11654 : && !TARGET_PECOFF && !TARGET_MACHO
11655 1849230178 : && (!flag_pic || this_is_asm_operands)
11656 1828902817 : && ix86_cmodel != CM_LARGE
11657 1828896818 : && ix86_cmodel != CM_LARGE_PIC
11658 1828896817 : && SYMBOL_REF_P (x)
11659 1828896815 : && ((!call_p
11660 1823450711 : && (!ix86_direct_extern_access
11661 1823448441 : || (SYMBOL_REF_DECL (x)
11662 1643487062 : && lookup_attribute ("nodirect_extern_access",
11663 1643487062 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11664 1828894091 : || (SYMBOL_REF_FUNCTION_P (x)
11665 690451943 : && (!flag_plt
11666 690447532 : || (SYMBOL_REF_DECL (x)
11667 690447532 : && lookup_attribute ("noplt",
11668 690447532 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11669 1852100478 : && !SYMBOL_REF_LOCAL_P (x));
11670 : }
11671 :
11672 : /* Determine if a given RTX is a valid constant. We already know this
11673 : satisfies CONSTANT_P. */
11674 :
11675 : static bool
11676 1557492929 : ix86_legitimate_constant_p (machine_mode mode, rtx x)
11677 : {
11678 1557492929 : switch (GET_CODE (x))
11679 : {
11680 138575000 : case CONST:
11681 138575000 : x = XEXP (x, 0);
11682 :
11683 138575000 : if (GET_CODE (x) == PLUS)
11684 : {
11685 138458561 : if (!CONST_INT_P (XEXP (x, 1)))
11686 : return false;
11687 138458561 : x = XEXP (x, 0);
11688 : }
11689 :
11690 138575000 : if (TARGET_MACHO && darwin_local_data_pic (x))
11691 : return true;
11692 :
11693 : /* Only some unspecs are valid as "constants". */
11694 138575000 : if (GET_CODE (x) == UNSPEC)
11695 493532 : switch (XINT (x, 1))
11696 : {
11697 21063 : case UNSPEC_GOT:
11698 21063 : case UNSPEC_GOTOFF:
11699 21063 : case UNSPEC_PLTOFF:
11700 21063 : return TARGET_64BIT;
11701 472106 : case UNSPEC_TPOFF:
11702 472106 : case UNSPEC_NTPOFF:
11703 472106 : x = XVECEXP (x, 0, 0);
11704 472106 : return (SYMBOL_REF_P (x)
11705 472106 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11706 275 : case UNSPEC_DTPOFF:
11707 275 : x = XVECEXP (x, 0, 0);
11708 275 : return (SYMBOL_REF_P (x)
11709 275 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11710 0 : case UNSPEC_SECREL32:
11711 0 : x = XVECEXP (x, 0, 0);
11712 0 : return SYMBOL_REF_P (x);
11713 : default:
11714 : return false;
11715 : }
11716 :
11717 : /* We must have drilled down to a symbol. */
11718 138081468 : if (LABEL_REF_P (x))
11719 : return true;
11720 138076194 : if (!SYMBOL_REF_P (x))
11721 : return false;
11722 : /* FALLTHRU */
11723 :
11724 928151861 : case SYMBOL_REF:
11725 : /* TLS symbols are never valid. */
11726 928151861 : if (SYMBOL_REF_TLS_MODEL (x))
11727 : return false;
11728 :
11729 : /* DLLIMPORT symbols are never valid. */
11730 928048072 : if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11731 : && SYMBOL_REF_DLLIMPORT_P (x))
11732 : return false;
11733 :
11734 : #if TARGET_MACHO
11735 : /* mdynamic-no-pic */
11736 : if (MACHO_DYNAMIC_NO_PIC_P)
11737 : return machopic_symbol_defined_p (x);
11738 : #endif
11739 :
11740 : /* External function address should be loaded
11741 : via the GOT slot to avoid PLT. */
11742 928048072 : if (ix86_force_load_from_GOT_p (x))
11743 : return false;
11744 :
11745 : break;
11746 :
11747 607870811 : CASE_CONST_SCALAR_INT:
11748 607870811 : if (ix86_endbr_immediate_operand (x, VOIDmode))
11749 : return false;
11750 :
11751 607870610 : switch (mode)
11752 : {
11753 1456402 : case E_TImode:
11754 1456402 : if (TARGET_64BIT)
11755 : return true;
11756 : /* FALLTHRU */
11757 25953 : case E_OImode:
11758 25953 : case E_XImode:
11759 25953 : if (!standard_sse_constant_p (x, mode)
11760 43110 : && GET_MODE_SIZE (TARGET_AVX512F
11761 : ? XImode
11762 : : (TARGET_AVX
11763 : ? OImode
11764 : : (TARGET_SSE2
11765 17157 : ? TImode : DImode))) < GET_MODE_SIZE (mode))
11766 : return false;
11767 : default:
11768 : break;
11769 : }
11770 : break;
11771 :
11772 8626544 : case CONST_VECTOR:
11773 8626544 : if (!standard_sse_constant_p (x, mode))
11774 : return false;
11775 : break;
11776 :
11777 7690782 : case CONST_DOUBLE:
11778 7690782 : if (mode == E_BFmode)
11779 : return false;
11780 :
11781 : default:
11782 : break;
11783 : }
11784 :
11785 : /* Otherwise we handle everything else in the move patterns. */
11786 : return true;
11787 : }
11788 :
11789 : /* Determine if it's legal to put X into the constant pool. This
11790 : is not possible for the address of thread-local symbols, which
11791 : is checked above. */
11792 :
11793 : static bool
11794 61749899 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11795 : {
11796 : /* We can put any immediate constant in memory. */
11797 61749899 : switch (GET_CODE (x))
11798 : {
11799 : CASE_CONST_ANY:
11800 : return false;
11801 :
11802 1796472 : default:
11803 1796472 : break;
11804 : }
11805 :
11806 1796472 : return !ix86_legitimate_constant_p (mode, x);
11807 : }
11808 :
11809 : /* Return a unique alias set for the GOT. */
11810 :
11811 : alias_set_type
11812 188716 : ix86_GOT_alias_set (void)
11813 : {
11814 188716 : static alias_set_type set = -1;
11815 188716 : if (set == -1)
11816 2952 : set = new_alias_set ();
11817 188716 : return set;
11818 : }
11819 :
11820 : /* Nonzero if the constant value X is a legitimate general operand
11821 : when generating PIC code. It is given that flag_pic is on and
11822 : that X satisfies CONSTANT_P. */
11823 :
11824 : bool
11825 126267004 : legitimate_pic_operand_p (rtx x)
11826 : {
11827 126267004 : rtx inner;
11828 :
11829 126267004 : switch (GET_CODE (x))
11830 : {
11831 2504403 : case CONST:
11832 2504403 : inner = XEXP (x, 0);
11833 2504403 : if (GET_CODE (inner) == PLUS
11834 357822 : && CONST_INT_P (XEXP (inner, 1)))
11835 357822 : inner = XEXP (inner, 0);
11836 :
11837 : /* Only some unspecs are valid as "constants". */
11838 2504403 : if (GET_CODE (inner) == UNSPEC)
11839 2254540 : switch (XINT (inner, 1))
11840 : {
11841 2193858 : case UNSPEC_GOT:
11842 2193858 : case UNSPEC_GOTOFF:
11843 2193858 : case UNSPEC_PLTOFF:
11844 2193858 : return TARGET_64BIT;
11845 0 : case UNSPEC_TPOFF:
11846 0 : x = XVECEXP (inner, 0, 0);
11847 0 : return (SYMBOL_REF_P (x)
11848 0 : && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11849 0 : case UNSPEC_SECREL32:
11850 0 : x = XVECEXP (inner, 0, 0);
11851 0 : return SYMBOL_REF_P (x);
11852 0 : case UNSPEC_MACHOPIC_OFFSET:
11853 0 : return legitimate_pic_address_disp_p (x);
11854 : default:
11855 : return false;
11856 : }
11857 : /* FALLTHRU */
11858 :
11859 6996703 : case SYMBOL_REF:
11860 6996703 : case LABEL_REF:
11861 6996703 : return legitimate_pic_address_disp_p (x);
11862 :
11863 : default:
11864 : return true;
11865 : }
11866 : }
11867 :
11868 : /* Determine if a given CONST RTX is a valid memory displacement
11869 : in PIC mode. */
11870 :
11871 : bool
11872 65325268 : legitimate_pic_address_disp_p (rtx disp)
11873 : {
11874 65325268 : bool saw_plus;
11875 :
11876 : /* In 64bit mode we can allow direct addresses of symbols and labels
11877 : when they are not dynamic symbols. */
11878 65325268 : if (TARGET_64BIT)
11879 : {
11880 40155966 : rtx op0 = disp, op1;
11881 :
11882 40155966 : switch (GET_CODE (disp))
11883 : {
11884 : case LABEL_REF:
11885 : return true;
11886 :
11887 10954876 : case CONST:
11888 10954876 : if (GET_CODE (XEXP (disp, 0)) != PLUS)
11889 : break;
11890 1173433 : op0 = XEXP (XEXP (disp, 0), 0);
11891 1173433 : op1 = XEXP (XEXP (disp, 0), 1);
11892 1173433 : if (!CONST_INT_P (op1))
11893 : break;
11894 1173433 : if (GET_CODE (op0) == UNSPEC
11895 296 : && (XINT (op0, 1) == UNSPEC_DTPOFF
11896 296 : || XINT (op0, 1) == UNSPEC_NTPOFF)
11897 1173729 : && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11898 : return true;
11899 1173137 : if (INTVAL (op1) >= 16*1024*1024
11900 1173137 : || INTVAL (op1) < -16*1024*1024)
11901 : break;
11902 1173049 : if (LABEL_REF_P (op0))
11903 : return true;
11904 1173049 : if (GET_CODE (op0) == CONST
11905 0 : && GET_CODE (XEXP (op0, 0)) == UNSPEC
11906 0 : && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11907 : return true;
11908 1173049 : if (GET_CODE (op0) == UNSPEC
11909 0 : && XINT (op0, 1) == UNSPEC_PCREL)
11910 : return true;
11911 1173049 : if (!SYMBOL_REF_P (op0))
11912 : break;
11913 : /* FALLTHRU */
11914 :
11915 30151560 : case SYMBOL_REF:
11916 : /* TLS references should always be enclosed in UNSPEC.
11917 : The dllimported symbol needs always to be resolved. */
11918 30151560 : if (SYMBOL_REF_TLS_MODEL (op0)
11919 : || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11920 : return false;
11921 :
11922 29995995 : if (TARGET_PECOFF)
11923 : {
11924 : #if TARGET_PECOFF
11925 : if (is_imported_p (op0))
11926 : return true;
11927 : #endif
11928 :
11929 : if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11930 : break;
11931 :
11932 : /* Non-external-weak function symbols need to be resolved only
11933 : for the large model. Non-external symbols don't need to be
11934 : resolved for large and medium models. For the small model,
11935 : we don't need to resolve anything here. */
11936 : if ((ix86_cmodel != CM_LARGE_PIC
11937 : && SYMBOL_REF_FUNCTION_P (op0)
11938 : && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11939 : || !SYMBOL_REF_EXTERNAL_P (op0)
11940 : || ix86_cmodel == CM_SMALL_PIC)
11941 : return true;
11942 : }
11943 29995995 : else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11944 29995991 : && (SYMBOL_REF_LOCAL_P (op0)
11945 18293818 : || ((ix86_direct_extern_access
11946 36416326 : && !(SYMBOL_REF_DECL (op0)
11947 18122671 : && lookup_attribute ("nodirect_extern_access",
11948 18122671 : DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11949 : && HAVE_LD_PIE_COPYRELOC
11950 18293492 : && flag_pie
11951 34047 : && !SYMBOL_REF_WEAK (op0)
11952 33659 : && !SYMBOL_REF_FUNCTION_P (op0)))
11953 41701901 : && ix86_cmodel != CM_LARGE_PIC)
11954 : return true;
11955 : break;
11956 :
11957 : default:
11958 : break;
11959 : }
11960 : }
11961 53244802 : if (GET_CODE (disp) != CONST)
11962 : return false;
11963 14976614 : disp = XEXP (disp, 0);
11964 :
11965 14976614 : if (TARGET_64BIT)
11966 : {
11967 : /* It is unsafe to allow PLUS expressions. This limits the allowed distance
11968 : of GOT tables. We should not need these anyway. */
11969 9833965 : if (GET_CODE (disp) != UNSPEC
11970 9781443 : || (XINT (disp, 1) != UNSPEC_GOTPCREL
11971 9781443 : && XINT (disp, 1) != UNSPEC_GOTOFF
11972 : && XINT (disp, 1) != UNSPEC_PCREL
11973 : && XINT (disp, 1) != UNSPEC_PLTOFF))
11974 : return false;
11975 :
11976 9781443 : if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11977 9781443 : && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
11978 : return false;
11979 : return true;
11980 : }
11981 :
11982 5142649 : saw_plus = false;
11983 5142649 : if (GET_CODE (disp) == PLUS)
11984 : {
11985 589189 : if (!CONST_INT_P (XEXP (disp, 1)))
11986 : return false;
11987 589189 : disp = XEXP (disp, 0);
11988 589189 : saw_plus = true;
11989 : }
11990 :
11991 5142649 : if (TARGET_MACHO && darwin_local_data_pic (disp))
11992 : return true;
11993 :
11994 5142649 : if (GET_CODE (disp) != UNSPEC)
11995 : return false;
11996 :
11997 4977325 : switch (XINT (disp, 1))
11998 : {
11999 2266983 : case UNSPEC_GOT:
12000 2266983 : if (saw_plus)
12001 : return false;
12002 : /* We need to check for both symbols and labels because VxWorks loads
12003 : text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12004 : details. */
12005 2266982 : return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
12006 2266982 : || LABEL_REF_P (XVECEXP (disp, 0, 0)));
12007 2710342 : case UNSPEC_GOTOFF:
12008 : /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12009 : While the ABI also specifies a 32bit relocation, we don't produce it in
12010 : the small PIC model at all. */
12011 2710342 : if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
12012 2710342 : || LABEL_REF_P (XVECEXP (disp, 0, 0)))
12013 : && !TARGET_64BIT)
12014 5420684 : return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12015 : return false;
12016 0 : case UNSPEC_GOTTPOFF:
12017 0 : case UNSPEC_GOTNTPOFF:
12018 0 : case UNSPEC_INDNTPOFF:
12019 0 : if (saw_plus)
12020 : return false;
12021 0 : disp = XVECEXP (disp, 0, 0);
12022 0 : return (SYMBOL_REF_P (disp)
12023 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12024 0 : case UNSPEC_NTPOFF:
12025 0 : disp = XVECEXP (disp, 0, 0);
12026 0 : return (SYMBOL_REF_P (disp)
12027 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12028 0 : case UNSPEC_DTPOFF:
12029 0 : disp = XVECEXP (disp, 0, 0);
12030 0 : return (SYMBOL_REF_P (disp)
12031 0 : && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12032 0 : case UNSPEC_SECREL32:
12033 0 : disp = XVECEXP (disp, 0, 0);
12034 0 : return SYMBOL_REF_P (disp);
12035 : }
12036 :
12037 : return false;
12038 : }
12039 :
12040 : /* Determine if op is suitable RTX for an address register.
12041 : Return naked register if a register or a register subreg is
12042 : found, otherwise return NULL_RTX. */
12043 :
12044 : static rtx
12045 1376646848 : ix86_validate_address_register (rtx op)
12046 : {
12047 1376646848 : machine_mode mode = GET_MODE (op);
12048 :
12049 : /* Only SImode or DImode registers can form the address. */
12050 1376646848 : if (mode != SImode && mode != DImode)
12051 : return NULL_RTX;
12052 :
12053 1376639925 : if (REG_P (op))
12054 : return op;
12055 697556 : else if (SUBREG_P (op))
12056 : {
12057 697556 : rtx reg = SUBREG_REG (op);
12058 :
12059 697556 : if (!REG_P (reg))
12060 : return NULL_RTX;
12061 :
12062 697556 : mode = GET_MODE (reg);
12063 :
12064 : /* Don't allow SUBREGs that span more than a word. It can
12065 : lead to spill failures when the register is one word out
12066 : of a two word structure. */
12067 1440488 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12068 : return NULL_RTX;
12069 :
12070 : /* Allow only SUBREGs of non-eliminable hard registers. */
12071 237153 : if (register_no_elim_operand (reg, mode))
12072 : return reg;
12073 : }
12074 :
12075 : /* Op is not a register. */
12076 : return NULL_RTX;
12077 : }
12078 :
12079 : /* Determine which memory address register set insn can use. */
12080 :
12081 : static enum attr_addr
12082 255317350 : ix86_memory_address_reg_class (rtx_insn* insn)
12083 : {
12084 : /* LRA can do some initialization with NULL insn,
12085 : return maximum register class in this case. */
12086 255317350 : enum attr_addr addr_rclass = ADDR_GPR32;
12087 :
12088 255317350 : if (!insn)
12089 : return addr_rclass;
12090 :
12091 72683759 : if (asm_noperands (PATTERN (insn)) >= 0
12092 72683759 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)
12093 75252 : return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
12094 :
12095 : /* Return maximum register class for unrecognized instructions. */
12096 72646133 : if (INSN_CODE (insn) < 0)
12097 : return addr_rclass;
12098 :
12099 : /* Try to recognize the insn before calling get_attr_addr.
12100 : Save current recog_data and current alternative. */
12101 72646133 : struct recog_data_d saved_recog_data = recog_data;
12102 72646133 : int saved_alternative = which_alternative;
12103 :
12104 : /* Update recog_data for processing of alternatives. */
12105 72646133 : extract_insn_cached (insn);
12106 :
12107 : /* If current alternative is not set, loop through enabled
12108 : alternatives and get the most limited register class. */
12109 72646133 : if (saved_alternative == -1)
12110 : {
12111 72646133 : alternative_mask enabled = get_enabled_alternatives (insn);
12112 :
12113 1253086297 : for (int i = 0; i < recog_data.n_alternatives; i++)
12114 : {
12115 1180440164 : if (!TEST_BIT (enabled, i))
12116 349000094 : continue;
12117 :
12118 831440070 : which_alternative = i;
12119 831440070 : addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
12120 : }
12121 : }
12122 : else
12123 : {
12124 0 : which_alternative = saved_alternative;
12125 0 : addr_rclass = get_attr_addr (insn);
12126 : }
12127 :
12128 72646133 : recog_data = saved_recog_data;
12129 72646133 : which_alternative = saved_alternative;
12130 :
12131 72646133 : return addr_rclass;
12132 : }
12133 :
12134 : /* Return memory address register class insn can use. */
12135 :
12136 : enum reg_class
12137 214635910 : ix86_insn_base_reg_class (rtx_insn* insn)
12138 : {
12139 214635910 : switch (ix86_memory_address_reg_class (insn))
12140 : {
12141 : case ADDR_GPR8:
12142 : return LEGACY_GENERAL_REGS;
12143 : case ADDR_GPR16:
12144 : return GENERAL_GPR16;
12145 : case ADDR_GPR32:
12146 : break;
12147 0 : default:
12148 0 : gcc_unreachable ();
12149 : }
12150 :
12151 : return BASE_REG_CLASS;
12152 : }
12153 :
12154 : bool
12155 1247206 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
12156 : {
12157 1247206 : switch (ix86_memory_address_reg_class (insn))
12158 : {
12159 0 : case ADDR_GPR8:
12160 0 : return LEGACY_INT_REGNO_P (regno);
12161 0 : case ADDR_GPR16:
12162 0 : return GENERAL_GPR16_REGNO_P (regno);
12163 1247206 : case ADDR_GPR32:
12164 1247206 : break;
12165 0 : default:
12166 0 : gcc_unreachable ();
12167 : }
12168 :
12169 1247206 : return GENERAL_REGNO_P (regno);
12170 : }
12171 :
12172 : enum reg_class
12173 39434234 : ix86_insn_index_reg_class (rtx_insn* insn)
12174 : {
12175 39434234 : switch (ix86_memory_address_reg_class (insn))
12176 : {
12177 : case ADDR_GPR8:
12178 : return LEGACY_INDEX_REGS;
12179 : case ADDR_GPR16:
12180 : return INDEX_GPR16;
12181 : case ADDR_GPR32:
12182 : break;
12183 0 : default:
12184 0 : gcc_unreachable ();
12185 : }
12186 :
12187 : return INDEX_REG_CLASS;
12188 : }
12189 :
12190 : /* Recognizes RTL expressions that are valid memory addresses for an
12191 : instruction. The MODE argument is the machine mode for the MEM
12192 : expression that wants to use this address.
12193 :
12194 : It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12195 : convert common non-canonical forms to canonical form so that they will
12196 : be recognized. */
12197 :
12198 : static bool
12199 2253475670 : ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
12200 : code_helper = ERROR_MARK)
12201 : {
12202 2253475670 : struct ix86_address parts;
12203 2253475670 : rtx base, index, disp;
12204 2253475670 : HOST_WIDE_INT scale;
12205 2253475670 : addr_space_t seg;
12206 :
12207 2253475670 : if (ix86_decompose_address (addr, &parts) == 0)
12208 : /* Decomposition failed. */
12209 : return false;
12210 :
12211 2241800525 : base = parts.base;
12212 2241800525 : index = parts.index;
12213 2241800525 : disp = parts.disp;
12214 2241800525 : scale = parts.scale;
12215 2241800525 : seg = parts.seg;
12216 :
12217 : /* Validate base register. */
12218 2241800525 : if (base)
12219 : {
12220 1289990810 : rtx reg = ix86_validate_address_register (base);
12221 :
12222 1289990810 : if (reg == NULL_RTX)
12223 : return false;
12224 :
12225 1289561455 : unsigned int regno = REGNO (reg);
12226 1289561455 : if ((strict && !REGNO_OK_FOR_BASE_P (regno))
12227 1285143630 : || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
12228 : /* Base is not valid. */
12229 : return false;
12230 : }
12231 :
12232 : /* Validate index register. */
12233 2240039723 : if (index)
12234 : {
12235 86656038 : rtx reg = ix86_validate_address_register (index);
12236 :
12237 86656038 : if (reg == NULL_RTX)
12238 : return false;
12239 :
12240 86617892 : unsigned int regno = REGNO (reg);
12241 86617892 : if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
12242 86610058 : || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
12243 : /* Index is not valid. */
12244 : return false;
12245 : }
12246 :
12247 : /* Index and base should have the same mode. */
12248 2239999597 : if (base && index
12249 77059703 : && GET_MODE (base) != GET_MODE (index))
12250 : return false;
12251 :
12252 : /* Address override works only on the (%reg) part of %fs:(%reg). */
12253 2239700026 : if (seg != ADDR_SPACE_GENERIC
12254 2239700026 : && ((base && GET_MODE (base) != word_mode)
12255 339653 : || (index && GET_MODE (index) != word_mode)))
12256 : return false;
12257 :
12258 : /* Validate scale factor. */
12259 2239699997 : if (scale != 1)
12260 : {
12261 39956016 : if (!index)
12262 : /* Scale without index. */
12263 : return false;
12264 :
12265 39956016 : if (scale != 2 && scale != 4 && scale != 8)
12266 : /* Scale is not a valid multiplier. */
12267 : return false;
12268 : }
12269 :
12270 : /* Validate displacement. */
12271 2236533674 : if (disp)
12272 : {
12273 2010337357 : if (ix86_endbr_immediate_operand (disp, VOIDmode))
12274 : return false;
12275 :
12276 2010337314 : if (GET_CODE (disp) == CONST
12277 149028150 : && GET_CODE (XEXP (disp, 0)) == UNSPEC
12278 15408826 : && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12279 15408826 : switch (XINT (XEXP (disp, 0), 1))
12280 : {
12281 : /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
12282 : when used. While the ABI also specifies 32bit relocations, we
12283 : don't produce them at all and use IP relative instead.
12284 : Allow GOT in 32bit mode for both PIC and non-PIC if symbol
12285 : should be loaded via GOT. */
12286 2267041 : case UNSPEC_GOT:
12287 2267041 : if (!TARGET_64BIT
12288 2267041 : && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12289 0 : goto is_legitimate_pic;
12290 : /* FALLTHRU */
12291 4553663 : case UNSPEC_GOTOFF:
12292 4553663 : gcc_assert (flag_pic);
12293 4553663 : if (!TARGET_64BIT)
12294 4553460 : goto is_legitimate_pic;
12295 :
12296 : /* 64bit address unspec. */
12297 : return false;
12298 :
12299 9781415 : case UNSPEC_GOTPCREL:
12300 9781415 : if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12301 2534 : goto is_legitimate_pic;
12302 : /* FALLTHRU */
12303 9778881 : case UNSPEC_PCREL:
12304 9778881 : gcc_assert (flag_pic);
12305 9778881 : goto is_legitimate_pic;
12306 :
12307 : case UNSPEC_GOTTPOFF:
12308 : case UNSPEC_GOTNTPOFF:
12309 : case UNSPEC_INDNTPOFF:
12310 : case UNSPEC_NTPOFF:
12311 : case UNSPEC_DTPOFF:
12312 : case UNSPEC_SECREL32:
12313 : break;
12314 :
12315 : default:
12316 : /* Invalid address unspec. */
12317 : return false;
12318 : }
12319 :
12320 1268797020 : else if (SYMBOLIC_CONST (disp)
12321 2128547812 : && (flag_pic
12322 : #if TARGET_MACHO
12323 : || (MACHOPIC_INDIRECT
12324 : && !machopic_operand_p (disp))
12325 : #endif
12326 : ))
12327 : {
12328 :
12329 58167457 : is_legitimate_pic:
12330 58167457 : if (TARGET_64BIT && (index || base))
12331 : {
12332 : /* foo@dtpoff(%rX) is ok. */
12333 36996 : if (GET_CODE (disp) != CONST
12334 7002 : || GET_CODE (XEXP (disp, 0)) != PLUS
12335 7002 : || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12336 4637 : || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12337 4637 : || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12338 4637 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
12339 6 : && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
12340 : /* Non-constant pic memory reference. */
12341 : return false;
12342 : }
12343 58130461 : else if ((!TARGET_MACHO || flag_pic)
12344 58130461 : && ! legitimate_pic_address_disp_p (disp))
12345 : /* Displacement is an invalid pic construct. */
12346 : return false;
12347 : #if TARGET_MACHO
12348 : else if (MACHO_DYNAMIC_NO_PIC_P
12349 : && !ix86_legitimate_constant_p (Pmode, disp))
12350 : /* displacement must be referenced via non_lazy_pointer */
12351 : return false;
12352 : #endif
12353 :
12354 : /* This code used to verify that a symbolic pic displacement
12355 : includes the pic_offset_table_rtx register.
12356 :
12357 : While this is a good idea, unfortunately these constructs may
12358 : be created by "adds using lea" optimization for incorrect
12359 : code like:
12360 :
12361 : int a;
12362 : int foo(int i)
12363 : {
12364 : return *(&a+i);
12365 : }
12366 :
12367 : This code is nonsensical, but results in addressing
12368 : GOT table with pic_offset_table_rtx base. We can't
12369 : just refuse it easily, since it gets matched by
12370 : "addsi3" pattern, that later gets split to lea in the
12371 : case output register differs from input. While this
12372 : can be handled by separate addsi pattern for this case
12373 : that never results in lea, disabling this test seems to be the
12374 : easier and correct fix for the crash. */
12375 : }
12376 1951095906 : else if (!LABEL_REF_P (disp)
12377 1950941830 : && !CONST_INT_P (disp)
12378 873296862 : && (GET_CODE (disp) != CONST
12379 135059641 : || !ix86_legitimate_constant_p (Pmode, disp))
12380 2692296948 : && (!SYMBOL_REF_P (disp)
12381 748785755 : || !ix86_legitimate_constant_p (Pmode, disp)))
12382 : /* Displacement is not constant. */
12383 57611089 : return false;
12384 1893484817 : else if (TARGET_64BIT
12385 1893484817 : && !x86_64_immediate_operand (disp, VOIDmode))
12386 : /* Displacement is out of range. */
12387 : return false;
12388 : /* In x32 mode, constant addresses are sign extended to 64bit, so
12389 : we have to prevent addresses from 0x80000000 to 0xffffffff. */
12390 45259 : else if (TARGET_X32 && !(index || base)
12391 17351 : && CONST_INT_P (disp)
12392 1892974236 : && val_signbit_known_set_p (SImode, INTVAL (disp)))
12393 : return false;
12394 : }
12395 :
12396 : /* Everything looks valid. */
12397 : return true;
12398 : }
12399 :
12400 : /* Determine if a given RTX is a valid constant address. */
12401 :
12402 : bool
12403 2793145531 : constant_address_p (rtx x)
12404 : {
12405 2873518736 : return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12406 : }
12407 :
12408 :
12409 : /* Return a legitimate reference for ORIG (an address) using the
12410 : register REG. If REG is 0, a new pseudo is generated.
12411 :
12412 : There are two types of references that must be handled:
12413 :
12414 : 1. Global data references must load the address from the GOT, via
12415 : the PIC reg. An insn is emitted to do this load, and the reg is
12416 : returned.
12417 :
12418 : 2. Static data references, constant pool addresses, and code labels
12419 : compute the address as an offset from the GOT, whose base is in
12420 : the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12421 : differentiate them from global data objects. The returned
12422 : address is the PIC reg + an unspec constant.
12423 :
12424 : TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12425 : reg also appears in the address. */
12426 :
/* Legitimize the PIC address ORIG and return the result.  REG, when
   nonzero, is handed to copy_to_suggested_reg / expand_simple_binop below
   as the suggested register for intermediate values.  ORIG itself is
   returned when none of the rewrites below applies.  */
12427 : rtx
12428 397806 : legitimize_pic_address (rtx orig, rtx reg)
12429 : {
12430 397806 : rtx addr = orig;
12431 397806 : rtx new_rtx = orig;
12432 :
12433 : #if TARGET_MACHO
12434 : if (TARGET_MACHO && !TARGET_64BIT)
12435 : {
12436 : if (reg == 0)
12437 : reg = gen_reg_rtx (Pmode);
12438 : /* Use the generic Mach-O PIC machinery. */
12439 : return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12440 : }
12441 : #endif
12442 :
/* On 64-bit targets with dllimport support, legitimize_pe_coff_symbol gets
   the first chance; the call is only compiled in when TARGET_PECOFF.  */
12443 397806 : if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12444 : {
12445 : #if TARGET_PECOFF
12446 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12447 : if (tmp)
12448 : return tmp;
12449 : #endif
12450 : }
12451 :
/* Case 1: a 64-bit address that legitimate_pic_address_disp_p already
   accepts is used unchanged.  */
12452 397806 : if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12453 : new_rtx = addr;
/* Case 2: ADDR satisfies gotoff_operand (not for 64-bit CM_SMALL_PIC and
   not for PE-COFF), so it is rewritten as PIC register + @GOTOFF.  */
12454 301447 : else if ((!TARGET_64BIT
12455 101745 : || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
12456 : && !TARGET_PECOFF
12457 501246 : && gotoff_operand (addr, Pmode))
12458 : {
12459 : /* This symbol may be referenced via a displacement
12460 : from the PIC base address (@GOTOFF). */
12461 96314 : if (GET_CODE (addr) == CONST)
12462 3047 : addr = XEXP (addr, 0);
12463 :
/* For a sum, only the first operand is wrapped in the UNSPEC; the second
   operand is re-added outside of it.  */
12464 96314 : if (GET_CODE (addr) == PLUS)
12465 : {
12466 6094 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12467 : UNSPEC_GOTOFF);
12468 6094 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12469 : }
12470 : else
12471 186505 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12472 :
12473 192599 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12474 :
/* On 64-bit targets the @GOTOFF constant is first copied into a register.  */
12475 96314 : if (TARGET_64BIT)
12476 29 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12477 :
/* With a suggested register the addition is emitted as an insn targeting
   REG; otherwise the sum is only built as an expression.  */
12478 96314 : if (reg != 0)
12479 : {
12480 3 : gcc_assert (REG_P (reg));
12481 3 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12482 : new_rtx, reg, 1, OPTAB_DIRECT);
12483 : }
12484 : else
12485 192596 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12486 : }
/* Case 3: a SYMBOL_REF whose TLS model is 0, or a LABEL_REF under
   TARGET_VXWORKS_VAROFF.  The address is loaded from memory (GOT) except
   for 64-bit PE-COFF, and the result is copied to a register.  */
12487 383029 : else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
12488 : /* We can't always use @GOTOFF for text labels
12489 : on VxWorks, see gotoff_operand. */
12490 205133 : || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
12491 : {
12492 : #if TARGET_PECOFF
12493 : rtx tmp = legitimize_pe_coff_symbol (addr, true);
12494 : if (tmp)
12495 : return tmp;
12496 : #endif
12497 :
12498 : /* For x64 PE-COFF there is no GOT table,
12499 : so we use address directly. */
12500 177893 : if (TARGET_64BIT && TARGET_PECOFF)
12501 : {
12502 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12503 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12504 : }
/* 64-bit code outside CM_LARGE_PIC: a constant MEM at symbol@GOTPCREL,
   placed in GOT_ALIAS_SET.  */
12505 177893 : else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12506 : {
12507 94465 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
12508 : UNSPEC_GOTPCREL);
12509 94465 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12510 94465 : new_rtx = gen_const_mem (Pmode, new_rtx);
12511 94462 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12512 : }
12513 : else
12514 : {
12515 : /* This symbol must be referenced via a load
12516 : from the Global Offset Table (@GOT). */
12517 166839 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12518 166839 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12519 :
12520 83431 : if (TARGET_64BIT)
12521 23 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12522 :
12523 83431 : if (reg != 0)
12524 : {
12525 0 : gcc_assert (REG_P (reg));
12526 0 : new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12527 : new_rtx, reg, 1, OPTAB_DIRECT);
12528 : }
12529 : else
12530 166839 : new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12531 :
12532 166839 : new_rtx = gen_const_mem (Pmode, new_rtx);
12533 83431 : set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12534 : }
12535 :
12536 261304 : new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12537 : }
/* Case 4: everything else - integer constants, CONST wrappers and sums.  */
12538 : else
12539 : {
/* An integer that is not a valid x86_64 immediate is copied to a register.  */
12540 27240 : if (CONST_INT_P (addr)
12541 27240 : && !x86_64_immediate_operand (addr, VOIDmode))
12542 8 : new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
12543 27232 : else if (GET_CODE (addr) == CONST)
12544 : {
12545 16723 : addr = XEXP (addr, 0);
12546 :
12547 : /* We must match stuff we generate before. Assume the only
12548 : unspecs that can get here are ours. Not that we could do
12549 : anything with them anyway.... */
12550 16723 : if (GET_CODE (addr) == UNSPEC
12551 8975 : || (GET_CODE (addr) == PLUS
12552 8975 : && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12553 : return orig;
12554 6852 : gcc_assert (GET_CODE (addr) == PLUS);
12555 : }
12556 :
12557 17369 : if (GET_CODE (addr) == PLUS)
12558 : {
12559 8685 : rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12560 :
12561 : /* Check first to see if this is a constant
12562 : offset from a @GOTOFF symbol reference. */
12563 8685 : if (!TARGET_PECOFF
12564 13754 : && gotoff_operand (op0, Pmode)
12565 8685 : && CONST_INT_P (op1))
12566 : {
12567 4 : if (!TARGET_64BIT)
12568 : {
12569 0 : new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12570 : UNSPEC_GOTOFF);
12571 0 : new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12572 0 : new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12573 :
12574 0 : if (reg != 0)
12575 : {
12576 0 : gcc_assert (REG_P (reg));
12577 0 : new_rtx = expand_simple_binop (Pmode, PLUS,
12578 : pic_offset_table_rtx,
12579 : new_rtx, reg, 1,
12580 : OPTAB_DIRECT);
12581 : }
12582 : else
12583 0 : new_rtx
12584 0 : = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12585 : }
12586 : else
12587 : {
/* 64-bit: only offsets outside [-16M, 16M) are rewritten here; for
   smaller offsets NEW_RTX keeps its initial value ORIG.  */
12588 4 : if (INTVAL (op1) < -16*1024*1024
12589 4 : || INTVAL (op1) >= 16*1024*1024)
12590 : {
12591 4 : if (!x86_64_immediate_operand (op1, Pmode))
12592 4 : op1 = force_reg (Pmode, op1);
12593 :
12594 4 : new_rtx
12595 4 : = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12596 : }
12597 : }
12598 : }
12599 : else
12600 : {
/* Legitimize both operands recursively.  REG is not offered to the second
   call when the first call already returned it.  */
12601 8681 : rtx base = legitimize_pic_address (op0, reg);
12602 8681 : machine_mode mode = GET_MODE (base);
12603 8681 : new_rtx
12604 8681 : = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12605 :
/* A constant second operand is folded with plus_constant when it lies in
   [-16M, 16M); otherwise BASE is forced into a register first.  */
12606 8681 : if (CONST_INT_P (new_rtx))
12607 : {
12608 6840 : if (INTVAL (new_rtx) < -16*1024*1024
12609 6840 : || INTVAL (new_rtx) >= 16*1024*1024)
12610 : {
12611 0 : if (!x86_64_immediate_operand (new_rtx, mode))
12612 0 : new_rtx = force_reg (mode, new_rtx);
12613 :
12614 0 : new_rtx
12615 0 : = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12616 : }
12617 : else
12618 6840 : new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12619 : }
12620 : else
12621 : {
12622 : /* For %rip addressing, we have to use
12623 : just disp32, not base nor index. */
12624 1841 : if (TARGET_64BIT
12625 101 : && (SYMBOL_REF_P (base)
12626 101 : || LABEL_REF_P (base)))
12627 7 : base = force_reg (mode, base);
/* Reassociate (base + (a + const)) as ((base + a) + const) so that the
   constant operand ends up outermost.  */
12628 1841 : if (GET_CODE (new_rtx) == PLUS
12629 1719 : && CONSTANT_P (XEXP (new_rtx, 1)))
12630 : {
12631 1715 : base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12632 1715 : new_rtx = XEXP (new_rtx, 1);
12633 : }
12634 1841 : new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12635 : }
12636 : }
12637 : }
12638 : }
12639 : return new_rtx;
12640 : }
12641 :
12642 : /* Load the thread pointer. If TO_REG is true, force it into a register. */
12643 :
12644 : static rtx
12645 24421 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
12646 : {
12647 24421 : rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12648 :
12649 24421 : if (GET_MODE (tp) != tp_mode)
12650 : {
12651 11 : gcc_assert (GET_MODE (tp) == SImode);
12652 11 : gcc_assert (tp_mode == DImode);
12653 :
12654 11 : tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12655 : }
12656 :
12657 24421 : if (to_reg)
12658 8112 : tp = copy_to_mode_reg (tp_mode, tp);
12659 :
12660 24421 : return tp;
12661 : }
12662 :
12663 : /* Construct the SYMBOL_REF for the _tls_index symbol. */
12664 :
12665 : static GTY(()) rtx ix86_tls_index_symbol;
12666 :
12667 : static rtx
12668 0 : ix86_tls_index (void)
12669 : {
12670 0 : if (!ix86_tls_index_symbol)
12671 0 : ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12672 :
12673 0 : if (flag_pic)
12674 0 : return gen_rtx_CONST (Pmode,
12675 : gen_rtx_UNSPEC (Pmode,
12676 : gen_rtvec (1, ix86_tls_index_symbol),
12677 : UNSPEC_PCREL));
12678 : else
12679 0 : return ix86_tls_index_symbol;
12680 : }
12681 :
12682 : /* Construct the SYMBOL_REF for the tls_get_addr function. */
12683 :
12684 : static GTY(()) rtx ix86_tls_symbol;
12685 :
12686 : rtx
12687 6715 : ix86_tls_get_addr (void)
12688 : {
12689 6715 : if (cfun->machine->call_saved_registers
12690 6715 : == TYPE_NO_CALLER_SAVED_REGISTERS)
12691 : {
12692 : /* __tls_get_addr doesn't preserve vector registers. When a
12693 : function with no_caller_saved_registers attribute calls
12694 : __tls_get_addr, YMM and ZMM registers will be clobbered.
12695 : Issue an error and suggest -mtls-dialect=gnu2 in this case. */
12696 3 : if (cfun->machine->func_type == TYPE_NORMAL)
12697 1 : error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
12698 : " with the %<no_caller_saved_registers%> attribute"));
12699 : else
12700 3 : error (cfun->machine->func_type == TYPE_EXCEPTION
12701 : ? G_("%<-mtls-dialect=gnu2%> must be used with an"
12702 : " exception service routine")
12703 : : G_("%<-mtls-dialect=gnu2%> must be used with an"
12704 : " interrupt service routine"));
12705 : /* Don't issue the same error twice. */
12706 3 : cfun->machine->func_type = TYPE_NORMAL;
12707 3 : cfun->machine->call_saved_registers
12708 3 : = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
12709 : }
12710 :
12711 6715 : if (!ix86_tls_symbol)
12712 : {
12713 204 : const char *sym
12714 241 : = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12715 241 : ? "___tls_get_addr" : "__tls_get_addr");
12716 :
12717 278 : ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12718 : }
12719 :
12720 6715 : if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12721 : {
12722 2 : rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12723 : UNSPEC_PLTOFF);
12724 2 : return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12725 : gen_rtx_CONST (Pmode, unspec));
12726 : }
12727 :
12728 6713 : return ix86_tls_symbol;
12729 : }
12730 :
12731 : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12732 :
12733 : static GTY(()) rtx ix86_tls_module_base_symbol;
12734 :
12735 : rtx
12736 98 : ix86_tls_module_base (void)
12737 : {
12738 98 : if (!ix86_tls_module_base_symbol)
12739 : {
12740 11 : ix86_tls_module_base_symbol
12741 11 : = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12742 :
12743 11 : SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12744 11 : |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12745 : }
12746 :
12747 98 : return ix86_tls_module_base_symbol;
12748 : }
12749 :
12750 : /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12751 : false if we expect this to be used for a memory address and true if
12752 : we expect to load the address into a register. */
12753 :
/* Build and return an address for the TLS symbol X using access model
   MODEL; insns needed to compute it are emitted along the way.  Within
   this function FOR_MOV only influences the TO_REG argument passed to
   get_thread_pointer in the initial-exec and local-exec cases.  */
12754 : rtx
12755 30844 : legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12756 : {
12757 30844 : rtx dest, base, off;
12758 30844 : rtx pic = NULL_RTX, tp = NULL_RTX;
12759 30844 : machine_mode tp_mode = Pmode;
12760 30844 : int type;
12761 :
12762 : /* Windows implements a single form of TLS. */
12763 30844 : if (TARGET_WIN32_TLS)
12764 : {
12765 : /* Load the 32-bit index. */
12766 : rtx ind = gen_const_mem (SImode, ix86_tls_index ());
12767 : set_mem_alias_set (ind, GOT_ALIAS_SET);
12768 : if (TARGET_64BIT)
12769 : ind = convert_to_mode (Pmode, ind, 1);
12770 : ind = force_reg (Pmode, ind);
12771 :
12772 : /* Add it to the thread pointer and load the base. */
12773 : tp = get_thread_pointer (Pmode, true);
12774 : rtx addr = gen_rtx_PLUS (Pmode, tp,
12775 : gen_rtx_MULT (Pmode, ind,
12776 : GEN_INT (UNITS_PER_WORD)));
12777 : base = gen_const_mem (Pmode, addr);
12778 : set_mem_alias_set (base, GOT_ALIAS_SET);
12779 :
12780 : /* Add the 32-bit section-relative offset to the base. */
12781 : base = force_reg (Pmode, base);
12782 : off = gen_rtx_CONST (Pmode,
12783 : gen_rtx_UNSPEC (SImode,
12784 : gen_rtvec (1, x),
12785 : UNSPEC_SECREL32));
12786 : return gen_rtx_PLUS (Pmode, base, off);
12787 : }
12788 :
12789 : /* Fall back to global dynamic model if tool chain cannot support local
12790 : dynamic. */
12791 30844 : if (TARGET_SUN_TLS && !TARGET_64BIT
12792 : && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12793 : && model == TLS_MODEL_LOCAL_DYNAMIC)
12794 : model = TLS_MODEL_GLOBAL_DYNAMIC;
12795 :
12796 30844 : switch (model)
12797 : {
12798 6116 : case TLS_MODEL_GLOBAL_DYNAMIC:
/* 32-bit code needs a PIC base: pic_offset_table_rtx under -fpic (and not
   PE-COFF), otherwise a fresh register loaded by the set_got pattern.  */
12799 6116 : if (!TARGET_64BIT)
12800 : {
12801 1930 : if (flag_pic && !TARGET_PECOFF)
12802 1930 : pic = pic_offset_table_rtx;
12803 : else
12804 : {
12805 0 : pic = gen_reg_rtx (Pmode);
12806 0 : emit_insn (gen_set_got (pic));
12807 : }
12808 : }
12809 :
/* GNU2 dialect: the value produced by the tls_dynamic_gnu2_* pattern is
   added to the thread pointer in ptr_mode, zero-extended to Pmode when the
   modes differ, and the last insn gets a REG_EQUAL note for X.  */
12810 6116 : if (TARGET_GNU2_TLS)
12811 : {
12812 53 : dest = gen_reg_rtx (ptr_mode);
12813 53 : if (TARGET_64BIT)
12814 53 : emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12815 : else
12816 0 : emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12817 :
12818 53 : tp = get_thread_pointer (ptr_mode, true);
12819 53 : dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12820 61 : if (GET_MODE (dest) != Pmode)
12821 6 : dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12822 61 : dest = force_reg (Pmode, dest);
12823 :
12824 61 : if (GET_MODE (x) != Pmode)
12825 3 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12826 :
12827 53 : set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12828 : }
12829 : else
12830 : {
/* Otherwise use the address from ix86_tls_get_addr.  The 64-bit call is
   generated in its own sequence, marked RTL_CONST_CALL_P and emitted as a
   libcall block whose equivalence is X.  */
12831 6063 : rtx caddr = ix86_tls_get_addr ();
12832 :
12833 7993 : dest = gen_reg_rtx (Pmode);
12834 6063 : if (TARGET_64BIT)
12835 : {
12836 4133 : rtx rax = gen_rtx_REG (Pmode, AX_REG);
12837 4133 : rtx rdi = gen_rtx_REG (Pmode, DI_REG);
12838 4133 : rtx_insn *insns;
12839 :
12840 4133 : start_sequence ();
12841 4133 : emit_call_insn
12842 4133 : (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
12843 4133 : insns = end_sequence ();
12844 :
12845 4133 : if (GET_MODE (x) != Pmode)
12846 1 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12847 :
12848 4133 : RTL_CONST_CALL_P (insns) = 1;
12849 4133 : emit_libcall_block (insns, dest, rax, x);
12850 : }
12851 : else
12852 1930 : emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12853 : }
12854 : break;
12855 :
12856 386 : case TLS_MODEL_LOCAL_DYNAMIC:
/* As above, but here the PIC base test is flag_pic alone.  */
12857 386 : if (!TARGET_64BIT)
12858 : {
12859 92 : if (flag_pic)
12860 92 : pic = pic_offset_table_rtx;
12861 : else
12862 : {
12863 0 : pic = gen_reg_rtx (Pmode);
12864 0 : emit_insn (gen_set_got (pic));
12865 : }
12866 : }
12867 :
/* First compute BASE: through the tls_dynamic_gnu2_* pattern applied to
   the symbol from ix86_tls_module_base for the GNU2 dialect, otherwise
   through the tls_local_dynamic_base_* patterns.  */
12868 386 : if (TARGET_GNU2_TLS)
12869 : {
12870 26 : rtx tmp = ix86_tls_module_base ();
12871 :
12872 26 : base = gen_reg_rtx (ptr_mode);
12873 26 : if (TARGET_64BIT)
12874 26 : emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12875 : else
12876 0 : emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12877 :
12878 26 : tp = get_thread_pointer (ptr_mode, true);
12879 32 : if (GET_MODE (base) != Pmode)
12880 2 : base = gen_rtx_ZERO_EXTEND (Pmode, base);
12881 32 : base = force_reg (Pmode, base);
12882 : }
12883 : else
12884 : {
12885 360 : rtx caddr = ix86_tls_get_addr ();
12886 :
12887 452 : base = gen_reg_rtx (Pmode);
12888 360 : if (TARGET_64BIT)
12889 : {
12890 268 : rtx rax = gen_rtx_REG (Pmode, AX_REG);
12891 268 : rtx rdi = gen_rtx_REG (Pmode, DI_REG);
12892 268 : rtx_insn *insns;
12893 268 : rtx eqv;
12894 :
12895 268 : start_sequence ();
12896 268 : emit_call_insn
12897 268 : (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
12898 268 : insns = end_sequence ();
12899 :
12900 : /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12901 : share the LD_BASE result with other LD model accesses. */
12902 268 : eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12903 : UNSPEC_TLS_LD_BASE);
12904 :
12905 268 : RTL_CONST_CALL_P (insns) = 1;
12906 268 : emit_libcall_block (insns, base, rax, eqv);
12907 : }
12908 : else
12909 92 : emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12910 : }
12911 :
/* Then add the @dtpoff constant of X to BASE.  */
12912 484 : off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12913 484 : off = gen_rtx_CONST (Pmode, off);
12914 :
12915 582 : dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12916 :
/* For the GNU2 dialect the thread pointer still has to be added; when TP
   is not in Pmode the addition is done in ptr_mode and zero-extended.  */
12917 386 : if (TARGET_GNU2_TLS)
12918 : {
12919 32 : if (GET_MODE (tp) != Pmode)
12920 : {
12921 2 : dest = lowpart_subreg (ptr_mode, dest, Pmode);
12922 2 : dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12923 2 : dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12924 : }
12925 : else
12926 24 : dest = gen_rtx_PLUS (Pmode, tp, dest);
12927 32 : dest = force_reg (Pmode, dest);
12928 :
12929 32 : if (GET_MODE (x) != Pmode)
12930 1 : x = gen_rtx_ZERO_EXTEND (Pmode, x);
12931 :
12932 26 : set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12933 : }
12934 : break;
12935 :
12936 10803 : case TLS_MODEL_INITIAL_EXEC:
/* Select the unspec TYPE for the offset slot and whether a PIC base is
   added to its address.  */
12937 10803 : if (TARGET_64BIT)
12938 : {
12939 : /* Generate DImode references to avoid %fs:(%reg32)
12940 : problems and linker IE->LE relaxation bug. */
12941 : tp_mode = DImode;
12942 : pic = NULL;
12943 : type = UNSPEC_GOTNTPOFF;
12944 : }
12945 761 : else if (flag_pic)
12946 : {
12947 760 : pic = pic_offset_table_rtx;
12948 760 : type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12949 : }
12950 1 : else if (!TARGET_ANY_GNU_TLS)
12951 : {
12952 0 : pic = gen_reg_rtx (Pmode);
12953 0 : emit_insn (gen_set_got (pic));
12954 0 : type = UNSPEC_GOTTPOFF;
12955 : }
12956 : else
12957 : {
12958 : pic = NULL;
12959 : type = UNSPEC_INDNTPOFF;
12960 : }
12961 :
/* OFF becomes a constant MEM in GOT_ALIAS_SET from which the offset is
   loaded.  */
12962 10803 : off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12963 10803 : off = gen_rtx_CONST (tp_mode, off);
12964 10803 : if (pic)
12965 760 : off = gen_rtx_PLUS (tp_mode, pic, off);
12966 10803 : off = gen_const_mem (tp_mode, off);
12967 10803 : set_mem_alias_set (off, GOT_ALIAS_SET);
12968 :
/* 64-bit or GNU TLS: thread pointer plus the loaded offset.  Otherwise the
   offset is subtracted from the thread pointer via gen_sub3_insn.  */
12969 10803 : if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12970 : {
12971 10803 : base = get_thread_pointer (tp_mode,
12972 10803 : for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12973 10803 : off = force_reg (tp_mode, off);
12974 10803 : dest = gen_rtx_PLUS (tp_mode, base, off);
12975 11568 : if (tp_mode != Pmode)
12976 4 : dest = convert_to_mode (Pmode, dest, 1);
12977 : }
12978 : else
12979 : {
12980 0 : base = get_thread_pointer (Pmode, true);
12981 0 : dest = gen_reg_rtx (Pmode);
12982 0 : emit_insn (gen_sub3_insn (dest, base, off));
12983 : }
12984 : break;
12985 :
12986 13539 : case TLS_MODEL_LOCAL_EXEC:
/* The offset is a link-time constant: UNSPEC_NTPOFF for 64-bit or GNU TLS,
   UNSPEC_TPOFF otherwise.  */
12987 27846 : off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12988 : (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12989 : ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12990 14307 : off = gen_rtx_CONST (Pmode, off);
12991 :
/* Note that this branch returns the sum directly instead of going through
   DEST.  */
12992 13539 : if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12993 : {
12994 14307 : base = get_thread_pointer (Pmode,
12995 13539 : for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12996 14307 : return gen_rtx_PLUS (Pmode, base, off);
12997 : }
12998 : else
12999 : {
13000 0 : base = get_thread_pointer (Pmode, true);
13001 0 : dest = gen_reg_rtx (Pmode);
13002 0 : emit_insn (gen_sub3_insn (dest, base, off));
13003 : }
13004 0 : break;
13005 :
13006 0 : default:
13007 0 : gcc_unreachable ();
13008 : }
13009 :
13010 : return dest;
13011 : }
13012 :
13013 : /* Return true if the TLS address requires insn using integer registers.
13014 : It's used to prevent KMOV/VMOV in TLS code sequences which require integer
13015 : MOV instructions, refer to PR103275. */
13016 : bool
13017 15242967 : ix86_gpr_tls_address_pattern_p (rtx mem)
13018 : {
13019 15242967 : gcc_assert (MEM_P (mem));
13020 :
13021 15242967 : rtx addr = XEXP (mem, 0);
13022 15242967 : subrtx_var_iterator::array_type array;
13023 53073126 : FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
13024 : {
13025 37837736 : rtx op = *iter;
13026 37837736 : if (GET_CODE (op) == UNSPEC)
13027 201427 : switch (XINT (op, 1))
13028 : {
13029 : case UNSPEC_GOTNTPOFF:
13030 7577 : return true;
13031 0 : case UNSPEC_TPOFF:
13032 0 : if (!TARGET_64BIT)
13033 : return true;
13034 : break;
13035 : default:
13036 : break;
13037 : }
13038 : }
13039 :
13040 15235390 : return false;
13041 15242967 : }
13042 :
13043 : /* Return true if OP refers to a TLS address. */
13044 : bool
13045 232886856 : ix86_tls_address_pattern_p (rtx op)
13046 : {
13047 232886856 : subrtx_var_iterator::array_type array;
13048 1385380295 : FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
13049 : {
13050 1152511478 : rtx op = *iter;
13051 1152511478 : if (MEM_P (op))
13052 : {
13053 105244636 : rtx *x = &XEXP (op, 0);
13054 166583251 : while (GET_CODE (*x) == PLUS)
13055 : {
13056 : int i;
13057 184033907 : for (i = 0; i < 2; i++)
13058 : {
13059 122695292 : rtx u = XEXP (*x, i);
13060 122695292 : if (GET_CODE (u) == ZERO_EXTEND)
13061 127893 : u = XEXP (u, 0);
13062 122695292 : if (GET_CODE (u) == UNSPEC
13063 18071 : && XINT (u, 1) == UNSPEC_TP)
13064 18039 : return true;
13065 : }
13066 61338615 : x = &XEXP (*x, 0);
13067 : }
13068 :
13069 105226597 : iter.skip_subrtxes ();
13070 : }
13071 : }
13072 :
13073 232868817 : return false;
13074 232886856 : }
13075 :
13076 : /* Rewrite *LOC so that it refers to a default TLS address space. */
/* Walk *LOC looking for a MEM whose address is a PLUS chain (followed
   through operand 0) containing an UNSPEC_TP operand, possibly under a
   ZERO_EXTEND.  On the first match the UNSPEC_TP operand is dropped from
   the sum, the MEM is given the new address and placed in address space
   DEFAULT_TLS_SEG_REG, and the function returns.  MEMs without such an
   operand are skipped without looking inside them.  */
13077 : static void
13078 18039 : ix86_rewrite_tls_address_1 (rtx *loc)
13079 : {
13080 18039 : subrtx_ptr_iterator::array_type array;
13081 53524 : FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
13082 : {
/* This inner LOC shadows the parameter and points at the current
   sub-expression.  */
13083 53524 : rtx *loc = *iter;
13084 53524 : if (MEM_P (*loc))
13085 : {
/* X starts out pointing at the local copy ADDR.  After the first step down
   the chain it points into the address expression itself, so the store
   through *X below changes only ADDR for a top-level match and edits the
   expression in place for a deeper one.  */
13086 18226 : rtx addr = XEXP (*loc, 0);
13087 18226 : rtx *x = &addr;
13088 23062 : while (GET_CODE (*x) == PLUS)
13089 : {
13090 : int i;
13091 32570 : for (i = 0; i < 2; i++)
13092 : {
13093 27734 : rtx u = XEXP (*x, i);
13094 27734 : if (GET_CODE (u) == ZERO_EXTEND)
13095 19 : u = XEXP (u, 0);
13096 27734 : if (GET_CODE (u) == UNSPEC
13097 18039 : && XINT (u, 1) == UNSPEC_TP)
13098 : {
13099 : /* NB: Since address override only applies to the
13100 : (reg32) part in fs:(reg32), return if address
13101 : override is used. */
13102 19666 : if (Pmode != word_mode
13103 18039 : && REG_P (XEXP (*x, 1 - i)))
13104 18039 : return;
13105 :
13106 18037 : addr_space_t as = DEFAULT_TLS_SEG_REG;
13107 :
/* Replace the sum by its other operand, i.e. drop the UNSPEC_TP term.  */
13108 18037 : *x = XEXP (*x, 1 - i);
13109 :
13110 18037 : *loc = replace_equiv_address_nv (*loc, addr, true);
13111 18037 : set_mem_addr_space (*loc, as);
13112 18037 : return;
13113 : }
13114 : }
13115 4836 : x = &XEXP (*x, 0);
13116 : }
13117 :
13118 187 : iter.skip_subrtxes ();
13119 : }
13120 : }
13121 18039 : }
13122 :
13123 : /* Rewrite instruction pattern involving TLS address
13124 : so that it refers to a default TLS address space. */
13125 : rtx
13126 18039 : ix86_rewrite_tls_address (rtx pattern)
13127 : {
13128 18039 : pattern = copy_insn (pattern);
13129 18039 : ix86_rewrite_tls_address_1 (&pattern);
13130 18039 : return pattern;
13131 : }
13132 :
13133 : /* Try machine-dependent ways of modifying an illegitimate address
13134 : to be legitimate. If we find one, return the new, valid address.
13135 : This macro is used in only one place: `memory_address' in explow.cc.
13136 :
13137 : OLDX is the address as it was before break_out_memory_refs was called.
13138 : In some cases it is useful to look at this to decide what needs to be done.
13139 :
13140 : It is always safe for this macro to do nothing. It exists to recognize
13141 : opportunities to optimize the output.
13142 :
13143 : For the 80386, we handle X+REG by loading X into a register R and
13144 : using R+REG. R will go in a general reg and indexing will be used.
13145 : However, if REG is a broken-out memory address or multiplication,
13146 : nothing needs to be done because REG can certainly go in a general reg.
13147 :
13148 : When -fpic is used, special handling is needed for symbolic references.
13149 : See comments by legitimize_pic_address in i386.cc for details. */
13150 :
/* X is the address to legitimize and MODE is forwarded to
   ix86_legitimate_address_p; the unnamed second parameter is not used.
   Returns the resulting address, which may be X itself.  Note that several
   steps below store into the operands of X in place.  */
13151 : static rtx
13152 670789 : ix86_legitimize_address (rtx x, rtx, machine_mode mode)
13153 : {
13154 670789 : bool changed = false;
13155 670789 : unsigned log;
13156 :
/* LOG first holds the TLS model of X (0 when X is not a SYMBOL_REF or has
   no TLS model); further down it is reused as a shift count.  */
13157 670789 : log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
13158 151109 : if (log)
13159 20725 : return legitimize_tls_address (x, (enum tls_model) log, false);
/* A TLS symbol plus an offset inside a CONST: legitimize the symbol alone,
   then add the offset back.  */
13160 650064 : if (GET_CODE (x) == CONST
13161 508 : && GET_CODE (XEXP (x, 0)) == PLUS
13162 508 : && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13163 650572 : && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13164 : {
13165 4 : rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13166 : (enum tls_model) log, false);
13167 5 : return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13168 : }
13169 :
13170 650060 : if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13171 : {
13172 : #if TARGET_PECOFF
13173 : rtx tmp = legitimize_pe_coff_symbol (x, true);
13174 : if (tmp)
13175 : return tmp;
13176 : #endif
13177 : }
13178 :
/* Under -fpic, symbolic constants are handed to legitimize_pic_address.  */
13179 650060 : if (flag_pic && SYMBOLIC_CONST (x))
13180 130767 : return legitimize_pic_address (x, 0);
13181 :
13182 : #if TARGET_MACHO
13183 : if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13184 : return machopic_indirect_data_reference (x, 0);
13185 : #endif
13186 :
13187 : /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13188 519293 : if (GET_CODE (x) == ASHIFT
13189 0 : && CONST_INT_P (XEXP (x, 1))
13190 0 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13191 : {
13192 0 : changed = true;
13193 0 : log = INTVAL (XEXP (x, 1));
13194 0 : x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13195 : GEN_INT (1 << log));
13196 : }
13197 :
13198 519293 : if (GET_CODE (x) == PLUS)
13199 : {
13200 : /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13201 :
13202 182774 : if (GET_CODE (XEXP (x, 0)) == ASHIFT
13203 594 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13204 594 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13205 : {
13206 594 : changed = true;
13207 594 : log = INTVAL (XEXP (XEXP (x, 0), 1));
13208 1738 : XEXP (x, 0) = gen_rtx_MULT (Pmode,
13209 : force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13210 : GEN_INT (1 << log));
13211 : }
13212 :
13213 182774 : if (GET_CODE (XEXP (x, 1)) == ASHIFT
13214 0 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13215 0 : && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13216 : {
13217 0 : changed = true;
13218 0 : log = INTVAL (XEXP (XEXP (x, 1), 1));
13219 0 : XEXP (x, 1) = gen_rtx_MULT (Pmode,
13220 : force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13221 : GEN_INT (1 << log));
13222 : }
13223 :
13224 : /* Put multiply first if it isn't already. */
13225 182774 : if (GET_CODE (XEXP (x, 1)) == MULT)
13226 : {
13227 0 : std::swap (XEXP (x, 0), XEXP (x, 1));
13228 0 : changed = true;
13229 : }
13230 :
13231 : /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13232 : into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13233 : created by virtual register instantiation, register elimination, and
13234 : similar optimizations. */
13235 182774 : if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13236 : {
13237 9796 : changed = true;
13238 15404 : x = gen_rtx_PLUS (Pmode,
13239 : gen_rtx_PLUS (Pmode, XEXP (x, 0),
13240 : XEXP (XEXP (x, 1), 0)),
13241 : XEXP (XEXP (x, 1), 1));
13242 : }
13243 :
13244 : /* Canonicalize
13245 : (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13246 : into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13247 172978 : else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13248 108753 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13249 51368 : && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13250 0 : && CONSTANT_P (XEXP (x, 1)))
13251 : {
13252 0 : rtx constant;
13253 0 : rtx other = NULL_RTX;
13254 :
/* Decide which of the two constants is the CONST_INT to fold into the
   other one; CONSTANT stays 0 when neither is.  */
13255 0 : if (CONST_INT_P (XEXP (x, 1)))
13256 : {
13257 0 : constant = XEXP (x, 1);
13258 0 : other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13259 : }
13260 0 : else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13261 : {
13262 : constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13263 : other = XEXP (x, 1);
13264 : }
13265 : else
13266 : constant = 0;
13267 :
13268 0 : if (constant)
13269 : {
13270 0 : changed = true;
13271 0 : x = gen_rtx_PLUS (Pmode,
13272 : gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13273 : XEXP (XEXP (XEXP (x, 0), 1), 0)),
13274 : plus_constant (Pmode, other,
13275 : INTVAL (constant)));
13276 : }
13277 : }
13278 :
/* Stop here if the canonicalizations alone produced a legitimate address.  */
13279 182774 : if (changed && ix86_legitimate_address_p (mode, x, false))
13280 9832 : return x;
13281 :
/* Otherwise copy any remaining multiply operand into a register.  */
13282 172942 : if (GET_CODE (XEXP (x, 0)) == MULT)
13283 : {
13284 19715 : changed = true;
13285 19715 : XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13286 : }
13287 :
13288 172942 : if (GET_CODE (XEXP (x, 1)) == MULT)
13289 : {
13290 0 : changed = true;
13291 0 : XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13292 : }
13293 :
13294 172942 : if (changed
13295 19723 : && REG_P (XEXP (x, 1))
13296 16158 : && REG_P (XEXP (x, 0)))
13297 : return x;
13298 :
13299 156784 : if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13300 : {
13301 1833 : changed = true;
13302 1833 : x = legitimize_pic_address (x, 0);
13303 : }
13304 :
13305 156784 : if (changed && ix86_legitimate_address_p (mode, x, false))
13306 3843 : return x;
13307 :
/* If one operand is a register, compute the other one into a fresh Pmode
   register so that the result is a register-plus-register sum.  */
13308 152941 : if (REG_P (XEXP (x, 0)))
13309 : {
13310 46475 : rtx temp = gen_reg_rtx (Pmode);
13311 43727 : rtx val = force_operand (XEXP (x, 1), temp);
13312 43727 : if (val != temp)
13313 : {
13314 35375 : val = convert_to_mode (Pmode, val, 1);
13315 35084 : emit_move_insn (temp, val);
13316 : }
13317 :
13318 43727 : XEXP (x, 1) = temp;
13319 43727 : return x;
13320 : }
13321 :
13322 109214 : else if (REG_P (XEXP (x, 1)))
13323 : {
13324 3312 : rtx temp = gen_reg_rtx (Pmode);
13325 2662 : rtx val = force_operand (XEXP (x, 0), temp);
13326 2662 : if (val != temp)
13327 : {
13328 0 : val = convert_to_mode (Pmode, val, 1);
13329 0 : emit_move_insn (temp, val);
13330 : }
13331 :
13332 2662 : XEXP (x, 0) = temp;
13333 2662 : return x;
13334 : }
13335 : }
13336 :
13337 : return x;
13338 : }
13339 :
13340 : /* Print an integer constant expression in assembler syntax. Addition
13341 : and subtraction are the only arithmetic that may appear in these
13342 : expressions. FILE is the stdio stream to write to, X is the rtx, and
13343 : CODE is the operand print code from the output string. */
13344 :
13345 : static void
13346 3694178 : output_pic_addr_const (FILE *file, rtx x, int code)
13347 : {
13348 3924063 : char buf[256];
13349 :
13350 3924063 : switch (GET_CODE (x))
13351 : {
13352 0 : case PC:
13353 0 : gcc_assert (flag_pic);
13354 0 : putc ('.', file);
13355 0 : break;
13356 :
13357 868830 : case SYMBOL_REF:
13358 868830 : if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
13359 868830 : output_addr_const (file, x);
13360 : else
13361 : {
13362 : const char *name = XSTR (x, 0);
13363 :
13364 : /* Mark the decl as referenced so that cgraph will
13365 : output the function. */
13366 : if (SYMBOL_REF_DECL (x))
13367 : mark_decl_referenced (SYMBOL_REF_DECL (x));
13368 :
13369 : #if TARGET_MACHO
13370 : if (MACHOPIC_INDIRECT
13371 : && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13372 : name = machopic_indirection_name (x, /*stub_p=*/true);
13373 : #endif
13374 : assemble_name (file, name);
13375 : }
13376 868830 : if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13377 868830 : && code == 'P' && ix86_call_use_plt_p (x))
13378 396758 : fputs ("@PLT", file);
13379 : break;
13380 :
13381 2641 : case LABEL_REF:
13382 2641 : x = XEXP (x, 0);
13383 : /* FALLTHRU */
13384 2641 : case CODE_LABEL:
13385 2641 : ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13386 2641 : assemble_name (asm_out_file, buf);
13387 2641 : break;
13388 :
13389 2621533 : CASE_CONST_SCALAR_INT:
13390 2621533 : output_addr_const (file, x);
13391 2621533 : break;
13392 :
13393 210797 : case CONST:
13394 : /* This used to output parentheses around the expression,
13395 : but that does not work on the 386 (either ATT or BSD assembler). */
13396 210797 : output_pic_addr_const (file, XEXP (x, 0), code);
13397 210797 : break;
13398 :
13399 0 : case CONST_DOUBLE:
13400 : /* We can't handle floating point constants;
13401 : TARGET_PRINT_OPERAND must handle them. */
13402 0 : output_operand_lossage ("floating constant misused");
13403 0 : break;
13404 :
13405 19088 : case PLUS:
13406 : /* Some assemblers need integer constants to appear first. */
13407 19088 : if (CONST_INT_P (XEXP (x, 0)))
13408 : {
13409 0 : output_pic_addr_const (file, XEXP (x, 0), code);
13410 0 : putc ('+', file);
13411 0 : output_pic_addr_const (file, XEXP (x, 1), code);
13412 : }
13413 : else
13414 : {
13415 19088 : gcc_assert (CONST_INT_P (XEXP (x, 1)));
13416 19088 : output_pic_addr_const (file, XEXP (x, 1), code);
13417 19088 : putc ('+', file);
13418 19088 : output_pic_addr_const (file, XEXP (x, 0), code);
13419 : }
13420 : break;
13421 :
13422 0 : case MINUS:
13423 0 : if (!TARGET_MACHO)
13424 0 : putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13425 0 : output_pic_addr_const (file, XEXP (x, 0), code);
13426 0 : putc ('-', file);
13427 0 : output_pic_addr_const (file, XEXP (x, 1), code);
13428 0 : if (!TARGET_MACHO)
13429 0 : putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13430 0 : break;
13431 :
13432 201174 : case UNSPEC:
13433 201174 : gcc_assert (XVECLEN (x, 0) == 1);
13434 201174 : output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13435 201174 : switch (XINT (x, 1))
13436 : {
13437 43344 : case UNSPEC_GOT:
13438 43344 : fputs ("@GOT", file);
13439 43344 : break;
13440 77802 : case UNSPEC_GOTOFF:
13441 77802 : fputs ("@GOTOFF", file);
13442 77802 : break;
13443 36 : case UNSPEC_PLTOFF:
13444 36 : fputs ("@PLTOFF", file);
13445 36 : break;
13446 0 : case UNSPEC_PCREL:
13447 0 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13448 : "(%rip)" : "[rip]", file);
13449 0 : break;
13450 75808 : case UNSPEC_GOTPCREL:
13451 75808 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13452 : "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13453 75808 : break;
13454 0 : case UNSPEC_GOTTPOFF:
13455 : /* FIXME: This might be @TPOFF in Sun ld too. */
13456 0 : fputs ("@gottpoff", file);
13457 0 : break;
13458 0 : case UNSPEC_TPOFF:
13459 0 : fputs ("@tpoff", file);
13460 0 : break;
13461 1459 : case UNSPEC_NTPOFF:
13462 1459 : if (TARGET_64BIT)
13463 1459 : fputs ("@tpoff", file);
13464 : else
13465 0 : fputs ("@ntpoff", file);
13466 : break;
13467 315 : case UNSPEC_DTPOFF:
13468 315 : fputs ("@dtpoff", file);
13469 315 : break;
13470 2410 : case UNSPEC_GOTNTPOFF:
13471 2410 : if (TARGET_64BIT)
13472 2147 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13473 : "@gottpoff(%rip)": "@gottpoff[rip]", file);
13474 : else
13475 263 : fputs ("@gotntpoff", file);
13476 : break;
13477 0 : case UNSPEC_INDNTPOFF:
13478 0 : fputs ("@indntpoff", file);
13479 0 : break;
13480 0 : case UNSPEC_SECREL32:
13481 0 : fputs ("@secrel32", file);
13482 0 : break;
13483 : #if TARGET_MACHO
13484 : case UNSPEC_MACHOPIC_OFFSET:
13485 : putc ('-', file);
13486 : machopic_output_function_base_name (file);
13487 : break;
13488 : #endif
13489 0 : default:
13490 0 : output_operand_lossage ("invalid UNSPEC as operand");
13491 0 : break;
13492 : }
13493 : break;
13494 :
13495 0 : default:
13496 0 : output_operand_lossage ("invalid expression as operand");
13497 : }
13498 3694178 : }
13499 :
13500 : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13501 : We need to emit DTP-relative relocations. */
13502 :
13503 : static void ATTRIBUTE_UNUSED
13504 667 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13505 : {
13506 667 : fputs (ASM_LONG, file);
13507 667 : output_addr_const (file, x);
13508 : #if TARGET_WIN32_TLS
13509 : fputs ("@secrel32", file);
13510 : #else
13511 667 : fputs ("@dtpoff", file);
13512 : #endif
13513 667 : switch (size)
13514 : {
13515 : case 4:
13516 : break;
13517 548 : case 8:
13518 548 : fputs (", 0", file);
13519 548 : break;
13520 0 : default:
13521 0 : gcc_unreachable ();
13522 : }
13523 667 : }
13524 :
13525 : /* Return true if X is a representation of the PIC register. This copes
13526 : with calls from ix86_find_base_term, where the register might have
13527 : been replaced by a cselib value. */
13528 :
13529 : static bool
13530 26852849 : ix86_pic_register_p (rtx x)
13531 : {
13532 26852849 : if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13533 750305 : return (pic_offset_table_rtx
13534 750305 : && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13535 26102544 : else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13536 : return true;
13537 26098027 : else if (!REG_P (x))
13538 : return false;
13539 25493977 : else if (pic_offset_table_rtx)
13540 : {
13541 25474312 : if (REGNO (x) == REGNO (pic_offset_table_rtx))
13542 : return true;
13543 403506 : if (HARD_REGISTER_P (x)
13544 382048 : && !HARD_REGISTER_P (pic_offset_table_rtx)
13545 785554 : && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13546 : return true;
13547 : return false;
13548 : }
13549 : else
13550 19665 : return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13551 : }
13552 :
13553 : /* Helper function for ix86_delegitimize_address.
13554 : Attempt to delegitimize TLS local-exec accesses. */
13555 :
13556 : static rtx
13557 3493528169 : ix86_delegitimize_tls_address (rtx orig_x)
13558 : {
13559 3493528169 : rtx x = orig_x, unspec;
13560 3493528169 : struct ix86_address addr;
13561 :
13562 3493528169 : if (!TARGET_TLS_DIRECT_SEG_REFS)
13563 : return orig_x;
13564 3493528169 : if (MEM_P (x))
13565 42913412 : x = XEXP (x, 0);
13566 5017868512 : if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13567 : return orig_x;
13568 1680213201 : if (ix86_decompose_address (x, &addr) == 0
13569 1942520411 : || addr.seg != DEFAULT_TLS_SEG_REG
13570 268212 : || addr.disp == NULL_RTX
13571 1680429367 : || GET_CODE (addr.disp) != CONST)
13572 : return orig_x;
13573 111823 : unspec = XEXP (addr.disp, 0);
13574 111823 : if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13575 65331 : unspec = XEXP (unspec, 0);
13576 111823 : if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13577 : return orig_x;
13578 111760 : x = XVECEXP (unspec, 0, 0);
13579 111760 : gcc_assert (SYMBOL_REF_P (x));
13580 111760 : if (unspec != XEXP (addr.disp, 0))
13581 86301 : x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13582 111760 : if (addr.index)
13583 : {
13584 185 : rtx idx = addr.index;
13585 185 : if (addr.scale != 1)
13586 185 : idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13587 185 : x = gen_rtx_PLUS (Pmode, idx, x);
13588 : }
13589 111760 : if (addr.base)
13590 2 : x = gen_rtx_PLUS (Pmode, addr.base, x);
13591 111760 : if (MEM_P (orig_x))
13592 198 : x = replace_equiv_address_nv (orig_x, x);
13593 : return x;
13594 : }
13595 :
13596 : /* In the name of slightly smaller debug output, and to cater to
13597 : general assembler lossage, recognize PIC+GOTOFF and turn it back
13598 : into a direct symbol reference.
13599 :
13600 : On Darwin, this is necessary to avoid a crash, because Darwin
13601 : has a different PIC label for each routine but the DWARF debugging
13602 : information is not associated with any particular routine, so it's
13603 : necessary to remove references to the PIC label from RTL stored by
13604 : the DWARF output code.
13605 :
13606 : This helper is used in the normal ix86_delegitimize_address
13607 : entrypoint (e.g. used in the target delegitimization hook) and
13608 : in ix86_find_base_term. As compile time memory optimization, we
13609 : avoid allocating rtxes that will not change anything on the outcome
13610 : of the callers (find_base_value and find_base_term). */
13611 :
13612 : static inline rtx
13613 3518361084 : ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13614 : {
13615 3518361084 : rtx orig_x = delegitimize_mem_from_attrs (x);
13616 : /* addend is NULL or some rtx if x is something+GOTOFF where
13617 : something doesn't include the PIC register. */
13618 3518361084 : rtx addend = NULL_RTX;
13619 : /* reg_addend is NULL or a multiple of some register. */
13620 3518361084 : rtx reg_addend = NULL_RTX;
13621 : /* const_addend is NULL or a const_int. */
13622 3518361084 : rtx const_addend = NULL_RTX;
13623 : /* This is the result, or NULL. */
13624 3518361084 : rtx result = NULL_RTX;
13625 :
13626 3518361084 : x = orig_x;
13627 :
13628 3518361084 : if (MEM_P (x))
13629 62115782 : x = XEXP (x, 0);
13630 :
13631 3518361084 : if (TARGET_64BIT)
13632 : {
13633 253696974 : if (GET_CODE (x) == CONST
13634 8666660 : && GET_CODE (XEXP (x, 0)) == PLUS
13635 6730827 : && GET_MODE (XEXP (x, 0)) == Pmode
13636 6730778 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13637 6730778 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13638 253701108 : && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13639 : {
13640 : /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13641 : base. A CONST can't be arg_pointer_rtx based. */
13642 0 : if (base_term_p && MEM_P (orig_x))
13643 : return orig_x;
13644 0 : rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13645 0 : x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13646 0 : if (MEM_P (orig_x))
13647 0 : x = replace_equiv_address_nv (orig_x, x);
13648 0 : return x;
13649 : }
13650 :
13651 253696974 : if (GET_CODE (x) == CONST
13652 8666660 : && GET_CODE (XEXP (x, 0)) == UNSPEC
13653 1935882 : && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13654 654674 : || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13655 1281208 : && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13656 : {
13657 295663 : x = XVECEXP (XEXP (x, 0), 0, 0);
13658 295663 : if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13659 : {
13660 9 : x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13661 9 : if (x == NULL_RTX)
13662 : return orig_x;
13663 : }
13664 295663 : return x;
13665 : }
13666 :
13667 253401311 : if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13668 253399650 : return ix86_delegitimize_tls_address (orig_x);
13669 :
13670 : /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13671 : and -mcmodel=medium -fpic. */
13672 : }
13673 :
13674 3264665771 : if (GET_CODE (x) != PLUS
13675 1548877367 : || GET_CODE (XEXP (x, 1)) != CONST)
13676 3238362399 : return ix86_delegitimize_tls_address (orig_x);
13677 :
13678 26303372 : if (ix86_pic_register_p (XEXP (x, 0)))
13679 : /* %ebx + GOT/GOTOFF */
13680 : ;
13681 1275534 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
13682 : {
13683 : /* %ebx + %reg * scale + GOT/GOTOFF */
13684 472513 : reg_addend = XEXP (x, 0);
13685 472513 : if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13686 395549 : reg_addend = XEXP (reg_addend, 1);
13687 76964 : else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13688 45509 : reg_addend = XEXP (reg_addend, 0);
13689 : else
13690 : {
13691 31455 : reg_addend = NULL_RTX;
13692 31455 : addend = XEXP (x, 0);
13693 : }
13694 : }
13695 : else
13696 : addend = XEXP (x, 0);
13697 :
13698 26303372 : x = XEXP (XEXP (x, 1), 0);
13699 26303372 : if (GET_CODE (x) == PLUS
13700 1443934 : && CONST_INT_P (XEXP (x, 1)))
13701 : {
13702 1443934 : const_addend = XEXP (x, 1);
13703 1443934 : x = XEXP (x, 0);
13704 : }
13705 :
13706 26303372 : if (GET_CODE (x) == UNSPEC
13707 25631942 : && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13708 6725235 : || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13709 1094694 : || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13710 4 : && !MEM_P (orig_x) && !addend)))
13711 24537252 : result = XVECEXP (x, 0, 0);
13712 :
13713 24537252 : if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13714 : && !MEM_P (orig_x))
13715 : result = XVECEXP (x, 0, 0);
13716 :
13717 24537252 : if (! result)
13718 1766120 : return ix86_delegitimize_tls_address (orig_x);
13719 :
13720 : /* For (PLUS something CONST_INT) both find_base_{value,term} just
13721 : recurse on the first operand. */
13722 24537252 : if (const_addend && !base_term_p)
13723 354438 : result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13724 24537252 : if (reg_addend)
13725 857804 : result = gen_rtx_PLUS (Pmode, reg_addend, result);
13726 24537252 : if (addend)
13727 : {
13728 : /* If the rest of original X doesn't involve the PIC register, add
13729 : addend and subtract pic_offset_table_rtx. This can happen e.g.
13730 : for code like:
13731 : leal (%ebx, %ecx, 4), %ecx
13732 : ...
13733 : movl foo@GOTOFF(%ecx), %edx
13734 : in which case we return (%ecx - %ebx) + foo
13735 : or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13736 : and reload has completed. Don't do the latter for debug,
13737 : as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13738 135807 : if (pic_offset_table_rtx
13739 135807 : && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13740 2370 : result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13741 : pic_offset_table_rtx),
13742 : result);
13743 135017 : else if (base_term_p
13744 128685 : && pic_offset_table_rtx
13745 : && !TARGET_MACHO
13746 : && !TARGET_VXWORKS_VAROFF)
13747 : {
13748 257370 : rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13749 257370 : tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13750 257370 : result = gen_rtx_PLUS (Pmode, tmp, result);
13751 128685 : }
13752 : else
13753 : return orig_x;
13754 : }
13755 49061755 : if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13756 : {
13757 0 : result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13758 0 : if (result == NULL_RTX)
13759 : return orig_x;
13760 : }
13761 : return result;
13762 : }
13763 :
13764 : /* The normal instantiation of the above template. */
13765 :
13766 : static rtx
13767 324376186 : ix86_delegitimize_address (rtx x)
13768 : {
13769 324376186 : return ix86_delegitimize_address_1 (x, false);
13770 : }
13771 :
13772 : /* If X is a machine specific address (i.e. a symbol or label being
13773 : referenced as a displacement from the GOT implemented using an
13774 : UNSPEC), then return the base term. Otherwise return X. */
13775 :
13776 : rtx
13777 6676884574 : ix86_find_base_term (rtx x)
13778 : {
13779 6676884574 : rtx term;
13780 :
13781 6676884574 : if (TARGET_64BIT)
13782 : {
13783 3482899676 : if (GET_CODE (x) != CONST)
13784 : return x;
13785 45070592 : term = XEXP (x, 0);
13786 45070592 : if (GET_CODE (term) == PLUS
13787 45055719 : && CONST_INT_P (XEXP (term, 1)))
13788 45055719 : term = XEXP (term, 0);
13789 45070592 : if (GET_CODE (term) != UNSPEC
13790 40579 : || (XINT (term, 1) != UNSPEC_GOTPCREL
13791 40579 : && XINT (term, 1) != UNSPEC_PCREL))
13792 : return x;
13793 :
13794 0 : return XVECEXP (term, 0, 0);
13795 : }
13796 :
13797 3193984898 : return ix86_delegitimize_address_1 (x, true);
13798 : }
13799 :
13800 : /* Return true if X shouldn't be emitted into the debug info.
13801 : Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13802 : symbol easily into the .debug_info section, so we need not to
13803 : delegitimize, but instead assemble as @gotoff.
13804 : Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13805 : assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
13806 :
13807 : static bool
13808 1879396 : ix86_const_not_ok_for_debug_p (rtx x)
13809 : {
13810 1879396 : if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13811 : return true;
13812 :
13813 1879376 : if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13814 0 : return true;
13815 :
13816 : return false;
13817 : }
13818 :
13819 : static void
13820 7141600 : put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13821 : bool fp, FILE *file)
13822 : {
13823 7141600 : const char *suffix;
13824 :
13825 7141600 : if (mode == CCFPmode)
13826 : {
13827 564911 : code = ix86_fp_compare_code_to_integer (code);
13828 564911 : mode = CCmode;
13829 : }
13830 7141600 : if (reverse)
13831 209123 : code = reverse_condition (code);
13832 :
13833 7141600 : switch (code)
13834 : {
13835 2772440 : case EQ:
13836 2772440 : gcc_assert (mode != CCGZmode);
13837 2772440 : switch (mode)
13838 : {
13839 : case E_CCAmode:
13840 : suffix = "a";
13841 : break;
13842 : case E_CCCmode:
13843 26316 : suffix = "c";
13844 : break;
13845 : case E_CCOmode:
13846 7141600 : suffix = "o";
13847 : break;
13848 : case E_CCPmode:
13849 233716 : suffix = "p";
13850 : break;
13851 : case E_CCSmode:
13852 121529 : suffix = "s";
13853 : break;
13854 2752732 : default:
13855 2752732 : suffix = "e";
13856 2752732 : break;
13857 : }
13858 : break;
13859 2318476 : case NE:
13860 2318476 : gcc_assert (mode != CCGZmode);
13861 2318476 : switch (mode)
13862 : {
13863 : case E_CCAmode:
13864 : suffix = "na";
13865 : break;
13866 : case E_CCCmode:
13867 12039 : suffix = "nc";
13868 : break;
13869 10769 : case E_CCOmode:
13870 10769 : suffix = "no";
13871 10769 : break;
13872 : case E_CCPmode:
13873 4433 : suffix = "np";
13874 : break;
13875 : case E_CCSmode:
13876 50693 : suffix = "ns";
13877 : break;
13878 2305821 : default:
13879 2305821 : suffix = "ne";
13880 2305821 : break;
13881 : }
13882 : break;
13883 256062 : case GT:
13884 256062 : gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13885 : suffix = "g";
13886 : break;
13887 173737 : case GTU:
13888 : /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13889 : Those same assemblers have the same but opposite lossage on cmov. */
13890 173737 : if (mode == CCmode)
13891 173799 : suffix = fp ? "nbe" : "a";
13892 : else
13893 0 : gcc_unreachable ();
13894 : break;
13895 236818 : case LT:
13896 236818 : switch (mode)
13897 : {
13898 : case E_CCNOmode:
13899 : case E_CCGOCmode:
13900 : suffix = "s";
13901 : break;
13902 :
13903 : case E_CCmode:
13904 : case E_CCGCmode:
13905 : case E_CCGZmode:
13906 7141600 : suffix = "l";
13907 : break;
13908 :
13909 0 : default:
13910 0 : gcc_unreachable ();
13911 : }
13912 : break;
13913 445407 : case LTU:
13914 445407 : if (mode == CCmode || mode == CCGZmode)
13915 : suffix = "b";
13916 24985 : else if (mode == CCCmode)
13917 26316 : suffix = fp ? "b" : "c";
13918 : else
13919 0 : gcc_unreachable ();
13920 : break;
13921 145364 : case GE:
13922 145364 : switch (mode)
13923 : {
13924 : case E_CCNOmode:
13925 : case E_CCGOCmode:
13926 : suffix = "ns";
13927 : break;
13928 :
13929 : case E_CCmode:
13930 : case E_CCGCmode:
13931 : case E_CCGZmode:
13932 7141600 : suffix = "ge";
13933 : break;
13934 :
13935 0 : default:
13936 0 : gcc_unreachable ();
13937 : }
13938 : break;
13939 189961 : case GEU:
13940 189961 : if (mode == CCmode || mode == CCGZmode)
13941 : suffix = "nb";
13942 10173 : else if (mode == CCCmode)
13943 12039 : suffix = fp ? "nb" : "nc";
13944 : else
13945 0 : gcc_unreachable ();
13946 : break;
13947 246640 : case LE:
13948 246640 : gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13949 : suffix = "le";
13950 : break;
13951 118544 : case LEU:
13952 118544 : if (mode == CCmode)
13953 : suffix = "be";
13954 : else
13955 0 : gcc_unreachable ();
13956 : break;
13957 233716 : case UNORDERED:
13958 233723 : suffix = fp ? "u" : "p";
13959 : break;
13960 4435 : case ORDERED:
13961 4440 : suffix = fp ? "nu" : "np";
13962 : break;
13963 0 : default:
13964 0 : gcc_unreachable ();
13965 : }
13966 7141600 : fputs (suffix, file);
13967 7141600 : }
13968 :
13969 : /* Print the name of register X to FILE based on its machine mode and number.
13970 : If CODE is 'w', pretend the mode is HImode.
13971 : If CODE is 'b', pretend the mode is QImode.
13972 : If CODE is 'k', pretend the mode is SImode.
13973 : If CODE is 'q', pretend the mode is DImode.
13974 : If CODE is 'x', pretend the mode is V4SFmode.
13975 : If CODE is 't', pretend the mode is V8SFmode.
13976 : If CODE is 'g', pretend the mode is V16SFmode.
13977 : If CODE is 'h', pretend the reg is the 'high' byte register.
13978 : If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13979 : If CODE is 'd', duplicate the operand for AVX instruction.
13980 : If CODE is 'V', print naked full integer register name without %.
13981 : */
13982 :
13983 : void
13984 123594049 : print_reg (rtx x, int code, FILE *file)
13985 : {
13986 123594049 : const char *reg;
13987 123594049 : int msize;
13988 123594049 : unsigned int regno;
13989 123594049 : bool duplicated;
13990 :
13991 123594049 : if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13992 123591596 : putc ('%', file);
13993 :
13994 123594049 : if (x == pc_rtx)
13995 : {
13996 5746829 : gcc_assert (TARGET_64BIT);
13997 5746829 : fputs ("rip", file);
13998 5746829 : return;
13999 : }
14000 :
14001 117847220 : if (code == 'y' && STACK_TOP_P (x))
14002 : {
14003 289920 : fputs ("st(0)", file);
14004 289920 : return;
14005 : }
14006 :
14007 117557300 : if (code == 'w')
14008 : msize = 2;
14009 : else if (code == 'b')
14010 : msize = 1;
14011 : else if (code == 'k')
14012 : msize = 4;
14013 : else if (code == 'q')
14014 : msize = 8;
14015 : else if (code == 'h')
14016 : msize = 0;
14017 : else if (code == 'x')
14018 : msize = 16;
14019 : else if (code == 't')
14020 : msize = 32;
14021 : else if (code == 'g')
14022 : msize = 64;
14023 : else
14024 200906834 : msize = GET_MODE_SIZE (GET_MODE (x));
14025 :
14026 117557300 : regno = REGNO (x);
14027 :
14028 117557300 : if (regno == ARG_POINTER_REGNUM
14029 117557300 : || regno == FRAME_POINTER_REGNUM
14030 117557300 : || regno == FPSR_REG)
14031 : {
14032 0 : output_operand_lossage
14033 0 : ("invalid use of register '%s'", reg_names[regno]);
14034 0 : return;
14035 : }
14036 117557300 : else if (regno == FLAGS_REG)
14037 : {
14038 1 : output_operand_lossage ("invalid use of asm flag output");
14039 1 : return;
14040 : }
14041 :
14042 117557299 : if (code == 'V')
14043 : {
14044 1 : if (GENERAL_REGNO_P (regno))
14045 2 : msize = GET_MODE_SIZE (word_mode);
14046 : else
14047 0 : error ("%<V%> modifier on non-integer register");
14048 : }
14049 :
14050 117557299 : duplicated = code == 'd' && TARGET_AVX;
14051 :
14052 117557299 : switch (msize)
14053 : {
14054 78101021 : case 16:
14055 78101021 : case 12:
14056 78101021 : case 8:
14057 146139931 : if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
14058 5 : warning (0, "unsupported size for integer register");
14059 : /* FALLTHRU */
14060 114123371 : case 4:
14061 114123371 : if (LEGACY_INT_REGNO_P (regno))
14062 123395879 : putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
14063 : /* FALLTHRU */
14064 115013775 : case 2:
14065 22267992 : normal:
14066 115013775 : reg = hi_reg_name[regno];
14067 115013775 : break;
14068 2280182 : case 1:
14069 2280182 : if (regno >= ARRAY_SIZE (qi_reg_name))
14070 274566 : goto normal;
14071 2005616 : if (!ANY_QI_REGNO_P (regno))
14072 0 : error ("unsupported size for integer register");
14073 2005616 : reg = qi_reg_name[regno];
14074 2005616 : break;
14075 27049 : case 0:
14076 27049 : if (regno >= ARRAY_SIZE (qi_high_reg_name))
14077 0 : goto normal;
14078 27049 : reg = qi_high_reg_name[regno];
14079 27049 : break;
14080 510859 : case 32:
14081 510859 : case 64:
14082 510859 : if (SSE_REGNO_P (regno))
14083 : {
14084 510859 : gcc_assert (!duplicated);
14085 715875 : putc (msize == 32 ? 'y' : 'z', file);
14086 510859 : reg = hi_reg_name[regno] + 1;
14087 510859 : break;
14088 : }
14089 0 : goto normal;
14090 0 : default:
14091 0 : gcc_unreachable ();
14092 : }
14093 :
14094 117557299 : fputs (reg, file);
14095 :
14096 : /* Irritatingly, AMD extended registers use
14097 : different naming convention: "r%d[bwd]" */
14098 117557299 : if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
14099 : {
14100 10477374 : gcc_assert (TARGET_64BIT);
14101 10477374 : switch (msize)
14102 : {
14103 0 : case 0:
14104 0 : error ("extended registers have no high halves");
14105 0 : break;
14106 182530 : case 1:
14107 182530 : putc ('b', file);
14108 182530 : break;
14109 28302 : case 2:
14110 28302 : putc ('w', file);
14111 28302 : break;
14112 2542511 : case 4:
14113 2542511 : putc ('d', file);
14114 2542511 : break;
14115 : case 8:
14116 : /* no suffix */
14117 : break;
14118 0 : default:
14119 0 : error ("unsupported operand size for extended register");
14120 0 : break;
14121 : }
14122 10477374 : return;
14123 : }
14124 :
14125 107079925 : if (duplicated)
14126 : {
14127 16877 : if (ASSEMBLER_DIALECT == ASM_ATT)
14128 16856 : fprintf (file, ", %%%s", reg);
14129 : else
14130 21 : fprintf (file, ", %s", reg);
14131 : }
14132 : }
14133 :
14134 : /* Meaning of CODE:
14135 : L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14136 : C -- print opcode suffix for set/cmov insn.
14137 : c -- like C, but print reversed condition
14138 : F,f -- likewise, but for floating-point.
14139 : O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14140 : otherwise nothing
14141 : R -- print embedded rounding and sae.
14142 : r -- print only sae.
14143 : z -- print the opcode suffix for the size of the current operand.
14144 : Z -- likewise, with special suffixes for x87 instructions.
14145 : * -- print a star (in certain assembler syntax)
14146 : A -- print an absolute memory reference.
14147 : E -- print address with DImode register names if TARGET_64BIT.
14148 : w -- print the operand as if it's a "word" (HImode) even if it isn't.
14149 : s -- print a shift double count, followed by the assemblers argument
14150 : delimiter.
14151 : b -- print the QImode name of the register for the indicated operand.
14152 : %b0 would print %al if operands[0] is reg 0.
14153 : w -- likewise, print the HImode name of the register.
14154 : k -- likewise, print the SImode name of the register.
14155 : q -- likewise, print the DImode name of the register.
14156 : x -- likewise, print the V4SFmode name of the register.
14157 : t -- likewise, print the V8SFmode name of the register.
14158 : g -- likewise, print the V16SFmode name of the register.
14159 : h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14160 : y -- print "st(0)" instead of "st" as a register.
14161 : d -- print duplicated register operand for AVX instruction.
14162 : D -- print condition for SSE cmp instruction.
14163 : P -- if PIC, print an @PLT suffix. For -fno-plt, load function
14164 : address from GOT.
14165 : p -- print raw symbol name.
14166 : X -- don't print any sort of PIC '@' suffix for a symbol.
14167 : & -- print some in-use local-dynamic symbol name.
14168 : H -- print a memory address offset by 8; used for sse high-parts
14169 : Y -- print condition for XOP pcom* instruction.
14170 : V -- print naked full integer register name without %.
14171 : v -- print segment override prefix
14172 : + -- print a branch hint as 'cs' or 'ds' prefix
14173 : ; -- print a semicolon (after prefixes due to bug in older gas).
14174 : ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14175 : ^ -- print addr32 prefix if Pmode != word_mode
14176 : M -- print addr32 prefix for TARGET_X32 with VSIB address.
14177 : ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
14178 : N -- print maskz if it's constant 0 operand.
14179 : G -- print embedded flag for ccmp/ctest.
14180 : */
14181 :
14182 : void
14183 176999509 : ix86_print_operand (FILE *file, rtx x, int code)
14184 : {
14185 177189503 : if (code)
14186 : {
14187 62151842 : switch (code)
14188 : {
14189 189990 : case 'A':
14190 189990 : switch (ASSEMBLER_DIALECT)
14191 : {
14192 189990 : case ASM_ATT:
14193 189990 : putc ('*', file);
14194 189990 : break;
14195 :
14196 0 : case ASM_INTEL:
14197 : /* Intel syntax. For absolute addresses, registers should not
14198 : be surrounded by braces. */
14199 0 : if (!REG_P (x))
14200 : {
14201 0 : putc ('[', file);
14202 0 : ix86_print_operand (file, x, 0);
14203 0 : putc (']', file);
14204 0 : return;
14205 : }
14206 : break;
14207 :
14208 0 : default:
14209 0 : gcc_unreachable ();
14210 : }
14211 :
14212 189990 : ix86_print_operand (file, x, 0);
14213 189990 : return;
14214 :
14215 3557388 : case 'E':
14216 : /* Wrap address in an UNSPEC to declare special handling. */
14217 3557388 : if (TARGET_64BIT)
14218 3072316 : x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14219 :
14220 3557388 : output_address (VOIDmode, x);
14221 3557388 : return;
14222 :
14223 0 : case 'L':
14224 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14225 0 : putc ('l', file);
14226 0 : return;
14227 :
14228 0 : case 'W':
14229 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14230 0 : putc ('w', file);
14231 0 : return;
14232 :
14233 0 : case 'B':
14234 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14235 0 : putc ('b', file);
14236 0 : return;
14237 :
14238 0 : case 'Q':
14239 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14240 0 : putc ('l', file);
14241 0 : return;
14242 :
14243 0 : case 'S':
14244 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14245 0 : putc ('s', file);
14246 0 : return;
14247 :
14248 0 : case 'T':
14249 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14250 0 : putc ('t', file);
14251 0 : return;
14252 :
14253 : case 'O':
14254 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14255 : if (ASSEMBLER_DIALECT != ASM_ATT)
14256 : return;
14257 :
14258 : switch (GET_MODE_SIZE (GET_MODE (x)))
14259 : {
14260 : case 2:
14261 : putc ('w', file);
14262 : break;
14263 :
14264 : case 4:
14265 : putc ('l', file);
14266 : break;
14267 :
14268 : case 8:
14269 : putc ('q', file);
14270 : break;
14271 :
14272 : default:
14273 : output_operand_lossage ("invalid operand size for operand "
14274 : "code 'O'");
14275 : return;
14276 : }
14277 :
14278 : putc ('.', file);
14279 : #endif
14280 : return;
14281 :
14282 38031 : case 'z':
14283 38031 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14284 : {
14285 : /* Opcodes don't get size suffixes if using Intel opcodes. */
14286 38029 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14287 : return;
14288 :
14289 76058 : switch (GET_MODE_SIZE (GET_MODE (x)))
14290 : {
14291 6 : case 1:
14292 6 : putc ('b', file);
14293 6 : return;
14294 :
14295 6 : case 2:
14296 6 : putc ('w', file);
14297 6 : return;
14298 :
14299 37534 : case 4:
14300 37534 : putc ('l', file);
14301 37534 : return;
14302 :
14303 483 : case 8:
14304 483 : putc ('q', file);
14305 483 : return;
14306 :
14307 0 : default:
14308 0 : output_operand_lossage ("invalid operand size for operand "
14309 : "code 'z'");
14310 0 : return;
14311 : }
14312 : }
14313 :
14314 2 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14315 : {
14316 1 : if (this_is_asm_operands)
14317 1 : warning_for_asm (this_is_asm_operands,
14318 : "non-integer operand used with operand code %<z%>");
14319 : else
14320 0 : warning (0, "non-integer operand used with operand code %<z%>");
14321 : }
14322 : /* FALLTHRU */
14323 :
14324 378667 : case 'Z':
14325 : /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14326 378667 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14327 : return;
14328 :
14329 378667 : if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14330 : {
14331 29314 : switch (GET_MODE_SIZE (GET_MODE (x)))
14332 : {
14333 3525 : case 2:
14334 : #ifdef HAVE_AS_IX86_FILDS
14335 3525 : putc ('s', file);
14336 : #endif
14337 3525 : return;
14338 :
14339 3941 : case 4:
14340 3941 : putc ('l', file);
14341 3941 : return;
14342 :
14343 7191 : case 8:
14344 : #ifdef HAVE_AS_IX86_FILDQ
14345 7191 : putc ('q', file);
14346 : #else
14347 : fputs ("ll", file);
14348 : #endif
14349 7191 : return;
14350 :
14351 : default:
14352 : break;
14353 : }
14354 : }
14355 364010 : else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14356 : {
14357 : /* 387 opcodes don't get size suffixes
14358 : if the operands are registers. */
14359 364008 : if (STACK_REG_P (x))
14360 : return;
14361 :
14362 683488 : switch (GET_MODE_SIZE (GET_MODE (x)))
14363 : {
14364 23317 : case 4:
14365 23317 : putc ('s', file);
14366 23317 : return;
14367 :
14368 32727 : case 8:
14369 32727 : putc ('l', file);
14370 32727 : return;
14371 :
14372 285698 : case 12:
14373 285698 : case 16:
14374 285698 : putc ('t', file);
14375 285698 : return;
14376 :
14377 : default:
14378 : break;
14379 : }
14380 : }
14381 : else
14382 : {
14383 2 : output_operand_lossage ("invalid operand type used with "
14384 : "operand code '%c'", code);
14385 2 : return;
14386 : }
14387 :
14388 2 : output_operand_lossage ("invalid operand size for operand code '%c'",
14389 : code);
14390 2 : return;
14391 :
14392 : case 'd':
14393 : case 'b':
14394 : case 'w':
14395 : case 'k':
14396 : case 'q':
14397 : case 'h':
14398 : case 't':
14399 : case 'g':
14400 : case 'y':
14401 : case 'x':
14402 : case 'X':
14403 : case 'P':
14404 : case 'p':
14405 : case 'V':
14406 : break;
14407 :
14408 0 : case 's':
14409 0 : if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14410 : {
14411 0 : ix86_print_operand (file, x, 0);
14412 0 : fputs (", ", file);
14413 : }
14414 0 : return;
14415 :
14416 494 : case 'Y':
14417 494 : switch (GET_CODE (x))
14418 : {
14419 182 : case NE:
14420 182 : fputs ("neq", file);
14421 182 : break;
14422 32 : case EQ:
14423 32 : fputs ("eq", file);
14424 32 : break;
14425 64 : case GE:
14426 64 : case GEU:
14427 64 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14428 64 : break;
14429 40 : case GT:
14430 40 : case GTU:
14431 40 : fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14432 40 : break;
14433 64 : case LE:
14434 64 : case LEU:
14435 64 : fputs ("le", file);
14436 64 : break;
14437 112 : case LT:
14438 112 : case LTU:
14439 112 : fputs ("lt", file);
14440 112 : break;
14441 0 : case UNORDERED:
14442 0 : fputs ("unord", file);
14443 0 : break;
14444 0 : case ORDERED:
14445 0 : fputs ("ord", file);
14446 0 : break;
14447 0 : case UNEQ:
14448 0 : fputs ("ueq", file);
14449 0 : break;
14450 0 : case UNGE:
14451 0 : fputs ("nlt", file);
14452 0 : break;
14453 0 : case UNGT:
14454 0 : fputs ("nle", file);
14455 0 : break;
14456 0 : case UNLE:
14457 0 : fputs ("ule", file);
14458 0 : break;
14459 0 : case UNLT:
14460 0 : fputs ("ult", file);
14461 0 : break;
14462 0 : case LTGT:
14463 0 : fputs ("une", file);
14464 0 : break;
14465 0 : default:
14466 0 : output_operand_lossage ("operand is not a condition code, "
14467 : "invalid operand code 'Y'");
14468 0 : return;
14469 : }
14470 494 : return;
14471 :
14472 9327 : case 'D':
14473 : /* Little bit of braindamage here. The SSE compare instructions
14474 : use completely different names for the comparisons than the
14475 : fp conditional moves. */
14476 9327 : switch (GET_CODE (x))
14477 : {
14478 3 : case UNEQ:
14479 3 : if (TARGET_AVX)
14480 : {
14481 3 : fputs ("eq_us", file);
14482 3 : break;
14483 : }
14484 : /* FALLTHRU */
14485 4635 : case EQ:
14486 4635 : fputs ("eq", file);
14487 4635 : break;
14488 0 : case UNLT:
14489 0 : if (TARGET_AVX)
14490 : {
14491 0 : fputs ("nge", file);
14492 0 : break;
14493 : }
14494 : /* FALLTHRU */
14495 1628 : case LT:
14496 1628 : fputs ("lt", file);
14497 1628 : break;
14498 0 : case UNLE:
14499 0 : if (TARGET_AVX)
14500 : {
14501 0 : fputs ("ngt", file);
14502 0 : break;
14503 : }
14504 : /* FALLTHRU */
14505 795 : case LE:
14506 795 : fputs ("le", file);
14507 795 : break;
14508 95 : case UNORDERED:
14509 95 : fputs ("unord", file);
14510 95 : break;
14511 24 : case LTGT:
14512 24 : if (TARGET_AVX)
14513 : {
14514 24 : fputs ("neq_oq", file);
14515 24 : break;
14516 : }
14517 : /* FALLTHRU */
14518 893 : case NE:
14519 893 : fputs ("neq", file);
14520 893 : break;
14521 0 : case GE:
14522 0 : if (TARGET_AVX)
14523 : {
14524 0 : fputs ("ge", file);
14525 0 : break;
14526 : }
14527 : /* FALLTHRU */
14528 403 : case UNGE:
14529 403 : fputs ("nlt", file);
14530 403 : break;
14531 0 : case GT:
14532 0 : if (TARGET_AVX)
14533 : {
14534 0 : fputs ("gt", file);
14535 0 : break;
14536 : }
14537 : /* FALLTHRU */
14538 768 : case UNGT:
14539 768 : fputs ("nle", file);
14540 768 : break;
14541 83 : case ORDERED:
14542 83 : fputs ("ord", file);
14543 83 : break;
14544 0 : default:
14545 0 : output_operand_lossage ("operand is not a condition code, "
14546 : "invalid operand code 'D'");
14547 0 : return;
14548 : }
14549 9327 : return;
14550 :
14551 7141600 : case 'F':
14552 7141600 : case 'f':
14553 : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14554 : if (ASSEMBLER_DIALECT == ASM_ATT)
14555 : putc ('.', file);
14556 : gcc_fallthrough ();
14557 : #endif
14558 :
14559 7141600 : case 'C':
14560 7141600 : case 'c':
14561 7141600 : if (!COMPARISON_P (x))
14562 : {
14563 0 : output_operand_lossage ("operand is not a condition code, "
14564 : "invalid operand code '%c'", code);
14565 0 : return;
14566 : }
14567 7141600 : put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14568 7141600 : code == 'c' || code == 'f',
14569 7141600 : code == 'F' || code == 'f',
14570 : file);
14571 7141600 : return;
14572 :
14573 21 : case 'G':
14574 21 : {
14575 21 : int dfv = INTVAL (x);
14576 21 : const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14577 21 : fputs (dfv_suffix, file);
14578 : }
14579 21 : return;
14580 :
14581 1434 : case 'H':
14582 1434 : if (!offsettable_memref_p (x))
14583 : {
14584 1 : output_operand_lossage ("operand is not an offsettable memory "
14585 : "reference, invalid operand code 'H'");
14586 1 : return;
14587 : }
14588 : /* It doesn't actually matter what mode we use here, as we're
14589 : only going to use this for printing. */
14590 1433 : x = adjust_address_nv (x, DImode, 8);
14591 : /* Output 'qword ptr' for intel assembler dialect. */
14592 1433 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14593 0 : code = 'q';
14594 : break;
14595 :
14596 75584 : case 'K':
14597 75584 : if (!CONST_INT_P (x))
14598 : {
14599 1 : output_operand_lossage ("operand is not an integer, invalid "
14600 : "operand code 'K'");
14601 1 : return;
14602 : }
14603 :
14604 75583 : if (INTVAL (x) & IX86_HLE_ACQUIRE)
14605 : #ifdef HAVE_AS_IX86_HLE
14606 22 : fputs ("xacquire ", file);
14607 : #else
14608 : fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14609 : #endif
14610 75561 : else if (INTVAL (x) & IX86_HLE_RELEASE)
14611 : #ifdef HAVE_AS_IX86_HLE
14612 24 : fputs ("xrelease ", file);
14613 : #else
14614 : fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14615 : #endif
14616 : /* We do not want to print value of the operand. */
14617 75583 : return;
14618 :
14619 43036 : case 'N':
14620 43036 : if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14621 15485 : fputs ("{z}", file);
14622 43036 : return;
14623 :
14624 4008 : case 'r':
14625 4008 : if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14626 : {
14627 2 : output_operand_lossage ("operand is not a specific integer, "
14628 : "invalid operand code 'r'");
14629 2 : return;
14630 : }
14631 :
14632 4006 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14633 1 : fputs (", ", file);
14634 :
14635 4006 : fputs ("{sae}", file);
14636 :
14637 4006 : if (ASSEMBLER_DIALECT == ASM_ATT)
14638 4005 : fputs (", ", file);
14639 :
14640 4006 : return;
14641 :
14642 5988 : case 'R':
14643 5988 : if (!CONST_INT_P (x))
14644 : {
14645 1 : output_operand_lossage ("operand is not an integer, invalid "
14646 : "operand code 'R'");
14647 1 : return;
14648 : }
14649 :
14650 5987 : if (ASSEMBLER_DIALECT == ASM_INTEL)
14651 6 : fputs (", ", file);
14652 :
14653 5987 : switch (INTVAL (x))
14654 : {
14655 5172 : case ROUND_NEAREST_INT | ROUND_SAE:
14656 5172 : fputs ("{rn-sae}", file);
14657 5172 : break;
14658 637 : case ROUND_NEG_INF | ROUND_SAE:
14659 637 : fputs ("{rd-sae}", file);
14660 637 : break;
14661 56 : case ROUND_POS_INF | ROUND_SAE:
14662 56 : fputs ("{ru-sae}", file);
14663 56 : break;
14664 121 : case ROUND_ZERO | ROUND_SAE:
14665 121 : fputs ("{rz-sae}", file);
14666 121 : break;
14667 1 : default:
14668 1 : output_operand_lossage ("operand is not a specific integer, "
14669 : "invalid operand code 'R'");
14670 : }
14671 :
14672 5987 : if (ASSEMBLER_DIALECT == ASM_ATT)
14673 5981 : fputs (", ", file);
14674 :
14675 5987 : return;
14676 :
14677 10522 : case 'v':
14678 10522 : if (MEM_P (x))
14679 : {
14680 10641 : switch (MEM_ADDR_SPACE (x))
14681 : {
14682 : case ADDR_SPACE_GENERIC:
14683 : break;
14684 0 : case ADDR_SPACE_SEG_FS:
14685 0 : fputs ("fs ", file);
14686 0 : break;
14687 0 : case ADDR_SPACE_SEG_GS:
14688 0 : fputs ("gs ", file);
14689 0 : break;
14690 0 : default:
14691 0 : gcc_unreachable ();
14692 : }
14693 : }
14694 : else
14695 0 : output_operand_lossage ("operand is not a memory reference, "
14696 : "invalid operand code 'v'");
14697 10522 : return;
14698 :
14699 0 : case '*':
14700 0 : if (ASSEMBLER_DIALECT == ASM_ATT)
14701 0 : putc ('*', file);
14702 0 : return;
14703 :
14704 202 : case '&':
14705 202 : {
14706 202 : const char *name = get_some_local_dynamic_name ();
14707 202 : if (name == NULL)
14708 1 : output_operand_lossage ("'%%&' used without any "
14709 : "local dynamic TLS references");
14710 : else
14711 201 : assemble_name (file, name);
14712 202 : return;
14713 : }
14714 :
14715 6492266 : case '+':
14716 6492266 : {
14717 6492266 : rtx x;
14718 :
14719 6492266 : if (!optimize
14720 5076378 : || optimize_function_for_size_p (cfun)
14721 11380393 : || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14722 4888127 : && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14723 6492266 : return;
14724 :
14725 0 : x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14726 0 : if (x)
14727 : {
14728 0 : int pred_val = profile_probability::from_reg_br_prob_note
14729 0 : (XINT (x, 0)).to_reg_br_prob_base ();
14730 :
14731 0 : bool taken = pred_val > REG_BR_PROB_BASE / 2;
14732 : /* We use 3e (DS) prefix for taken branches and
14733 : 2e (CS) prefix for not taken branches. */
14734 0 : if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14735 0 : fputs ("ds ; ", file);
14736 0 : else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14737 0 : fputs ("cs ; ", file);
14738 : }
14739 0 : return;
14740 : }
14741 :
14742 : case ';':
14743 : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14744 : putc (';', file);
14745 : #endif
14746 : return;
14747 :
14748 3391 : case '~':
14749 3391 : putc (TARGET_AVX2 ? 'i' : 'f', file);
14750 3391 : return;
14751 :
14752 1675 : case 'M':
14753 1675 : if (TARGET_X32)
14754 : {
14755 : /* NB: 32-bit indices in VSIB address are sign-extended
14756 : to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
14757 : sign-extended to 0xfffffffff7fa3010 which is invalid
14758 : address. Add addr32 prefix if there is no base
14759 : register nor symbol. */
14760 40 : bool ok;
14761 40 : struct ix86_address parts;
14762 40 : ok = ix86_decompose_address (x, &parts);
14763 40 : gcc_assert (ok && parts.index == NULL_RTX);
14764 40 : if (parts.base == NULL_RTX
14765 40 : && (parts.disp == NULL_RTX
14766 34 : || !symbolic_operand (parts.disp,
14767 34 : GET_MODE (parts.disp))))
14768 34 : fputs ("addr32 ", file);
14769 : }
14770 1675 : return;
14771 :
14772 22277 : case '^':
14773 25470 : if (Pmode != word_mode)
14774 0 : fputs ("addr32 ", file);
14775 22277 : return;
14776 :
14777 14849028 : case '!':
14778 14849028 : if (ix86_notrack_prefixed_insn_p (current_output_insn))
14779 3776 : fputs ("notrack ", file);
14780 14849028 : return;
14781 :
14782 1 : default:
14783 1 : output_operand_lossage ("invalid operand code '%c'", code);
14784 : }
14785 : }
14786 :
14787 143964090 : if (REG_P (x))
14788 85745866 : print_reg (x, code, file);
14789 :
14790 58218224 : else if (MEM_P (x))
14791 : {
14792 33360734 : rtx addr = XEXP (x, 0);
14793 :
14794 : /* No `byte ptr' prefix for call instructions ... */
14795 33360734 : if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14796 : {
14797 321 : machine_mode mode = GET_MODE (x);
14798 321 : const char *size;
14799 :
14800 : /* Check for explicit size override codes. */
14801 321 : if (code == 'b')
14802 : size = "BYTE";
14803 : else if (code == 'w')
14804 : size = "WORD";
14805 : else if (code == 'k')
14806 : size = "DWORD";
14807 : else if (code == 'q')
14808 : size = "QWORD";
14809 : else if (code == 'x')
14810 : size = "XMMWORD";
14811 : else if (code == 't')
14812 : size = "YMMWORD";
14813 : else if (code == 'g')
14814 : size = "ZMMWORD";
14815 235 : else if (mode == BLKmode)
14816 : /* ... or BLKmode operands, when not overridden. */
14817 : size = NULL;
14818 : else
14819 466 : switch (GET_MODE_SIZE (mode))
14820 : {
14821 : case 1: size = "BYTE"; break;
14822 : case 2: size = "WORD"; break;
14823 : case 4: size = "DWORD"; break;
14824 : case 8: size = "QWORD"; break;
14825 : case 12: size = "TBYTE"; break;
14826 7 : case 16:
14827 7 : if (mode == XFmode)
14828 : size = "TBYTE";
14829 : else
14830 : size = "XMMWORD";
14831 : break;
14832 : case 32: size = "YMMWORD"; break;
14833 : case 64: size = "ZMMWORD"; break;
14834 0 : default:
14835 0 : gcc_unreachable ();
14836 : }
14837 : if (size)
14838 : {
14839 319 : fputs (size, file);
14840 319 : fputs (" PTR ", file);
14841 : }
14842 : }
14843 :
14844 33360734 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14845 0 : output_operand_lossage ("invalid constraints for operand");
14846 : else
14847 33360734 : ix86_print_operand_address_as
14848 34032351 : (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14849 : }
14850 :
14851 24857490 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14852 : {
14853 762 : long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14854 762 : REAL_MODE_FORMAT (HFmode));
14855 762 : if (ASSEMBLER_DIALECT == ASM_ATT)
14856 762 : putc ('$', file);
14857 762 : fprintf (file, "0x%04x", (unsigned int) l);
14858 762 : }
14859 :
14860 24856728 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14861 : {
14862 20624 : long l;
14863 :
14864 20624 : REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14865 :
14866 20624 : if (ASSEMBLER_DIALECT == ASM_ATT)
14867 20624 : putc ('$', file);
14868 : /* Sign extend 32bit SFmode immediate to 8 bytes. */
14869 20624 : if (code == 'q')
14870 327 : fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14871 : (unsigned long long) (int) l);
14872 : else
14873 20297 : fprintf (file, "0x%08x", (unsigned int) l);
14874 : }
14875 :
14876 24836104 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14877 : {
14878 3278 : long l[2];
14879 :
14880 3278 : REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14881 :
14882 3278 : if (ASSEMBLER_DIALECT == ASM_ATT)
14883 3278 : putc ('$', file);
14884 3278 : fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14885 3278 : }
14886 :
14887 : /* These float cases don't actually occur as immediate operands. */
14888 24832826 : else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14889 : {
14890 0 : char dstr[30];
14891 :
14892 0 : real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14893 0 : fputs (dstr, file);
14894 0 : }
14895 :
14896 : /* Print bcst_mem_operand. */
14897 24832826 : else if (GET_CODE (x) == VEC_DUPLICATE)
14898 : {
14899 313 : machine_mode vmode = GET_MODE (x);
14900 : /* Must be bcst_memory_operand. */
14901 313 : gcc_assert (bcst_mem_operand (x, vmode));
14902 :
14903 313 : rtx mem = XEXP (x,0);
14904 313 : ix86_print_operand (file, mem, 0);
14905 :
14906 313 : switch (vmode)
14907 : {
14908 28 : case E_V2DImode:
14909 28 : case E_V2DFmode:
14910 28 : fputs ("{1to2}", file);
14911 28 : break;
14912 74 : case E_V4SImode:
14913 74 : case E_V4SFmode:
14914 74 : case E_V4DImode:
14915 74 : case E_V4DFmode:
14916 74 : fputs ("{1to4}", file);
14917 74 : break;
14918 93 : case E_V8SImode:
14919 93 : case E_V8SFmode:
14920 93 : case E_V8DFmode:
14921 93 : case E_V8DImode:
14922 93 : case E_V8HFmode:
14923 93 : fputs ("{1to8}", file);
14924 93 : break;
14925 110 : case E_V16SFmode:
14926 110 : case E_V16SImode:
14927 110 : case E_V16HFmode:
14928 110 : fputs ("{1to16}", file);
14929 110 : break;
14930 8 : case E_V32HFmode:
14931 8 : fputs ("{1to32}", file);
14932 8 : break;
14933 0 : default:
14934 0 : gcc_unreachable ();
14935 : }
14936 : }
14937 :
14938 : else
14939 : {
14940 : /* We have patterns that allow zero sets of memory, for instance.
14941 : In 64-bit mode, we should probably support all 8-byte vectors,
14942 : since we can in fact encode that into an immediate. */
14943 24832513 : if (CONST_VECTOR_P (x))
14944 : {
14945 3264 : if (x != CONST0_RTX (GET_MODE (x)))
14946 2 : output_operand_lossage ("invalid vector immediate");
14947 3264 : x = const0_rtx;
14948 : }
14949 :
14950 24832513 : if (code == 'P')
14951 : {
14952 5941241 : if (ix86_force_load_from_GOT_p (x, true))
14953 : {
14954 : /* For inline assembly statement, load function address
14955 : from GOT with 'P' operand modifier to avoid PLT. */
14956 4 : x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14957 : (TARGET_64BIT
14958 : ? UNSPEC_GOTPCREL
14959 : : UNSPEC_GOT));
14960 4 : x = gen_rtx_CONST (Pmode, x);
14961 4 : x = gen_const_mem (Pmode, x);
14962 4 : ix86_print_operand (file, x, 'A');
14963 4 : return;
14964 : }
14965 : }
14966 18891272 : else if (code != 'p')
14967 : {
14968 18891163 : if (CONST_INT_P (x))
14969 : {
14970 15605835 : if (ASSEMBLER_DIALECT == ASM_ATT)
14971 15605607 : putc ('$', file);
14972 : }
14973 3285328 : else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
14974 9390 : || LABEL_REF_P (x))
14975 : {
14976 3285326 : if (ASSEMBLER_DIALECT == ASM_ATT)
14977 3285302 : putc ('$', file);
14978 : else
14979 24 : fputs ("OFFSET FLAT:", file);
14980 : }
14981 : }
14982 24832509 : if (CONST_INT_P (x))
14983 15605921 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14984 9226588 : else if (flag_pic || MACHOPIC_INDIRECT)
14985 529823 : output_pic_addr_const (file, x, code);
14986 : else
14987 8696765 : output_addr_const (file, x);
14988 : }
14989 : }
14990 :
/* Return true if CODE is one of the punctuation characters that
   ix86_print_operand handles as an operand code of its own:
   '*', '+', '&', ';', '~', '^' or '!'.  Any other character yields
   false.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;

    default:
      return false;
    }
}
14997 :
14998 : /* Print a memory operand whose address is ADDR. */
14999 :
15000 : static void
15001 36920388 : ix86_print_operand_address_as (FILE *file, rtx addr,
15002 : addr_space_t as, bool raw)
15003 : {
15004 36920388 : struct ix86_address parts;
15005 36920388 : rtx base, index, disp;
15006 36920388 : int scale;
15007 36920388 : int ok;
15008 36920388 : bool vsib = false;
15009 36920388 : int code = 0;
15010 :
15011 36920388 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15012 : {
15013 1675 : ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15014 1675 : gcc_assert (parts.index == NULL_RTX);
15015 1675 : parts.index = XVECEXP (addr, 0, 1);
15016 1675 : parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15017 1675 : addr = XVECEXP (addr, 0, 0);
15018 1675 : vsib = true;
15019 : }
15020 36918713 : else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15021 : {
15022 3072316 : gcc_assert (TARGET_64BIT);
15023 3072316 : ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15024 3072316 : code = 'q';
15025 : }
15026 : else
15027 33846397 : ok = ix86_decompose_address (addr, &parts);
15028 :
15029 36920388 : gcc_assert (ok);
15030 :
15031 36920388 : base = parts.base;
15032 36920388 : index = parts.index;
15033 36920388 : disp = parts.disp;
15034 36920388 : scale = parts.scale;
15035 :
15036 36920388 : if (ADDR_SPACE_GENERIC_P (as))
15037 36638603 : as = parts.seg;
15038 : else
15039 281785 : gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
15040 :
15041 36920388 : if (!ADDR_SPACE_GENERIC_P (as) && !raw)
15042 : {
15043 281800 : if (ASSEMBLER_DIALECT == ASM_ATT)
15044 281798 : putc ('%', file);
15045 :
15046 281800 : switch (as)
15047 : {
15048 182111 : case ADDR_SPACE_SEG_FS:
15049 182111 : fputs ("fs:", file);
15050 182111 : break;
15051 99689 : case ADDR_SPACE_SEG_GS:
15052 99689 : fputs ("gs:", file);
15053 99689 : break;
15054 0 : default:
15055 0 : gcc_unreachable ();
15056 : }
15057 : }
15058 :
15059 : /* Use one byte shorter RIP relative addressing for 64bit mode. */
15060 36920388 : if (TARGET_64BIT && !base && !index && !raw)
15061 : {
15062 6008308 : rtx symbol = disp;
15063 :
15064 6008308 : if (GET_CODE (disp) == CONST
15065 2184738 : && GET_CODE (XEXP (disp, 0)) == PLUS
15066 2099597 : && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15067 2099597 : symbol = XEXP (XEXP (disp, 0), 0);
15068 :
15069 6008308 : if (LABEL_REF_P (symbol)
15070 6008308 : || (SYMBOL_REF_P (symbol)
15071 5746959 : && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15072 5746829 : base = pc_rtx;
15073 : }
15074 :
15075 36920388 : if (!base && !index)
15076 : {
15077 : /* Displacement only requires special attention. */
15078 601038 : if (CONST_INT_P (disp))
15079 : {
15080 269320 : if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
15081 0 : fputs ("ds:", file);
15082 269320 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15083 : }
15084 : /* Load the external function address via the GOT slot to avoid PLT. */
15085 331718 : else if (GET_CODE (disp) == CONST
15086 113439 : && GET_CODE (XEXP (disp, 0)) == UNSPEC
15087 85379 : && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
15088 9571 : || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
15089 407526 : && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15090 24 : output_pic_addr_const (file, disp, 0);
15091 331694 : else if (flag_pic)
15092 114810 : output_pic_addr_const (file, disp, 0);
15093 : else
15094 216884 : output_addr_const (file, disp);
15095 : }
15096 : else
15097 : {
15098 : /* Print SImode register names to force addr32 prefix. */
15099 36319350 : if (SImode_address_operand (addr, VOIDmode))
15100 : {
15101 37 : if (flag_checking)
15102 : {
15103 37 : gcc_assert (TARGET_64BIT);
15104 37 : switch (GET_CODE (addr))
15105 : {
15106 0 : case SUBREG:
15107 0 : gcc_assert (GET_MODE (addr) == SImode);
15108 0 : gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15109 : break;
15110 37 : case ZERO_EXTEND:
15111 37 : case AND:
15112 37 : gcc_assert (GET_MODE (addr) == DImode);
15113 : break;
15114 0 : default:
15115 0 : gcc_unreachable ();
15116 : }
15117 : }
15118 37 : gcc_assert (!code);
15119 : code = 'k';
15120 : }
15121 36319313 : else if (code == 0
15122 33248629 : && TARGET_X32
15123 482 : && disp
15124 410 : && CONST_INT_P (disp)
15125 311 : && INTVAL (disp) < -16*1024*1024)
15126 : {
15127 : /* X32 runs in 64-bit mode, where displacement, DISP, in
15128 : address DISP(%r64), is encoded as 32-bit immediate sign-
15129 : extended from 32-bit to 64-bit. For -0x40000300(%r64),
15130 : address is %r64 + 0xffffffffbffffd00. When %r64 <
15131 : 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15132 : which is invalid for x32. The correct address is %r64
15133 : - 0x40000300 == 0xf7ffdd64. To properly encode
15134 : -0x40000300(%r64) for x32, we zero-extend negative
15135 : displacement by forcing addr32 prefix which truncates
15136 : 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15137 : zero-extend all negative displacements, including -1(%rsp).
15138 : However, for small negative displacements, sign-extension
15139 : won't cause overflow. We only zero-extend negative
15140 : displacements if they < -16*1024*1024, which is also used
15141 : to check legitimate address displacements for PIC. */
15142 38 : code = 'k';
15143 : }
15144 :
15145 : /* Since the upper 32 bits of RSP are always zero for x32,
15146 : we can encode %esp as %rsp to avoid 0x67 prefix if
15147 : there is no index register. */
15148 976 : if (TARGET_X32 && Pmode == SImode
15149 36319754 : && !index && base && REG_P (base) && REGNO (base) == SP_REG)
15150 : code = 'q';
15151 :
15152 36319350 : if (ASSEMBLER_DIALECT == ASM_ATT)
15153 : {
15154 36318980 : if (disp)
15155 : {
15156 32251830 : if (flag_pic)
15157 2829259 : output_pic_addr_const (file, disp, 0);
15158 29422571 : else if (LABEL_REF_P (disp))
15159 5157 : output_asm_label (disp);
15160 : else
15161 29417414 : output_addr_const (file, disp);
15162 : }
15163 :
15164 36318980 : putc ('(', file);
15165 36318980 : if (base)
15166 35901870 : print_reg (base, code, file);
15167 36318980 : if (index)
15168 : {
15169 1945890 : putc (',', file);
15170 3890153 : print_reg (index, vsib ? 0 : code, file);
15171 1945890 : if (scale != 1 || vsib)
15172 1031319 : fprintf (file, ",%d", scale);
15173 : }
15174 36318980 : putc (')', file);
15175 : }
15176 : else
15177 : {
15178 370 : rtx offset = NULL_RTX;
15179 :
15180 370 : if (disp)
15181 : {
15182 : /* Pull out the offset of a symbol; print any symbol itself. */
15183 290 : if (GET_CODE (disp) == CONST
15184 18 : && GET_CODE (XEXP (disp, 0)) == PLUS
15185 18 : && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15186 : {
15187 18 : offset = XEXP (XEXP (disp, 0), 1);
15188 18 : disp = gen_rtx_CONST (VOIDmode,
15189 : XEXP (XEXP (disp, 0), 0));
15190 : }
15191 :
15192 290 : if (flag_pic)
15193 0 : output_pic_addr_const (file, disp, 0);
15194 290 : else if (LABEL_REF_P (disp))
15195 0 : output_asm_label (disp);
15196 290 : else if (CONST_INT_P (disp))
15197 : offset = disp;
15198 : else
15199 123 : output_addr_const (file, disp);
15200 : }
15201 :
15202 370 : putc ('[', file);
15203 370 : if (base)
15204 : {
15205 329 : print_reg (base, code, file);
15206 329 : if (offset)
15207 : {
15208 185 : if (INTVAL (offset) >= 0)
15209 20 : putc ('+', file);
15210 185 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15211 : }
15212 : }
15213 41 : else if (offset)
15214 0 : fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15215 : else
15216 41 : putc ('0', file);
15217 :
15218 370 : if (index)
15219 : {
15220 94 : putc ('+', file);
15221 140 : print_reg (index, vsib ? 0 : code, file);
15222 94 : if (scale != 1 || vsib)
15223 92 : fprintf (file, "*%d", scale);
15224 : }
15225 370 : putc (']', file);
15226 : }
15227 : }
15228 36920388 : }
15229 :
15230 : static void
15231 3559655 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
15232 : {
15233 3559655 : if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
15234 1 : output_operand_lossage ("invalid constraints for operand");
15235 : else
15236 3559654 : ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
15237 3559655 : }
15238 :
15239 : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15240 :
15241 : static bool
15242 15368 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
15243 : {
15244 15368 : rtx op;
15245 :
15246 15368 : if (GET_CODE (x) != UNSPEC)
15247 : return false;
15248 :
15249 15368 : op = XVECEXP (x, 0, 0);
15250 15368 : switch (XINT (x, 1))
15251 : {
15252 1358 : case UNSPEC_GOTOFF:
15253 1358 : output_addr_const (file, op);
15254 1358 : fputs ("@gotoff", file);
15255 1358 : break;
15256 0 : case UNSPEC_GOTTPOFF:
15257 0 : output_addr_const (file, op);
15258 : /* FIXME: This might be @TPOFF in Sun ld. */
15259 0 : fputs ("@gottpoff", file);
15260 0 : break;
15261 0 : case UNSPEC_TPOFF:
15262 0 : output_addr_const (file, op);
15263 0 : fputs ("@tpoff", file);
15264 0 : break;
15265 10914 : case UNSPEC_NTPOFF:
15266 10914 : output_addr_const (file, op);
15267 10914 : if (TARGET_64BIT)
15268 10168 : fputs ("@tpoff", file);
15269 : else
15270 746 : fputs ("@ntpoff", file);
15271 : break;
15272 0 : case UNSPEC_DTPOFF:
15273 0 : output_addr_const (file, op);
15274 0 : fputs ("@dtpoff", file);
15275 0 : break;
15276 3095 : case UNSPEC_GOTNTPOFF:
15277 3095 : output_addr_const (file, op);
15278 3095 : if (TARGET_64BIT)
15279 3095 : fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15280 : "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15281 : else
15282 0 : fputs ("@gotntpoff", file);
15283 : break;
15284 1 : case UNSPEC_INDNTPOFF:
15285 1 : output_addr_const (file, op);
15286 1 : fputs ("@indntpoff", file);
15287 1 : break;
15288 0 : case UNSPEC_SECREL32:
15289 0 : output_addr_const (file, op);
15290 0 : fputs ("@secrel32", file);
15291 0 : break;
15292 : #if TARGET_MACHO
15293 : case UNSPEC_MACHOPIC_OFFSET:
15294 : output_addr_const (file, op);
15295 : putc ('-', file);
15296 : machopic_output_function_base_name (file);
15297 : break;
15298 : #endif
15299 :
15300 : default:
15301 : return false;
15302 : }
15303 :
15304 : return true;
15305 : }
15306 :
15307 :
15308 : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15309 : MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15310 : is the expression of the binary operation. The output may either be
15311 : emitted here, or returned to the caller, like all output_* functions.
15312 :
15313 : There is no guarantee that the operands are the same mode, as they
15314 : might be within FLOAT or FLOAT_EXTEND expressions. */
15315 :
15316 : #ifndef SYSV386_COMPAT
15317 : /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15318 : wants to fix the assemblers because that causes incompatibility
15319 : with gcc. No-one wants to fix gcc because that causes
15320 : incompatibility with assemblers... You can use the option of
15321 : -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15322 : #define SYSV386_COMPAT 1
15323 : #endif
15324 :
15325 : const char *
15326 606614 : output_387_binary_op (rtx_insn *insn, rtx *operands)
15327 : {
15328 606614 : static char buf[40];
15329 606614 : const char *p;
15330 606614 : bool is_sse
15331 606614 : = (SSE_REG_P (operands[0])
15332 661833 : || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
15333 :
15334 55219 : if (is_sse)
15335 : p = "%v";
15336 55219 : else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15337 55212 : || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15338 : p = "fi";
15339 : else
15340 606614 : p = "f";
15341 :
15342 606614 : strcpy (buf, p);
15343 :
15344 606614 : switch (GET_CODE (operands[3]))
15345 : {
15346 : case PLUS:
15347 : p = "add"; break;
15348 : case MINUS:
15349 : p = "sub"; break;
15350 94531 : case MULT:
15351 94531 : p = "mul"; break;
15352 27657 : case DIV:
15353 27657 : p = "div"; break;
15354 0 : default:
15355 0 : gcc_unreachable ();
15356 : }
15357 :
15358 606614 : strcat (buf, p);
15359 :
15360 606614 : if (is_sse)
15361 : {
15362 551395 : p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
15363 551395 : strcat (buf, p);
15364 :
15365 551395 : if (TARGET_AVX)
15366 : p = "\t{%2, %1, %0|%0, %1, %2}";
15367 : else
15368 534937 : p = "\t{%2, %0|%0, %2}";
15369 :
15370 551395 : strcat (buf, p);
15371 551395 : return buf;
15372 : }
15373 :
15374 : /* Even if we do not want to check the inputs, this documents input
15375 : constraints. Which helps in understanding the following code. */
15376 55219 : if (flag_checking)
15377 : {
15378 55218 : if (STACK_REG_P (operands[0])
15379 55218 : && ((REG_P (operands[1])
15380 53638 : && REGNO (operands[0]) == REGNO (operands[1])
15381 49647 : && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15382 5571 : || (REG_P (operands[2])
15383 5571 : && REGNO (operands[0]) == REGNO (operands[2])
15384 5571 : && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15385 110436 : && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15386 : ; /* ok */
15387 : else
15388 0 : gcc_unreachable ();
15389 : }
15390 :
15391 55219 : switch (GET_CODE (operands[3]))
15392 : {
15393 40407 : case MULT:
15394 40407 : case PLUS:
15395 40407 : if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15396 1989 : std::swap (operands[1], operands[2]);
15397 :
15398 : /* know operands[0] == operands[1]. */
15399 :
15400 40407 : if (MEM_P (operands[2]))
15401 : {
15402 : p = "%Z2\t%2";
15403 : break;
15404 : }
15405 :
15406 36048 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15407 : {
15408 21075 : if (STACK_TOP_P (operands[0]))
15409 : /* How is it that we are storing to a dead operand[2]?
15410 : Well, presumably operands[1] is dead too. We can't
15411 : store the result to st(0) as st(0) gets popped on this
15412 : instruction. Instead store to operands[2] (which I
15413 : think has to be st(1)). st(1) will be popped later.
15414 : gcc <= 2.8.1 didn't have this check and generated
15415 : assembly code that the Unixware assembler rejected. */
15416 : p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15417 : else
15418 : p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15419 : break;
15420 : }
15421 :
15422 14973 : if (STACK_TOP_P (operands[0]))
15423 : p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15424 : else
15425 : p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15426 : break;
15427 :
15428 14812 : case MINUS:
15429 14812 : case DIV:
15430 14812 : if (MEM_P (operands[1]))
15431 : {
15432 : p = "r%Z1\t%1";
15433 : break;
15434 : }
15435 :
15436 14376 : if (MEM_P (operands[2]))
15437 : {
15438 : p = "%Z2\t%2";
15439 : break;
15440 : }
15441 :
15442 12764 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15443 : {
15444 : #if SYSV386_COMPAT
15445 : /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15446 : derived assemblers, confusingly reverse the direction of
15447 : the operation for fsub{r} and fdiv{r} when the
15448 : destination register is not st(0). The Intel assembler
15449 : doesn't have this brain damage. Read !SYSV386_COMPAT to
15450 : figure out what the hardware really does. */
15451 6175 : if (STACK_TOP_P (operands[0]))
15452 : p = "{p\t%0, %2|rp\t%2, %0}";
15453 : else
15454 : p = "{rp\t%2, %0|p\t%0, %2}";
15455 : #else
15456 : if (STACK_TOP_P (operands[0]))
15457 : /* As above for fmul/fadd, we can't store to st(0). */
15458 : p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15459 : else
15460 : p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15461 : #endif
15462 : break;
15463 : }
15464 :
15465 6589 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15466 : {
15467 : #if SYSV386_COMPAT
15468 3076 : if (STACK_TOP_P (operands[0]))
15469 : p = "{rp\t%0, %1|p\t%1, %0}";
15470 : else
15471 : p = "{p\t%1, %0|rp\t%0, %1}";
15472 : #else
15473 : if (STACK_TOP_P (operands[0]))
15474 : p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15475 : else
15476 : p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15477 : #endif
15478 : break;
15479 : }
15480 :
15481 3513 : if (STACK_TOP_P (operands[0]))
15482 : {
15483 2670 : if (STACK_TOP_P (operands[1]))
15484 : p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15485 : else
15486 : p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15487 : break;
15488 : }
15489 843 : else if (STACK_TOP_P (operands[1]))
15490 : {
15491 : #if SYSV386_COMPAT
15492 : p = "{\t%1, %0|r\t%0, %1}";
15493 : #else
15494 : p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15495 : #endif
15496 : }
15497 : else
15498 : {
15499 : #if SYSV386_COMPAT
15500 : p = "{r\t%2, %0|\t%0, %2}";
15501 : #else
15502 : p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15503 : #endif
15504 : }
15505 : break;
15506 :
15507 0 : default:
15508 0 : gcc_unreachable ();
15509 : }
15510 :
15511 55219 : strcat (buf, p);
15512 55219 : return buf;
15513 : }
15514 :
15515 : /* Return needed mode for entity in optimize_mode_switching pass. */
15516 :
15517 : static int
15518 1656 : ix86_dirflag_mode_needed (rtx_insn *insn)
15519 : {
15520 1656 : if (CALL_P (insn))
15521 : {
15522 339 : if (cfun->machine->func_type == TYPE_NORMAL)
15523 : return X86_DIRFLAG_ANY;
15524 : else
15525 : /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15526 339 : return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15527 : }
15528 :
15529 1317 : if (recog_memoized (insn) < 0)
15530 : return X86_DIRFLAG_ANY;
15531 :
15532 1315 : if (get_attr_type (insn) == TYPE_STR)
15533 : {
15534 : /* Emit cld instruction if stringops are used in the function. */
15535 1 : if (cfun->machine->func_type == TYPE_NORMAL)
15536 0 : return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15537 : else
15538 : return X86_DIRFLAG_RESET;
15539 : }
15540 :
15541 : return X86_DIRFLAG_ANY;
15542 : }
15543 :
15544 : /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15545 :
15546 : static bool
15547 2211789 : ix86_check_avx_upper_register (const_rtx exp)
15548 : {
15549 : /* construct_container may return a parallel with expr_list
15550 : which contains the real reg and mode */
15551 2211789 : subrtx_iterator::array_type array;
15552 8461493 : FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
15553 : {
15554 6410803 : const_rtx x = *iter;
15555 2576503 : if (SSE_REG_P (x)
15556 832760 : && !EXT_REX_SSE_REG_P (x)
15557 8063395 : && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
15558 161099 : return true;
15559 : }
15560 :
15561 2050690 : return false;
15562 2211789 : }
15563 :
15564 : /* Check if a 256bit or 512bit AVX register is referenced in stores. */
15565 :
15566 : static void
15567 51779 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15568 : {
15569 51779 : if (SSE_REG_P (dest)
15570 12859 : && !EXT_REX_SSE_REG_P (dest)
15571 77497 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15572 : {
15573 760 : bool *used = (bool *) data;
15574 760 : *used = true;
15575 : }
15576 51779 : }
15577 :
15578 : /* Return needed mode for entity in optimize_mode_switching pass. */
15579 :
15580 : static int
15581 2065601 : ix86_avx_u128_mode_needed (rtx_insn *insn)
15582 : {
15583 2065601 : if (DEBUG_INSN_P (insn))
15584 : return AVX_U128_ANY;
15585 :
15586 2065601 : if (CALL_P (insn))
15587 : {
15588 49568 : rtx link;
15589 :
15590 : /* Needed mode is set to AVX_U128_CLEAN if there are
15591 : no 256bit or 512bit modes used in function arguments. */
15592 49568 : for (link = CALL_INSN_FUNCTION_USAGE (insn);
15593 134768 : link;
15594 85200 : link = XEXP (link, 1))
15595 : {
15596 86252 : if (GET_CODE (XEXP (link, 0)) == USE)
15597 : {
15598 84842 : rtx arg = XEXP (XEXP (link, 0), 0);
15599 :
15600 84842 : if (ix86_check_avx_upper_register (arg))
15601 : return AVX_U128_DIRTY;
15602 : }
15603 : }
15604 :
15605 : /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
15606 : nor 512bit registers used in the function return register. */
15607 48516 : bool avx_upper_reg_found = false;
15608 48516 : note_stores (insn, ix86_check_avx_upper_stores,
15609 : &avx_upper_reg_found);
15610 48516 : if (avx_upper_reg_found)
15611 : return AVX_U128_DIRTY;
15612 :
15613 : /* If the function is known to preserve some SSE registers,
15614 : RA and previous passes can legitimately rely on that for
15615 : modes wider than 256 bits. It's only safe to issue a
15616 : vzeroupper if all SSE registers are clobbered. */
15617 48332 : const function_abi &abi = insn_callee_abi (insn);
15618 48332 : if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
15619 : /* Should be safe to issue an vzeroupper before sibling_call_p.
15620 : Also there not mode_exit for sibling_call, so there could be
15621 : missing vzeroupper for that. */
15622 48332 : || !(SIBLING_CALL_P (insn)
15623 47048 : || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15624 47048 : abi.mode_clobbers (V4DImode))))
15625 8436 : return AVX_U128_ANY;
15626 :
15627 39896 : return AVX_U128_CLEAN;
15628 : }
15629 :
15630 2016033 : rtx set = single_set (insn);
15631 2016033 : if (set)
15632 : {
15633 1943069 : rtx dest = SET_DEST (set);
15634 1943069 : rtx src = SET_SRC (set);
15635 1461680 : if (SSE_REG_P (dest)
15636 552260 : && !EXT_REX_SSE_REG_P (dest)
15637 3035347 : && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15638 : {
15639 : /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
15640 : source isn't zero. */
15641 168567 : if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
15642 : return AVX_U128_DIRTY;
15643 : else
15644 : return AVX_U128_ANY;
15645 : }
15646 : else
15647 : {
15648 1774502 : if (ix86_check_avx_upper_register (src))
15649 : return AVX_U128_DIRTY;
15650 : }
15651 :
15652 : /* This isn't YMM/ZMM load/store. */
15653 : return AVX_U128_ANY;
15654 : }
15655 :
15656 : /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
15657 : Hardware changes state only when a 256bit register is written to,
15658 : but we need to prevent the compiler from moving optimal insertion
15659 : point above eventual read from 256bit or 512 bit register. */
15660 72964 : if (ix86_check_avx_upper_register (PATTERN (insn)))
15661 : return AVX_U128_DIRTY;
15662 :
15663 : return AVX_U128_ANY;
15664 : }
15665 :
15666 : /* Return mode that i387 must be switched into
15667 : prior to the execution of insn. */
15668 :
15669 : static int
15670 417050 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
15671 : {
15672 417050 : enum attr_i387_cw mode;
15673 :
15674 : /* The mode UNINITIALIZED is used to store control word after a
15675 : function call or ASM pattern. The mode ANY specify that function
15676 : has no requirements on the control word and make no changes in the
15677 : bits we are interested in. */
15678 :
15679 417050 : if (CALL_P (insn)
15680 417050 : || (NONJUMP_INSN_P (insn)
15681 341245 : && (asm_noperands (PATTERN (insn)) >= 0
15682 341192 : || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15683 14637 : return I387_CW_UNINITIALIZED;
15684 :
15685 402413 : if (recog_memoized (insn) < 0)
15686 : return I387_CW_ANY;
15687 :
15688 401471 : mode = get_attr_i387_cw (insn);
15689 :
15690 401471 : switch (entity)
15691 : {
15692 0 : case I387_ROUNDEVEN:
15693 0 : if (mode == I387_CW_ROUNDEVEN)
15694 : return mode;
15695 : break;
15696 :
15697 396824 : case I387_TRUNC:
15698 396824 : if (mode == I387_CW_TRUNC)
15699 : return mode;
15700 : break;
15701 :
15702 3617 : case I387_FLOOR:
15703 3617 : if (mode == I387_CW_FLOOR)
15704 : return mode;
15705 : break;
15706 :
15707 1030 : case I387_CEIL:
15708 1030 : if (mode == I387_CW_CEIL)
15709 : return mode;
15710 : break;
15711 :
15712 0 : default:
15713 0 : gcc_unreachable ();
15714 : }
15715 :
15716 : return I387_CW_ANY;
15717 : }
15718 :
15719 : /* Return mode that entity must be switched into
15720 : prior to the execution of insn. */
15721 :
15722 : static int
15723 2484307 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15724 : {
15725 2484307 : switch (entity)
15726 : {
15727 1656 : case X86_DIRFLAG:
15728 1656 : return ix86_dirflag_mode_needed (insn);
15729 2065601 : case AVX_U128:
15730 2065601 : return ix86_avx_u128_mode_needed (insn);
15731 417050 : case I387_ROUNDEVEN:
15732 417050 : case I387_TRUNC:
15733 417050 : case I387_FLOOR:
15734 417050 : case I387_CEIL:
15735 417050 : return ix86_i387_mode_needed (entity, insn);
15736 0 : default:
15737 0 : gcc_unreachable ();
15738 : }
15739 : return 0;
15740 : }
15741 :
15742 : /* Calculate mode of upper 128bit AVX registers after the insn. */
15743 :
15744 : static int
15745 2065601 : ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15746 : {
15747 2065601 : rtx pat = PATTERN (insn);
15748 :
15749 2065601 : if (vzeroupper_pattern (pat, VOIDmode)
15750 2065601 : || vzeroall_pattern (pat, VOIDmode))
15751 175 : return AVX_U128_CLEAN;
15752 :
15753 : /* We know that state is clean after CALL insn if there are no
15754 : 256bit or 512bit registers used in the function return register. */
15755 2065426 : if (CALL_P (insn))
15756 : {
15757 49522 : bool avx_upper_reg_found = false;
15758 49522 : note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15759 :
15760 49522 : if (avx_upper_reg_found)
15761 : return AVX_U128_DIRTY;
15762 :
15763 : /* If the function desn't clobber any sse registers or only clobber
15764 : 128-bit part, Then vzeroupper isn't issued before the function exit.
15765 : the status not CLEAN but ANY after the function. */
15766 48946 : const function_abi &abi = insn_callee_abi (insn);
15767 48946 : if (!(SIBLING_CALL_P (insn)
15768 47667 : || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15769 47667 : abi.mode_clobbers (V4DImode))))
15770 8732 : return AVX_U128_ANY;
15771 :
15772 40214 : return AVX_U128_CLEAN;
15773 : }
15774 :
15775 : /* Otherwise, return current mode. Remember that if insn
15776 : references AVX 256bit or 512bit registers, the mode was already
15777 : changed to DIRTY from MODE_NEEDED. */
15778 : return mode;
15779 : }
15780 :
15781 : /* Return the mode that an insn results in. */
15782 :
15783 : static int
15784 2483462 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15785 : {
15786 2483462 : switch (entity)
15787 : {
15788 : case X86_DIRFLAG:
15789 : return mode;
15790 2065601 : case AVX_U128:
15791 2065601 : return ix86_avx_u128_mode_after (mode, insn);
15792 : case I387_ROUNDEVEN:
15793 : case I387_TRUNC:
15794 : case I387_FLOOR:
15795 : case I387_CEIL:
15796 : return mode;
15797 0 : default:
15798 0 : gcc_unreachable ();
15799 : }
15800 : }
15801 :
15802 : static int
15803 120 : ix86_dirflag_mode_entry (void)
15804 : {
15805 : /* For TARGET_CLD or in the interrupt handler we can't assume
15806 : direction flag state at function entry. */
15807 120 : if (TARGET_CLD
15808 118 : || cfun->machine->func_type != TYPE_NORMAL)
15809 120 : return X86_DIRFLAG_ANY;
15810 :
15811 : return X86_DIRFLAG_RESET;
15812 : }
15813 :
15814 : static int
15815 122982 : ix86_avx_u128_mode_entry (void)
15816 : {
15817 122982 : tree arg;
15818 :
15819 : /* Entry mode is set to AVX_U128_DIRTY if there are
15820 : 256bit or 512bit modes used in function arguments. */
15821 310471 : for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15822 187489 : arg = TREE_CHAIN (arg))
15823 : {
15824 221457 : rtx incoming = DECL_INCOMING_RTL (arg);
15825 :
15826 221457 : if (incoming && ix86_check_avx_upper_register (incoming))
15827 : return AVX_U128_DIRTY;
15828 : }
15829 :
15830 : return AVX_U128_CLEAN;
15831 : }
15832 :
15833 : /* Return a mode that ENTITY is assumed to be
15834 : switched to at function entry. */
15835 :
15836 : static int
15837 75825 : ix86_mode_entry (int entity)
15838 : {
15839 75825 : switch (entity)
15840 : {
15841 120 : case X86_DIRFLAG:
15842 120 : return ix86_dirflag_mode_entry ();
15843 74570 : case AVX_U128:
15844 74570 : return ix86_avx_u128_mode_entry ();
15845 : case I387_ROUNDEVEN:
15846 : case I387_TRUNC:
15847 : case I387_FLOOR:
15848 : case I387_CEIL:
15849 : return I387_CW_ANY;
15850 0 : default:
15851 0 : gcc_unreachable ();
15852 : }
15853 : }
15854 :
15855 : static int
15856 73321 : ix86_avx_u128_mode_exit (void)
15857 : {
15858 73321 : rtx reg = crtl->return_rtx;
15859 :
15860 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15861 : or 512 bit modes used in the function return register. */
15862 73321 : if (reg && ix86_check_avx_upper_register (reg))
15863 : return AVX_U128_DIRTY;
15864 :
15865 : /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15866 : modes used in function arguments, otherwise return AVX_U128_CLEAN.
15867 : */
15868 48412 : return ix86_avx_u128_mode_entry ();
15869 : }
15870 :
15871 : /* Return a mode that ENTITY is assumed to be
15872 : switched to at function exit. */
15873 :
15874 : static int
15875 74431 : ix86_mode_exit (int entity)
15876 : {
15877 74431 : switch (entity)
15878 : {
15879 : case X86_DIRFLAG:
15880 : return X86_DIRFLAG_ANY;
15881 73321 : case AVX_U128:
15882 73321 : return ix86_avx_u128_mode_exit ();
15883 1076 : case I387_ROUNDEVEN:
15884 1076 : case I387_TRUNC:
15885 1076 : case I387_FLOOR:
15886 1076 : case I387_CEIL:
15887 1076 : return I387_CW_ANY;
15888 0 : default:
15889 0 : gcc_unreachable ();
15890 : }
15891 : }
15892 :
/* Return N unchanged; the first (entity) argument is ignored.
   NOTE(review): presumably this is the mode-switching priority hook,
   so the mode with priority N is simply mode number N for every
   entity -- confirm against the target hook documentation.  */

static int
ix86_mode_priority (int /* entity */, int n)
{
  const int mode = n;
  return mode;
}
15898 :
15899 : /* Output code to initialize control word copies used by trunc?f?i and
15900 : rounding patterns. CURRENT_MODE is set to current control word,
15901 : while NEW_MODE is set to new control word. */
15902 :
15903 : static void
15904 3296 : emit_i387_cw_initialization (int mode)
15905 : {
15906 3296 : rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15907 3296 : rtx new_mode;
15908 :
15909 3296 : enum ix86_stack_slot slot;
15910 :
15911 3296 : rtx reg = gen_reg_rtx (HImode);
15912 :
15913 3296 : emit_insn (gen_x86_fnstcw_1 (stored_mode));
15914 3296 : emit_move_insn (reg, copy_rtx (stored_mode));
15915 :
15916 3296 : switch (mode)
15917 : {
15918 0 : case I387_CW_ROUNDEVEN:
15919 : /* round to nearest */
15920 0 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15921 0 : slot = SLOT_CW_ROUNDEVEN;
15922 0 : break;
15923 :
15924 3100 : case I387_CW_TRUNC:
15925 : /* round toward zero (truncate) */
15926 3100 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15927 3100 : slot = SLOT_CW_TRUNC;
15928 3100 : break;
15929 :
15930 137 : case I387_CW_FLOOR:
15931 : /* round down toward -oo */
15932 137 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15933 137 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15934 137 : slot = SLOT_CW_FLOOR;
15935 137 : break;
15936 :
15937 59 : case I387_CW_CEIL:
15938 : /* round up toward +oo */
15939 59 : emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15940 59 : emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15941 59 : slot = SLOT_CW_CEIL;
15942 59 : break;
15943 :
15944 0 : default:
15945 0 : gcc_unreachable ();
15946 : }
15947 :
15948 3296 : gcc_assert (slot < MAX_386_STACK_LOCALS);
15949 :
15950 3296 : new_mode = assign_386_stack_local (HImode, slot);
15951 3296 : emit_move_insn (new_mode, reg);
15952 3296 : }
15953 :
15954 : /* Generate one or more insns to set ENTITY to MODE. */
15955 :
15956 : static void
15957 51590 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15958 : HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15959 : {
15960 51590 : switch (entity)
15961 : {
15962 265 : case X86_DIRFLAG:
15963 265 : if (mode == X86_DIRFLAG_RESET)
15964 265 : emit_insn (gen_cld ());
15965 : break;
15966 43149 : case AVX_U128:
15967 43149 : if (mode == AVX_U128_CLEAN)
15968 21843 : ix86_expand_avx_vzeroupper ();
15969 : break;
15970 8176 : case I387_ROUNDEVEN:
15971 8176 : case I387_TRUNC:
15972 8176 : case I387_FLOOR:
15973 8176 : case I387_CEIL:
15974 8176 : if (mode != I387_CW_ANY
15975 8176 : && mode != I387_CW_UNINITIALIZED)
15976 3296 : emit_i387_cw_initialization (mode);
15977 : break;
15978 0 : default:
15979 0 : gcc_unreachable ();
15980 : }
15981 51590 : }
15982 :
15983 : /* Output code for INSN to convert a float to a signed int. OPERANDS
15984 : are the insn operands. The output may be [HSD]Imode and the input
15985 : operand may be [SDX]Fmode. */
15986 :
15987 : const char *
15988 7437 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15989 : {
15990 7437 : bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15991 7437 : bool dimode_p = GET_MODE (operands[0]) == DImode;
15992 7437 : int round_mode = get_attr_i387_cw (insn);
15993 :
15994 7437 : static char buf[40];
15995 7437 : const char *p;
15996 :
15997 : /* Jump through a hoop or two for DImode, since the hardware has no
15998 : non-popping instruction. We used to do this a different way, but
15999 : that was somewhat fragile and broke with post-reload splitters. */
16000 7437 : if ((dimode_p || fisttp) && !stack_top_dies)
16001 25 : output_asm_insn ("fld\t%y1", operands);
16002 :
16003 7437 : gcc_assert (STACK_TOP_P (operands[1]));
16004 7437 : gcc_assert (MEM_P (operands[0]));
16005 7437 : gcc_assert (GET_MODE (operands[1]) != TFmode);
16006 :
16007 7437 : if (fisttp)
16008 : return "fisttp%Z0\t%0";
16009 :
16010 7436 : strcpy (buf, "fist");
16011 :
16012 7436 : if (round_mode != I387_CW_ANY)
16013 7392 : output_asm_insn ("fldcw\t%3", operands);
16014 :
16015 7436 : p = "p%Z0\t%0";
16016 7436 : strcat (buf, p + !(stack_top_dies || dimode_p));
16017 :
16018 7436 : output_asm_insn (buf, operands);
16019 :
16020 7436 : if (round_mode != I387_CW_ANY)
16021 7392 : output_asm_insn ("fldcw\t%2", operands);
16022 :
16023 : return "";
16024 : }
16025 :
16026 : /* Output code for x87 ffreep insn. The OPNO argument, which may only
16027 : have the values zero or one, indicates the ffreep insn's operand
16028 : from the OPERANDS array. */
16029 :
16030 : static const char *
16031 275113 : output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16032 : {
16033 0 : if (TARGET_USE_FFREEP)
16034 : #ifdef HAVE_AS_IX86_FFREEP
16035 0 : return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16036 : #else
16037 : {
16038 : static char retval[32];
16039 : int regno = REGNO (operands[opno]);
16040 :
16041 : gcc_assert (STACK_REGNO_P (regno));
16042 :
16043 : regno -= FIRST_STACK_REG;
16044 :
16045 : snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16046 : return retval;
16047 : }
16048 : #endif
16049 :
16050 0 : return opno ? "fstp\t%y1" : "fstp\t%y0";
16051 : }
16052 :
16053 :
16054 : /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16055 : should be used. UNORDERED_P is true when fucom should be used. */
16056 :
16057 : const char *
16058 107670 : output_fp_compare (rtx_insn *insn, rtx *operands,
16059 : bool eflags_p, bool unordered_p)
16060 : {
16061 107670 : rtx *xops = eflags_p ? &operands[0] : &operands[1];
16062 107670 : bool stack_top_dies;
16063 :
16064 107670 : static char buf[40];
16065 107670 : const char *p;
16066 :
16067 107670 : gcc_assert (STACK_TOP_P (xops[0]));
16068 :
16069 107670 : stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
16070 :
16071 107670 : if (eflags_p)
16072 : {
16073 107670 : p = unordered_p ? "fucomi" : "fcomi";
16074 107670 : strcpy (buf, p);
16075 :
16076 107670 : p = "p\t{%y1, %0|%0, %y1}";
16077 107670 : strcat (buf, p + !stack_top_dies);
16078 :
16079 107670 : return buf;
16080 : }
16081 :
16082 0 : if (STACK_REG_P (xops[1])
16083 0 : && stack_top_dies
16084 0 : && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
16085 : {
16086 0 : gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
16087 :
16088 : /* If both the top of the 387 stack die, and the other operand
16089 : is also a stack register that dies, then this must be a
16090 : `fcompp' float compare. */
16091 0 : p = unordered_p ? "fucompp" : "fcompp";
16092 0 : strcpy (buf, p);
16093 : }
16094 0 : else if (const0_operand (xops[1], VOIDmode))
16095 : {
16096 0 : gcc_assert (!unordered_p);
16097 0 : strcpy (buf, "ftst");
16098 : }
16099 : else
16100 : {
16101 0 : if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
16102 : {
16103 0 : gcc_assert (!unordered_p);
16104 : p = "ficom";
16105 : }
16106 : else
16107 0 : p = unordered_p ? "fucom" : "fcom";
16108 :
16109 0 : strcpy (buf, p);
16110 :
16111 0 : p = "p%Z2\t%y2";
16112 0 : strcat (buf, p + !stack_top_dies);
16113 : }
16114 :
16115 0 : output_asm_insn (buf, operands);
16116 0 : return "fnstsw\t%0";
16117 : }
16118 :
16119 : void
16120 112693 : ix86_output_addr_vec_elt (FILE *file, int value)
16121 : {
16122 112693 : const char *directive = ASM_LONG;
16123 :
16124 : #ifdef ASM_QUAD
16125 112693 : if (TARGET_LP64)
16126 101051 : directive = ASM_QUAD;
16127 : #else
16128 : gcc_assert (!TARGET_64BIT);
16129 : #endif
16130 :
16131 112693 : fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16132 112693 : }
16133 :
16134 : void
16135 25762 : ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16136 : {
16137 25762 : const char *directive = ASM_LONG;
16138 :
16139 : #ifdef ASM_QUAD
16140 38563 : if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16141 : directive = ASM_QUAD;
16142 : #else
16143 : gcc_assert (!TARGET_64BIT);
16144 : #endif
16145 : /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16146 25762 : if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
16147 12801 : fprintf (file, "%s%s%d-%s%d\n",
16148 : directive, LPREFIX, value, LPREFIX, rel);
16149 : #if TARGET_MACHO
16150 : else if (TARGET_MACHO)
16151 : {
16152 : fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16153 : machopic_output_function_base_name (file);
16154 : putc ('\n', file);
16155 : }
16156 : #endif
16157 12961 : else if (HAVE_AS_GOTOFF_IN_DATA)
16158 12961 : fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16159 : else
16160 : asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16161 : GOT_SYMBOL_NAME, LPREFIX, value);
16162 25762 : }
16163 :
/* Limits for the LEA distance heuristics in this file.
   LEA_MAX_STALL is compared against definition distances that have
   been converted to whole cycles; LEA_SEARCH_THRESHOLD, twice that
   value, bounds the insn searches, which count distance in
   half-cycles.  */
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16166 :
16167 : /* Increase given DISTANCE in half-cycles according to
16168 : dependencies between PREV and NEXT instructions.
16169 : Add 1 half-cycle if there is no dependency and
16170 : go to next cycle if there is some dependecy. */
16171 :
16172 : static unsigned int
16173 2129 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
16174 : {
16175 2129 : df_ref def, use;
16176 :
16177 2129 : if (!prev || !next)
16178 748 : return distance + (distance & 1) + 2;
16179 :
16180 1381 : if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16181 226 : return distance + 1;
16182 :
16183 1920 : FOR_EACH_INSN_USE (use, next)
16184 2448 : FOR_EACH_INSN_DEF (def, prev)
16185 1683 : if (!DF_REF_IS_ARTIFICIAL (def)
16186 1683 : && DF_REF_REGNO (use) == DF_REF_REGNO (def))
16187 735 : return distance + (distance & 1) + 2;
16188 :
16189 420 : return distance + 1;
16190 : }
16191 :
16192 : /* Function checks if instruction INSN defines register number
16193 : REGNO1 or REGNO2. */
16194 :
16195 : bool
16196 2073 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
16197 : rtx_insn *insn)
16198 : {
16199 2073 : df_ref def;
16200 :
16201 3739 : FOR_EACH_INSN_DEF (def, insn)
16202 2070 : if (DF_REF_REG_DEF_P (def)
16203 2070 : && !DF_REF_IS_ARTIFICIAL (def)
16204 2070 : && (regno1 == DF_REF_REGNO (def)
16205 1682 : || regno2 == DF_REF_REGNO (def)))
16206 : return true;
16207 :
16208 : return false;
16209 : }
16210 :
16211 : /* Function checks if instruction INSN uses register number
16212 : REGNO as a part of address expression. */
16213 :
16214 : static bool
16215 1182 : insn_uses_reg_mem (unsigned int regno, rtx insn)
16216 : {
16217 1182 : df_ref use;
16218 :
16219 2475 : FOR_EACH_INSN_USE (use, insn)
16220 1384 : if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
16221 : return true;
16222 :
16223 : return false;
16224 : }
16225 :
16226 : /* Search backward for non-agu definition of register number REGNO1
16227 : or register number REGNO2 in basic block starting from instruction
16228 : START up to head of basic block or instruction INSN.
16229 :
16230 : Function puts true value into *FOUND var if definition was found
16231 : and false otherwise.
16232 :
16233 : Distance in half-cycles between START and found instruction or head
16234 : of BB is added to DISTANCE and returned. */
16235 :
16236 : static int
16237 624 : distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16238 : rtx_insn *insn, int distance,
16239 : rtx_insn *start, bool *found)
16240 : {
16241 624 : basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16242 624 : rtx_insn *prev = start;
16243 624 : rtx_insn *next = NULL;
16244 :
16245 624 : *found = false;
16246 :
16247 624 : while (prev
16248 1861 : && prev != insn
16249 1861 : && distance < LEA_SEARCH_THRESHOLD)
16250 : {
16251 1660 : if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16252 : {
16253 947 : distance = increase_distance (prev, next, distance);
16254 947 : if (insn_defines_reg (regno1, regno2, prev))
16255 : {
16256 243 : if (recog_memoized (prev) < 0
16257 243 : || get_attr_type (prev) != TYPE_LEA)
16258 : {
16259 200 : *found = true;
16260 200 : return distance;
16261 : }
16262 : }
16263 :
16264 : next = prev;
16265 : }
16266 1460 : if (prev == BB_HEAD (bb))
16267 : break;
16268 :
16269 1237 : prev = PREV_INSN (prev);
16270 : }
16271 :
16272 : return distance;
16273 : }
16274 :
16275 : /* Search backward for non-agu definition of register number REGNO1
16276 : or register number REGNO2 in INSN's basic block until
16277 : 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16278 : 2. Reach neighbor BBs boundary, or
16279 : 3. Reach agu definition.
16280 : Returns the distance between the non-agu definition point and INSN.
16281 : If no definition point, returns -1. */
16282 :
16283 : static int
16284 429 : distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16285 : rtx_insn *insn)
16286 : {
16287 429 : basic_block bb = BLOCK_FOR_INSN (insn);
16288 429 : int distance = 0;
16289 429 : bool found = false;
16290 :
16291 429 : if (insn != BB_HEAD (bb))
16292 429 : distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16293 : distance, PREV_INSN (insn),
16294 : &found);
16295 :
16296 429 : if (!found && distance < LEA_SEARCH_THRESHOLD)
16297 : {
16298 167 : edge e;
16299 167 : edge_iterator ei;
16300 167 : bool simple_loop = false;
16301 :
16302 336 : FOR_EACH_EDGE (e, ei, bb->preds)
16303 206 : if (e->src == bb)
16304 : {
16305 : simple_loop = true;
16306 : break;
16307 : }
16308 :
16309 167 : if (simple_loop)
16310 37 : distance = distance_non_agu_define_in_bb (regno1, regno2,
16311 : insn, distance,
16312 37 : BB_END (bb), &found);
16313 : else
16314 : {
16315 130 : int shortest_dist = -1;
16316 130 : bool found_in_bb = false;
16317 :
16318 288 : FOR_EACH_EDGE (e, ei, bb->preds)
16319 : {
16320 158 : int bb_dist
16321 316 : = distance_non_agu_define_in_bb (regno1, regno2,
16322 : insn, distance,
16323 158 : BB_END (e->src),
16324 : &found_in_bb);
16325 158 : if (found_in_bb)
16326 : {
16327 24 : if (shortest_dist < 0)
16328 : shortest_dist = bb_dist;
16329 0 : else if (bb_dist > 0)
16330 0 : shortest_dist = MIN (bb_dist, shortest_dist);
16331 :
16332 24 : found = true;
16333 : }
16334 : }
16335 :
16336 130 : distance = shortest_dist;
16337 : }
16338 : }
16339 :
16340 429 : if (!found)
16341 : return -1;
16342 :
16343 200 : return distance >> 1;
16344 : }
16345 :
16346 : /* Return the distance in half-cycles between INSN and the next
16347 : insn that uses register number REGNO in memory address added
16348 : to DISTANCE. Return -1 if REGNO0 is set.
16349 :
16350 : Put true value into *FOUND if register usage was found and
16351 : false otherwise.
16352 : Put true value into *REDEFINED if register redefinition was
16353 : found and false otherwise. */
16354 :
16355 : static int
16356 767 : distance_agu_use_in_bb (unsigned int regno,
16357 : rtx_insn *insn, int distance, rtx_insn *start,
16358 : bool *found, bool *redefined)
16359 : {
16360 767 : basic_block bb = NULL;
16361 767 : rtx_insn *next = start;
16362 767 : rtx_insn *prev = NULL;
16363 :
16364 767 : *found = false;
16365 767 : *redefined = false;
16366 :
16367 767 : if (start != NULL_RTX)
16368 : {
16369 750 : bb = BLOCK_FOR_INSN (start);
16370 750 : if (start != BB_HEAD (bb))
16371 : /* If insn and start belong to the same bb, set prev to insn,
16372 : so the call to increase_distance will increase the distance
16373 : between insns by 1. */
16374 412 : prev = insn;
16375 : }
16376 :
16377 2566 : while (next
16378 2566 : && next != insn
16379 2566 : && distance < LEA_SEARCH_THRESHOLD)
16380 : {
16381 2378 : if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16382 : {
16383 1182 : distance = increase_distance(prev, next, distance);
16384 1182 : if (insn_uses_reg_mem (regno, next))
16385 : {
16386 : /* Return DISTANCE if OP0 is used in memory
16387 : address in NEXT. */
16388 91 : *found = true;
16389 91 : return distance;
16390 : }
16391 :
16392 1091 : if (insn_defines_reg (regno, INVALID_REGNUM, next))
16393 : {
16394 : /* Return -1 if OP0 is set in NEXT. */
16395 156 : *redefined = true;
16396 156 : return -1;
16397 : }
16398 :
16399 : prev = next;
16400 : }
16401 :
16402 2131 : if (next == BB_END (bb))
16403 : break;
16404 :
16405 1799 : next = NEXT_INSN (next);
16406 : }
16407 :
16408 : return distance;
16409 : }
16410 :
16411 : /* Return the distance between INSN and the next insn that uses
16412 : register number REGNO0 in memory address. Return -1 if no such
16413 : a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16414 :
16415 : static int
16416 429 : distance_agu_use (unsigned int regno0, rtx_insn *insn)
16417 : {
16418 429 : basic_block bb = BLOCK_FOR_INSN (insn);
16419 429 : int distance = 0;
16420 429 : bool found = false;
16421 429 : bool redefined = false;
16422 :
16423 429 : if (insn != BB_END (bb))
16424 412 : distance = distance_agu_use_in_bb (regno0, insn, distance,
16425 : NEXT_INSN (insn),
16426 : &found, &redefined);
16427 :
16428 429 : if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16429 : {
16430 250 : edge e;
16431 250 : edge_iterator ei;
16432 250 : bool simple_loop = false;
16433 :
16434 535 : FOR_EACH_EDGE (e, ei, bb->succs)
16435 355 : if (e->dest == bb)
16436 : {
16437 : simple_loop = true;
16438 : break;
16439 : }
16440 :
16441 250 : if (simple_loop)
16442 70 : distance = distance_agu_use_in_bb (regno0, insn,
16443 : distance, BB_HEAD (bb),
16444 : &found, &redefined);
16445 : else
16446 : {
16447 180 : int shortest_dist = -1;
16448 180 : bool found_in_bb = false;
16449 180 : bool redefined_in_bb = false;
16450 :
16451 465 : FOR_EACH_EDGE (e, ei, bb->succs)
16452 : {
16453 285 : int bb_dist
16454 570 : = distance_agu_use_in_bb (regno0, insn,
16455 285 : distance, BB_HEAD (e->dest),
16456 : &found_in_bb, &redefined_in_bb);
16457 285 : if (found_in_bb)
16458 : {
16459 17 : if (shortest_dist < 0)
16460 : shortest_dist = bb_dist;
16461 2 : else if (bb_dist > 0)
16462 2 : shortest_dist = MIN (bb_dist, shortest_dist);
16463 :
16464 17 : found = true;
16465 : }
16466 : }
16467 :
16468 180 : distance = shortest_dist;
16469 : }
16470 : }
16471 :
16472 429 : if (!found || redefined)
16473 : return -1;
16474 :
16475 89 : return distance >> 1;
16476 : }
16477 :
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
     Negative value: ADD is more preferred than LEA
     Zero: Neutral
     Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0
16484 :
16485 : /* Return true if usage of lea INSN has performance advantage
16486 : over a sequence of instructions. Instructions sequence has
16487 : SPLIT_COST cycles higher latency than lea latency. */
16488 :
16489 : static bool
16490 1629 : ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
16491 : unsigned int regno2, int split_cost, bool has_scale)
16492 : {
16493 1629 : int dist_define, dist_use;
16494 :
16495 : /* For Atom processors newer than Bonnell, if using a 2-source or
16496 : 3-source LEA for non-destructive destination purposes, or due to
16497 : wanting ability to use SCALE, the use of LEA is justified. */
16498 1629 : if (!TARGET_CPU_P (BONNELL))
16499 : {
16500 1200 : if (has_scale)
16501 : return true;
16502 1181 : if (split_cost < 1)
16503 : return false;
16504 406 : if (regno0 == regno1 || regno0 == regno2)
16505 : return false;
16506 : return true;
16507 : }
16508 :
16509 : /* Remember recog_data content. */
16510 429 : struct recog_data_d recog_data_save = recog_data;
16511 :
16512 429 : dist_define = distance_non_agu_define (regno1, regno2, insn);
16513 429 : dist_use = distance_agu_use (regno0, insn);
16514 :
16515 : /* distance_non_agu_define can call get_attr_type which can call
16516 : recog_memoized, restore recog_data back to previous content. */
16517 429 : recog_data = recog_data_save;
16518 :
16519 429 : if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16520 : {
16521 : /* If there is no non AGU operand definition, no AGU
16522 : operand usage and split cost is 0 then both lea
16523 : and non lea variants have same priority. Currently
16524 : we prefer lea for 64 bit code and non lea on 32 bit
16525 : code. */
16526 232 : if (dist_use < 0 && split_cost == 0)
16527 98 : return TARGET_64BIT || IX86_LEA_PRIORITY;
16528 : else
16529 : return true;
16530 : }
16531 :
16532 : /* With longer definitions distance lea is more preferable.
16533 : Here we change it to take into account splitting cost and
16534 : lea priority. */
16535 197 : dist_define += split_cost + IX86_LEA_PRIORITY;
16536 :
16537 : /* If there is no use in memory addess then we just check
16538 : that split cost exceeds AGU stall. */
16539 197 : if (dist_use < 0)
16540 193 : return dist_define > LEA_MAX_STALL;
16541 :
16542 : /* If this insn has both backward non-agu dependence and forward
16543 : agu dependence, the one with short distance takes effect. */
16544 4 : return dist_define >= dist_use;
16545 : }
16546 :
16547 : /* Return true if we need to split op0 = op1 + op2 into a sequence of
16548 : move and add to avoid AGU stalls. */
16549 :
16550 : bool
16551 9142047 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16552 : {
16553 9142047 : unsigned int regno0, regno1, regno2;
16554 :
16555 : /* Check if we need to optimize. */
16556 9142047 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16557 9141232 : return false;
16558 :
16559 815 : regno0 = true_regnum (operands[0]);
16560 815 : regno1 = true_regnum (operands[1]);
16561 815 : regno2 = true_regnum (operands[2]);
16562 :
16563 : /* We need to split only adds with non destructive
16564 : destination operand. */
16565 815 : if (regno0 == regno1 || regno0 == regno2)
16566 : return false;
16567 : else
16568 245 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16569 : }
16570 :
16571 : /* Return true if we should emit lea instruction instead of mov
16572 : instruction. */
16573 :
16574 : bool
16575 29560590 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16576 : {
16577 29560590 : unsigned int regno0, regno1;
16578 :
16579 : /* Check if we need to optimize. */
16580 29560590 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16581 29558304 : return false;
16582 :
16583 : /* Use lea for reg to reg moves only. */
16584 2286 : if (!REG_P (operands[0]) || !REG_P (operands[1]))
16585 : return false;
16586 :
16587 464 : regno0 = true_regnum (operands[0]);
16588 464 : regno1 = true_regnum (operands[1]);
16589 :
16590 464 : return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16591 : }
16592 :
16593 : /* Return true if we need to split lea into a sequence of
16594 : instructions to avoid AGU stalls during peephole2. */
16595 :
16596 : bool
16597 11166010 : ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16598 : {
16599 11166010 : unsigned int regno0, regno1, regno2;
16600 11166010 : int split_cost;
16601 11166010 : struct ix86_address parts;
16602 11166010 : int ok;
16603 :
16604 : /* The "at least two components" test below might not catch simple
16605 : move or zero extension insns if parts.base is non-NULL and parts.disp
16606 : is const0_rtx as the only components in the address, e.g. if the
16607 : register is %rbp or %r13. As this test is much cheaper and moves or
16608 : zero extensions are the common case, do this check first. */
16609 11166010 : if (REG_P (operands[1])
16610 11166010 : || (SImode_address_operand (operands[1], VOIDmode)
16611 148384 : && REG_P (XEXP (operands[1], 0))))
16612 4091817 : return false;
16613 :
16614 7074193 : ok = ix86_decompose_address (operands[1], &parts);
16615 7074193 : gcc_assert (ok);
16616 :
16617 : /* There should be at least two components in the address. */
16618 7074193 : if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16619 7074193 : + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16620 : return false;
16621 :
16622 : /* We should not split into add if non legitimate pic
16623 : operand is used as displacement. */
16624 2684263 : if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16625 : return false;
16626 :
16627 2634261 : regno0 = true_regnum (operands[0]) ;
16628 2634261 : regno1 = INVALID_REGNUM;
16629 2634261 : regno2 = INVALID_REGNUM;
16630 :
16631 2634261 : if (parts.base)
16632 2559485 : regno1 = true_regnum (parts.base);
16633 2634261 : if (parts.index)
16634 486018 : regno2 = true_regnum (parts.index);
16635 :
16636 : /* Use add for a = a + b and a = b + a since it is faster and shorter
16637 : than lea for most processors. For the processors like BONNELL, if
16638 : the destination register of LEA holds an actual address which will
16639 : be used soon, LEA is better and otherwise ADD is better. */
16640 2634261 : if (!TARGET_CPU_P (BONNELL)
16641 2634132 : && parts.scale == 1
16642 2389693 : && (!parts.disp || parts.disp == const0_rtx)
16643 176900 : && (regno0 == regno1 || regno0 == regno2))
16644 : return true;
16645 :
16646 : /* Split with -Oz if the encoding requires fewer bytes. */
16647 2628376 : if (optimize_size > 1
16648 27 : && parts.scale > 1
16649 4 : && !parts.base
16650 4 : && (!parts.disp || parts.disp == const0_rtx))
16651 : return true;
16652 :
16653 : /* Check we need to optimize. */
16654 2628372 : if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16655 2628031 : return false;
16656 :
16657 341 : split_cost = 0;
16658 :
16659 : /* Compute how many cycles we will add to execution time
16660 : if split lea into a sequence of instructions. */
16661 341 : if (parts.base || parts.index)
16662 : {
16663 : /* Have to use mov instruction if non desctructive
16664 : destination form is used. */
16665 341 : if (regno1 != regno0 && regno2 != regno0)
16666 266 : split_cost += 1;
16667 :
16668 : /* Have to add index to base if both exist. */
16669 341 : if (parts.base && parts.index)
16670 54 : split_cost += 1;
16671 :
16672 : /* Have to use shift and adds if scale is 2 or greater. */
16673 341 : if (parts.scale > 1)
16674 : {
16675 29 : if (regno0 != regno1)
16676 23 : split_cost += 1;
16677 6 : else if (regno2 == regno0)
16678 0 : split_cost += 4;
16679 : else
16680 6 : split_cost += parts.scale;
16681 : }
16682 :
16683 : /* Have to use add instruction with immediate if
16684 : disp is non zero. */
16685 341 : if (parts.disp && parts.disp != const0_rtx)
16686 280 : split_cost += 1;
16687 :
16688 : /* Subtract the price of lea. */
16689 341 : split_cost -= 1;
16690 : }
16691 :
16692 341 : return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16693 341 : parts.scale > 1);
16694 : }
16695 :
16696 : /* Return true if it is ok to optimize an ADD operation to LEA
16697 : operation to avoid flag register consumation. For most processors,
16698 : ADD is faster than LEA. For the processors like BONNELL, if the
16699 : destination register of LEA holds an actual address which will be
16700 : used soon, LEA is better and otherwise ADD is better. */
16701 :
16702 : bool
16703 9199993 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16704 : {
16705 9199993 : unsigned int regno0 = true_regnum (operands[0]);
16706 9199993 : unsigned int regno1 = true_regnum (operands[1]);
16707 9199993 : unsigned int regno2 = true_regnum (operands[2]);
16708 :
16709 : /* If a = b + c, (a!=b && a!=c), must use lea form. */
16710 9199993 : if (regno0 != regno1 && regno0 != regno2)
16711 : return true;
16712 :
16713 7163545 : if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16714 7162966 : return false;
16715 :
16716 579 : return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16717 : }
16718 :
16719 : /* Return true if destination reg of SET_BODY is shift count of
16720 : USE_BODY. */
16721 :
16722 : static bool
16723 89 : ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16724 : {
16725 89 : rtx set_dest;
16726 89 : rtx shift_rtx;
16727 89 : int i;
16728 :
16729 : /* Retrieve destination of SET_BODY. */
16730 89 : switch (GET_CODE (set_body))
16731 : {
16732 73 : case SET:
16733 73 : set_dest = SET_DEST (set_body);
16734 73 : if (!set_dest || !REG_P (set_dest))
16735 : return false;
16736 72 : break;
16737 8 : case PARALLEL:
16738 24 : for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16739 16 : if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16740 : use_body))
16741 : return true;
16742 : /* FALLTHROUGH */
16743 : default:
16744 : return false;
16745 : }
16746 :
16747 : /* Retrieve shift count of USE_BODY. */
16748 72 : switch (GET_CODE (use_body))
16749 : {
16750 24 : case SET:
16751 24 : shift_rtx = XEXP (use_body, 1);
16752 24 : break;
16753 24 : case PARALLEL:
16754 72 : for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16755 48 : if (ix86_dep_by_shift_count_body (set_body,
16756 48 : XVECEXP (use_body, 0, i)))
16757 : return true;
16758 : /* FALLTHROUGH */
16759 : default:
16760 : return false;
16761 : }
16762 :
16763 24 : if (shift_rtx
16764 24 : && (GET_CODE (shift_rtx) == ASHIFT
16765 21 : || GET_CODE (shift_rtx) == LSHIFTRT
16766 5 : || GET_CODE (shift_rtx) == ASHIFTRT
16767 0 : || GET_CODE (shift_rtx) == ROTATE
16768 0 : || GET_CODE (shift_rtx) == ROTATERT))
16769 : {
16770 24 : rtx shift_count = XEXP (shift_rtx, 1);
16771 :
16772 : /* Return true if shift count is dest of SET_BODY. */
16773 24 : if (REG_P (shift_count))
16774 : {
16775 : /* Add check since it can be invoked before register
16776 : allocation in pre-reload schedule. */
16777 0 : if (reload_completed
16778 0 : && true_regnum (set_dest) == true_regnum (shift_count))
16779 : return true;
16780 0 : else if (REGNO(set_dest) == REGNO(shift_count))
16781 : return true;
16782 : }
16783 : }
16784 :
16785 : return false;
16786 : }
16787 :
16788 : /* Return true if destination reg of SET_INSN is shift count of
16789 : USE_INSN. */
16790 :
16791 : bool
16792 25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16793 : {
16794 25 : return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16795 25 : PATTERN (use_insn));
16796 : }
16797 :
16798 : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16799 : are ok, keeping in mind the possible movddup alternative. */
16800 :
16801 : bool
16802 92334 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16803 : {
16804 92334 : if (MEM_P (operands[0]))
16805 2025 : return rtx_equal_p (operands[0], operands[1 + high]);
16806 90309 : if (MEM_P (operands[1]) && MEM_P (operands[2]))
16807 1009 : return false;
16808 : return true;
16809 : }
16810 :
16811 : /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16812 : then replicate the value for all elements of the vector
16813 : register. */
16814 :
16815 : rtx
16816 74736 : ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16817 : {
16818 74736 : int i, n_elt;
16819 74736 : rtvec v;
16820 74736 : machine_mode scalar_mode;
16821 :
16822 74736 : switch (mode)
16823 : {
16824 1280 : case E_V64QImode:
16825 1280 : case E_V32QImode:
16826 1280 : case E_V16QImode:
16827 1280 : case E_V32HImode:
16828 1280 : case E_V16HImode:
16829 1280 : case E_V8HImode:
16830 1280 : case E_V16SImode:
16831 1280 : case E_V8SImode:
16832 1280 : case E_V4SImode:
16833 1280 : case E_V2SImode:
16834 1280 : case E_V8DImode:
16835 1280 : case E_V4DImode:
16836 1280 : case E_V2DImode:
16837 1280 : gcc_assert (vect);
16838 : /* FALLTHRU */
16839 74736 : case E_V2HFmode:
16840 74736 : case E_V4HFmode:
16841 74736 : case E_V8HFmode:
16842 74736 : case E_V16HFmode:
16843 74736 : case E_V32HFmode:
16844 74736 : case E_V16SFmode:
16845 74736 : case E_V8SFmode:
16846 74736 : case E_V4SFmode:
16847 74736 : case E_V2SFmode:
16848 74736 : case E_V8DFmode:
16849 74736 : case E_V4DFmode:
16850 74736 : case E_V2DFmode:
16851 74736 : case E_V32BFmode:
16852 74736 : case E_V16BFmode:
16853 74736 : case E_V8BFmode:
16854 74736 : case E_V4BFmode:
16855 74736 : case E_V2BFmode:
16856 74736 : n_elt = GET_MODE_NUNITS (mode);
16857 74736 : v = rtvec_alloc (n_elt);
16858 74736 : scalar_mode = GET_MODE_INNER (mode);
16859 :
16860 74736 : RTVEC_ELT (v, 0) = value;
16861 :
16862 231856 : for (i = 1; i < n_elt; ++i)
16863 157120 : RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16864 :
16865 74736 : return gen_rtx_CONST_VECTOR (mode, v);
16866 :
16867 0 : default:
16868 0 : gcc_unreachable ();
16869 : }
16870 : }
16871 :
16872 : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16873 : and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16874 : for an SSE register. If VECT is true, then replicate the mask for
16875 : all elements of the vector register. If INVERT is true, then create
16876 : a mask excluding the sign bit. */
16877 :
16878 : rtx
16879 76117 : ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16880 : {
16881 76117 : machine_mode vec_mode, imode;
16882 76117 : wide_int w;
16883 76117 : rtx mask, v;
16884 :
16885 76117 : switch (mode)
16886 : {
16887 : case E_V2HFmode:
16888 : case E_V4HFmode:
16889 : case E_V8HFmode:
16890 : case E_V16HFmode:
16891 : case E_V32HFmode:
16892 : case E_V32BFmode:
16893 : case E_V16BFmode:
16894 : case E_V8BFmode:
16895 : case E_V4BFmode:
16896 : case E_V2BFmode:
16897 : vec_mode = mode;
16898 : imode = HImode;
16899 : break;
16900 :
16901 34132 : case E_V16SImode:
16902 34132 : case E_V16SFmode:
16903 34132 : case E_V8SImode:
16904 34132 : case E_V4SImode:
16905 34132 : case E_V8SFmode:
16906 34132 : case E_V4SFmode:
16907 34132 : case E_V2SFmode:
16908 34132 : case E_V2SImode:
16909 34132 : vec_mode = mode;
16910 34132 : imode = SImode;
16911 34132 : break;
16912 :
16913 39126 : case E_V8DImode:
16914 39126 : case E_V4DImode:
16915 39126 : case E_V2DImode:
16916 39126 : case E_V8DFmode:
16917 39126 : case E_V4DFmode:
16918 39126 : case E_V2DFmode:
16919 39126 : vec_mode = mode;
16920 39126 : imode = DImode;
16921 39126 : break;
16922 :
16923 2360 : case E_TImode:
16924 2360 : case E_TFmode:
16925 2360 : vec_mode = VOIDmode;
16926 2360 : imode = TImode;
16927 2360 : break;
16928 :
16929 0 : default:
16930 0 : gcc_unreachable ();
16931 : }
16932 :
16933 76117 : machine_mode inner_mode = GET_MODE_INNER (mode);
16934 152234 : w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16935 152234 : GET_MODE_BITSIZE (inner_mode));
16936 76117 : if (invert)
16937 39835 : w = wi::bit_not (w);
16938 :
16939 : /* Force this value into the low part of a fp vector constant. */
16940 76117 : mask = immed_wide_int_const (w, imode);
16941 76117 : mask = gen_lowpart (inner_mode, mask);
16942 :
16943 76117 : if (vec_mode == VOIDmode)
16944 2360 : return force_reg (inner_mode, mask);
16945 :
16946 73757 : v = ix86_build_const_vector (vec_mode, vect, mask);
16947 73757 : return force_reg (vec_mode, v);
16948 76117 : }
16949 :
16950 : /* Return HOST_WIDE_INT for const vector OP in MODE. */
16951 :
16952 : HOST_WIDE_INT
16953 136383 : ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16954 : {
16955 288980 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16956 0 : gcc_unreachable ();
16957 :
16958 136383 : int nunits = GET_MODE_NUNITS (mode);
16959 272766 : wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16960 136383 : machine_mode innermode = GET_MODE_INNER (mode);
16961 136383 : unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16962 :
16963 136383 : switch (mode)
16964 : {
16965 : case E_V2QImode:
16966 : case E_V4QImode:
16967 : case E_V2HImode:
16968 : case E_V8QImode:
16969 : case E_V4HImode:
16970 : case E_V2SImode:
16971 468471 : for (int i = 0; i < nunits; ++i)
16972 : {
16973 336646 : int v = INTVAL (XVECEXP (op, 0, i));
16974 336646 : wide_int wv = wi::shwi (v, innermode_bits);
16975 336646 : val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16976 336646 : }
16977 : break;
16978 88 : case E_V1SImode:
16979 88 : case E_V1DImode:
16980 88 : op = CONST_VECTOR_ELT (op, 0);
16981 88 : return INTVAL (op);
16982 : case E_V2HFmode:
16983 : case E_V2BFmode:
16984 : case E_V4HFmode:
16985 : case E_V4BFmode:
16986 : case E_V2SFmode:
16987 13432 : for (int i = 0; i < nunits; ++i)
16988 : {
16989 8962 : rtx x = XVECEXP (op, 0, i);
16990 8962 : int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16991 8962 : REAL_MODE_FORMAT (innermode));
16992 8962 : wide_int wv = wi::shwi (v, innermode_bits);
16993 8962 : val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16994 8962 : }
16995 : break;
16996 0 : default:
16997 0 : gcc_unreachable ();
16998 : }
16999 :
17000 136295 : return val.to_shwi ();
17001 136383 : }
17002 :
17003 32 : int ix86_get_flags_cc (rtx_code code)
17004 : {
17005 32 : switch (code)
17006 : {
17007 : case NE: return X86_CCNE;
17008 : case EQ: return X86_CCE;
17009 : case GE: return X86_CCNL;
17010 : case GT: return X86_CCNLE;
17011 : case LE: return X86_CCLE;
17012 : case LT: return X86_CCL;
17013 : case GEU: return X86_CCNB;
17014 : case GTU: return X86_CCNBE;
17015 : case LEU: return X86_CCBE;
17016 : case LTU: return X86_CCB;
17017 : default: return -1;
17018 : }
17019 : }
17020 :
17021 : /* Return TRUE or FALSE depending on whether the first SET in INSN
17022 : has source and destination with matching CC modes, and that the
17023 : CC mode is at least as constrained as REQ_MODE. */
17024 :
17025 : bool
17026 53906303 : ix86_match_ccmode (rtx insn, machine_mode req_mode)
17027 : {
17028 53906303 : rtx set;
17029 53906303 : machine_mode set_mode;
17030 :
17031 53906303 : set = PATTERN (insn);
17032 53906303 : if (GET_CODE (set) == PARALLEL)
17033 496796 : set = XVECEXP (set, 0, 0);
17034 53906303 : gcc_assert (GET_CODE (set) == SET);
17035 53906303 : gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17036 :
17037 53906303 : set_mode = GET_MODE (SET_DEST (set));
17038 53906303 : switch (set_mode)
17039 : {
17040 1418412 : case E_CCNOmode:
17041 1418412 : if (req_mode != CCNOmode
17042 97669 : && (req_mode != CCmode
17043 0 : || XEXP (SET_SRC (set), 1) != const0_rtx))
17044 : return false;
17045 : break;
17046 5576464 : case E_CCmode:
17047 5576464 : if (req_mode == CCGCmode)
17048 : return false;
17049 : /* FALLTHRU */
17050 9178195 : case E_CCGCmode:
17051 9178195 : if (req_mode == CCGOCmode || req_mode == CCNOmode)
17052 : return false;
17053 : /* FALLTHRU */
17054 10234808 : case E_CCGOCmode:
17055 10234808 : if (req_mode == CCZmode)
17056 : return false;
17057 : /* FALLTHRU */
17058 : case E_CCZmode:
17059 : break;
17060 :
17061 0 : case E_CCGZmode:
17062 :
17063 0 : case E_CCAmode:
17064 0 : case E_CCCmode:
17065 0 : case E_CCOmode:
17066 0 : case E_CCPmode:
17067 0 : case E_CCSmode:
17068 0 : if (set_mode != req_mode)
17069 : return false;
17070 : break;
17071 :
17072 0 : default:
17073 0 : gcc_unreachable ();
17074 : }
17075 :
17076 53800017 : return GET_MODE (SET_SRC (set)) == set_mode;
17077 : }
17078 :
17079 : machine_mode
17080 13622090 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17081 : {
17082 13622090 : machine_mode mode = GET_MODE (op0);
17083 :
17084 13622090 : if (SCALAR_FLOAT_MODE_P (mode))
17085 : {
17086 143988 : gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17087 : return CCFPmode;
17088 : }
17089 :
17090 13478102 : switch (code)
17091 : {
17092 : /* Only zero flag is needed. */
17093 : case EQ: /* ZF=0 */
17094 : case NE: /* ZF!=0 */
17095 : return CCZmode;
17096 : /* Codes needing carry flag. */
17097 986831 : case GEU: /* CF=0 */
17098 986831 : case LTU: /* CF=1 */
17099 986831 : rtx geu;
17100 : /* Detect overflow checks. They need just the carry flag. */
17101 986831 : if (GET_CODE (op0) == PLUS
17102 986831 : && (rtx_equal_p (op1, XEXP (op0, 0))
17103 129238 : || rtx_equal_p (op1, XEXP (op0, 1))))
17104 17412 : return CCCmode;
17105 : /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
17106 : Match LTU of op0
17107 : (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
17108 : and op1
17109 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
17110 : where CC_CCC is either CC or CCC. */
17111 969419 : else if (code == LTU
17112 372247 : && GET_CODE (op0) == NEG
17113 18 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
17114 0 : && REG_P (XEXP (geu, 0))
17115 0 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
17116 0 : || GET_MODE (XEXP (geu, 0)) == CCmode)
17117 0 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
17118 0 : && XEXP (geu, 1) == const0_rtx
17119 0 : && GET_CODE (op1) == LTU
17120 0 : && REG_P (XEXP (op1, 0))
17121 0 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
17122 0 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
17123 969419 : && XEXP (op1, 1) == const0_rtx)
17124 : return CCCmode;
17125 : /* Similarly for *x86_cmc pattern.
17126 : Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
17127 : and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
17128 : It is sufficient to test that the operand modes are CCCmode. */
17129 969419 : else if (code == LTU
17130 372247 : && GET_CODE (op0) == NEG
17131 18 : && GET_CODE (XEXP (op0, 0)) == LTU
17132 0 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
17133 0 : && GET_CODE (op1) == GEU
17134 0 : && GET_MODE (XEXP (op1, 0)) == CCCmode)
17135 : return CCCmode;
17136 : /* Similarly for the comparison of addcarry/subborrow pattern. */
17137 372247 : else if (code == LTU
17138 372247 : && GET_CODE (op0) == ZERO_EXTEND
17139 15254 : && GET_CODE (op1) == PLUS
17140 11004 : && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
17141 11004 : && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
17142 : return CCCmode;
17143 : else
17144 958415 : return CCmode;
17145 : case GTU: /* CF=0 & ZF=0 */
17146 : case LEU: /* CF=1 | ZF=1 */
17147 : return CCmode;
17148 : /* Codes possibly doable only with sign flag when
17149 : comparing against zero. */
17150 784194 : case GE: /* SF=OF or SF=0 */
17151 784194 : case LT: /* SF<>OF or SF=1 */
17152 784194 : if (op1 == const0_rtx)
17153 : return CCGOCmode;
17154 : else
17155 : /* For other cases Carry flag is not required. */
17156 443127 : return CCGCmode;
17157 : /* Codes doable only with sign flag when comparing
17158 : against zero, but we miss jump instruction for it
17159 : so we need to use relational tests against overflow
17160 : that thus needs to be zero. */
17161 900356 : case GT: /* ZF=0 & SF=OF */
17162 900356 : case LE: /* ZF=1 | SF<>OF */
17163 900356 : if (op1 == const0_rtx)
17164 : return CCNOmode;
17165 : else
17166 597759 : return CCGCmode;
17167 : default:
17168 : /* CCmode should be used in all other cases. */
17169 : return CCmode;
17170 : }
17171 : }
17172 :
17173 : /* Return TRUE or FALSE depending on whether the ptest instruction
17174 : INSN has source and destination with suitable matching CC modes. */
17175 :
17176 : bool
17177 91619 : ix86_match_ptest_ccmode (rtx insn)
17178 : {
17179 91619 : rtx set, src;
17180 91619 : machine_mode set_mode;
17181 :
17182 91619 : set = PATTERN (insn);
17183 91619 : gcc_assert (GET_CODE (set) == SET);
17184 91619 : src = SET_SRC (set);
17185 91619 : gcc_assert (GET_CODE (src) == UNSPEC
17186 : && XINT (src, 1) == UNSPEC_PTEST);
17187 :
17188 91619 : set_mode = GET_MODE (src);
17189 91619 : if (set_mode != CCZmode
17190 : && set_mode != CCCmode
17191 : && set_mode != CCmode)
17192 : return false;
17193 91619 : return GET_MODE (SET_DEST (set)) == set_mode;
17194 : }
17195 :
17196 : /* Return the fixed registers used for condition codes. */
17197 :
17198 : static bool
17199 18731686 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17200 : {
17201 18731686 : *p1 = FLAGS_REG;
17202 18731686 : *p2 = INVALID_REGNUM;
17203 18731686 : return true;
17204 : }
17205 :
17206 : /* If two condition code modes are compatible, return a condition code
17207 : mode which is compatible with both. Otherwise, return
17208 : VOIDmode. */
17209 :
17210 : static machine_mode
17211 30435 : ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
17212 : {
17213 30435 : if (m1 == m2)
17214 : return m1;
17215 :
17216 29663 : if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17217 : return VOIDmode;
17218 :
17219 29663 : if ((m1 == CCGCmode && m2 == CCGOCmode)
17220 29663 : || (m1 == CCGOCmode && m2 == CCGCmode))
17221 : return CCGCmode;
17222 :
17223 29663 : if ((m1 == CCNOmode && m2 == CCGOCmode)
17224 29482 : || (m1 == CCGOCmode && m2 == CCNOmode))
17225 : return CCNOmode;
17226 :
17227 29371 : if (m1 == CCZmode
17228 15667 : && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
17229 : return m2;
17230 16910 : else if (m2 == CCZmode
17231 13448 : && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
17232 : return m1;
17233 :
17234 6777 : switch (m1)
17235 : {
17236 0 : default:
17237 0 : gcc_unreachable ();
17238 :
17239 6777 : case E_CCmode:
17240 6777 : case E_CCGCmode:
17241 6777 : case E_CCGOCmode:
17242 6777 : case E_CCNOmode:
17243 6777 : case E_CCAmode:
17244 6777 : case E_CCCmode:
17245 6777 : case E_CCOmode:
17246 6777 : case E_CCPmode:
17247 6777 : case E_CCSmode:
17248 6777 : case E_CCZmode:
17249 6777 : switch (m2)
17250 : {
17251 : default:
17252 : return VOIDmode;
17253 :
17254 : case E_CCmode:
17255 : case E_CCGCmode:
17256 : case E_CCGOCmode:
17257 : case E_CCNOmode:
17258 : case E_CCAmode:
17259 : case E_CCCmode:
17260 : case E_CCOmode:
17261 : case E_CCPmode:
17262 : case E_CCSmode:
17263 : case E_CCZmode:
17264 : return CCmode;
17265 : }
17266 :
17267 : case E_CCFPmode:
17268 : /* These are only compatible with themselves, which we already
17269 : checked above. */
17270 : return VOIDmode;
17271 : }
17272 : }
17273 :
17274 : /* Return strategy to use for floating-point. We assume that fcomi is always
17275 : preferrable where available, since that is also true when looking at size
17276 : (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17277 :
17278 : enum ix86_fpcmp_strategy
17279 5538155 : ix86_fp_comparison_strategy (enum rtx_code)
17280 : {
17281 : /* Do fcomi/sahf based test when profitable. */
17282 :
17283 5538155 : if (TARGET_CMOVE)
17284 : return IX86_FPCMP_COMI;
17285 :
17286 0 : if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
17287 0 : return IX86_FPCMP_SAHF;
17288 :
17289 : return IX86_FPCMP_ARITH;
17290 : }
17291 :
17292 : /* Convert comparison codes we use to represent FP comparison to integer
17293 : code that will result in proper branch. Return UNKNOWN if no such code
17294 : is available. */
17295 :
17296 : enum rtx_code
17297 583971 : ix86_fp_compare_code_to_integer (enum rtx_code code)
17298 : {
17299 583971 : switch (code)
17300 : {
17301 : case GT:
17302 : return GTU;
17303 17870 : case GE:
17304 17870 : return GEU;
17305 : case ORDERED:
17306 : case UNORDERED:
17307 : return code;
17308 118836 : case UNEQ:
17309 118836 : return EQ;
17310 17368 : case UNLT:
17311 17368 : return LTU;
17312 31457 : case UNLE:
17313 31457 : return LEU;
17314 113455 : case LTGT:
17315 113455 : return NE;
17316 683 : case EQ:
17317 683 : case NE:
17318 683 : if (TARGET_AVX10_2)
17319 : return code;
17320 : /* FALLTHRU. */
17321 225 : default:
17322 225 : return UNKNOWN;
17323 : }
17324 : }
17325 :
17326 : /* Zero extend possibly SImode EXP to Pmode register. */
17327 : rtx
17328 45971 : ix86_zero_extend_to_Pmode (rtx exp)
17329 : {
17330 57867 : return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
17331 : }
17332 :
17333 : /* Return true if the function is called via PLT. */
17334 :
17335 : bool
17336 996153 : ix86_call_use_plt_p (rtx call_op)
17337 : {
17338 996153 : if (SYMBOL_REF_LOCAL_P (call_op))
17339 : {
17340 198677 : if (SYMBOL_REF_DECL (call_op)
17341 198677 : && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
17342 : {
17343 : /* NB: All ifunc functions must be called via PLT. */
17344 115449 : cgraph_node *node
17345 115449 : = cgraph_node::get (SYMBOL_REF_DECL (call_op));
17346 115449 : if (node && node->ifunc_resolver)
17347 : return true;
17348 : }
17349 198657 : return false;
17350 : }
17351 : return true;
17352 : }
17353 :
17354 : /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
17355 : the PLT entry will be used as the function address for local IFUNC
17356 : functions. When the PIC register is needed for PLT call, indirect
17357 : call via the PLT entry will fail since the PIC register may not be
17358 : set up properly for indirect call. In this case, we should return
17359 : false. */
17360 :
17361 : static bool
17362 770335642 : ix86_ifunc_ref_local_ok (void)
17363 : {
17364 770335642 : return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
17365 : }
17366 :
17367 : /* Return true if the function being called was marked with attribute
17368 : "noplt" or using -fno-plt and we are compiling for non-PIC. We need
17369 : to handle the non-PIC case in the backend because there is no easy
17370 : interface for the front-end to force non-PLT calls to use the GOT.
17371 : This is currently used only with 64-bit or 32-bit GOT32X ELF targets
17372 : to call the function marked "noplt" indirectly. */
17373 :
17374 : bool
17375 5925460 : ix86_nopic_noplt_attribute_p (rtx call_op)
17376 : {
17377 5433042 : if (flag_pic || ix86_cmodel == CM_LARGE
17378 : || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17379 : || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17380 11358502 : || SYMBOL_REF_LOCAL_P (call_op))
17381 : return false;
17382 :
17383 3806704 : tree symbol_decl = SYMBOL_REF_DECL (call_op);
17384 :
17385 3806704 : if (!flag_plt
17386 3806704 : || (symbol_decl != NULL_TREE
17387 3806672 : && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
17388 34 : return true;
17389 :
17390 : return false;
17391 : }
17392 :
17393 : /* Helper to output the jmp/call. */
17394 : static void
17395 33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
17396 : {
17397 33 : if (thunk_name != NULL)
17398 : {
17399 22 : if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17400 1 : && ix86_indirect_branch_cs_prefix)
17401 1 : fprintf (asm_out_file, "\tcs\n");
17402 22 : fprintf (asm_out_file, "\tjmp\t");
17403 22 : assemble_name (asm_out_file, thunk_name);
17404 22 : putc ('\n', asm_out_file);
17405 22 : if ((ix86_harden_sls & harden_sls_indirect_jmp))
17406 2 : fputs ("\tint3\n", asm_out_file);
17407 : }
17408 : else
17409 11 : output_indirect_thunk (regno);
17410 33 : }
17411 :
17412 : /* Output indirect branch via a call and return thunk. CALL_OP is a
17413 : register which contains the branch target. XASM is the assembly
17414 : template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17415 : A normal call is converted to:
17416 :
17417 : call __x86_indirect_thunk_reg
17418 :
17419 : and a tail call is converted to:
17420 :
17421 : jmp __x86_indirect_thunk_reg
17422 : */
17423 :
17424 : static void
17425 50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17426 : {
17427 50 : char thunk_name_buf[32];
17428 50 : char *thunk_name;
17429 50 : enum indirect_thunk_prefix need_prefix
17430 50 : = indirect_thunk_need_prefix (current_output_insn);
17431 50 : int regno = REGNO (call_op);
17432 :
17433 50 : if (cfun->machine->indirect_branch_type
17434 50 : != indirect_branch_thunk_inline)
17435 : {
17436 39 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17437 16 : SET_HARD_REG_BIT (indirect_thunks_used, regno);
17438 :
17439 39 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17440 39 : thunk_name = thunk_name_buf;
17441 : }
17442 : else
17443 : thunk_name = NULL;
17444 :
17445 50 : if (sibcall_p)
17446 27 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17447 : else
17448 : {
17449 23 : if (thunk_name != NULL)
17450 : {
17451 17 : if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
17452 1 : && ix86_indirect_branch_cs_prefix)
17453 1 : fprintf (asm_out_file, "\tcs\n");
17454 17 : fprintf (asm_out_file, "\tcall\t");
17455 17 : assemble_name (asm_out_file, thunk_name);
17456 17 : putc ('\n', asm_out_file);
17457 17 : return;
17458 : }
17459 :
17460 6 : char indirectlabel1[32];
17461 6 : char indirectlabel2[32];
17462 :
17463 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17464 : INDIRECT_LABEL,
17465 : indirectlabelno++);
17466 6 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17467 : INDIRECT_LABEL,
17468 : indirectlabelno++);
17469 :
17470 : /* Jump. */
17471 6 : fputs ("\tjmp\t", asm_out_file);
17472 6 : assemble_name_raw (asm_out_file, indirectlabel2);
17473 6 : fputc ('\n', asm_out_file);
17474 :
17475 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17476 :
17477 6 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17478 :
17479 6 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17480 :
17481 : /* Call. */
17482 6 : fputs ("\tcall\t", asm_out_file);
17483 6 : assemble_name_raw (asm_out_file, indirectlabel1);
17484 6 : fputc ('\n', asm_out_file);
17485 : }
17486 : }
17487 :
17488 : /* Output indirect branch via a call and return thunk. CALL_OP is
17489 : the branch target. XASM is the assembly template for CALL_OP.
17490 : Branch is a tail call if SIBCALL_P is true. A normal call is
17491 : converted to:
17492 :
17493 : jmp L2
17494 : L1:
17495 : push CALL_OP
17496 : jmp __x86_indirect_thunk
17497 : L2:
17498 : call L1
17499 :
17500 : and a tail call is converted to:
17501 :
17502 : push CALL_OP
17503 : jmp __x86_indirect_thunk
17504 : */
17505 :
17506 : static void
17507 0 : ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
17508 : bool sibcall_p)
17509 : {
17510 0 : char thunk_name_buf[32];
17511 0 : char *thunk_name;
17512 0 : char push_buf[64];
17513 0 : enum indirect_thunk_prefix need_prefix
17514 0 : = indirect_thunk_need_prefix (current_output_insn);
17515 0 : int regno = -1;
17516 :
17517 0 : if (cfun->machine->indirect_branch_type
17518 0 : != indirect_branch_thunk_inline)
17519 : {
17520 0 : if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17521 0 : indirect_thunk_needed = true;
17522 0 : indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17523 0 : thunk_name = thunk_name_buf;
17524 : }
17525 : else
17526 : thunk_name = NULL;
17527 :
17528 0 : snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
17529 0 : TARGET_64BIT ? 'q' : 'l', xasm);
17530 :
17531 0 : if (sibcall_p)
17532 : {
17533 0 : output_asm_insn (push_buf, &call_op);
17534 0 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17535 : }
17536 : else
17537 : {
17538 0 : char indirectlabel1[32];
17539 0 : char indirectlabel2[32];
17540 :
17541 0 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17542 : INDIRECT_LABEL,
17543 : indirectlabelno++);
17544 0 : ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17545 : INDIRECT_LABEL,
17546 : indirectlabelno++);
17547 :
17548 : /* Jump. */
17549 0 : fputs ("\tjmp\t", asm_out_file);
17550 0 : assemble_name_raw (asm_out_file, indirectlabel2);
17551 0 : fputc ('\n', asm_out_file);
17552 :
17553 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17554 :
17555 : /* An external function may be called via GOT, instead of PLT. */
17556 0 : if (MEM_P (call_op))
17557 : {
17558 0 : struct ix86_address parts;
17559 0 : rtx addr = XEXP (call_op, 0);
17560 0 : if (ix86_decompose_address (addr, &parts)
17561 0 : && parts.base == stack_pointer_rtx)
17562 : {
17563 : /* Since call will adjust stack by -UNITS_PER_WORD,
17564 : we must convert "disp(stack, index, scale)" to
17565 : "disp+UNITS_PER_WORD(stack, index, scale)". */
17566 0 : if (parts.index)
17567 : {
17568 0 : addr = gen_rtx_MULT (Pmode, parts.index,
17569 : GEN_INT (parts.scale));
17570 0 : addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17571 : addr);
17572 : }
17573 : else
17574 : addr = stack_pointer_rtx;
17575 :
17576 0 : rtx disp;
17577 0 : if (parts.disp != NULL_RTX)
17578 0 : disp = plus_constant (Pmode, parts.disp,
17579 0 : UNITS_PER_WORD);
17580 : else
17581 0 : disp = GEN_INT (UNITS_PER_WORD);
17582 :
17583 0 : addr = gen_rtx_PLUS (Pmode, addr, disp);
17584 0 : call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
17585 : }
17586 : }
17587 :
17588 0 : output_asm_insn (push_buf, &call_op);
17589 :
17590 0 : ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17591 :
17592 0 : ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17593 :
17594 : /* Call. */
17595 0 : fputs ("\tcall\t", asm_out_file);
17596 0 : assemble_name_raw (asm_out_file, indirectlabel1);
17597 0 : fputc ('\n', asm_out_file);
17598 : }
17599 0 : }
17600 :
17601 : /* Output indirect branch via a call and return thunk. CALL_OP is
17602 : the branch target. XASM is the assembly template for CALL_OP.
17603 : Branch is a tail call if SIBCALL_P is true. */
17604 :
17605 : static void
17606 50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
17607 : bool sibcall_p)
17608 : {
17609 50 : if (REG_P (call_op))
17610 50 : ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17611 : else
17612 0 : ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17613 50 : }
17614 :
17615 : /* Output indirect jump. CALL_OP is the jump target. */
17616 :
17617 : const char *
17618 7633 : ix86_output_indirect_jmp (rtx call_op)
17619 : {
17620 7633 : if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17621 : {
17622 : /* We can't have red-zone since "call" in the indirect thunk
17623 : pushes the return address onto stack, destroying red-zone. */
17624 4 : if (ix86_red_zone_used)
17625 0 : gcc_unreachable ();
17626 :
17627 4 : ix86_output_indirect_branch (call_op, "%0", true);
17628 : }
17629 : else
17630 7629 : output_asm_insn ("%!jmp\t%A0", &call_op);
17631 7633 : return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17632 : }
17633 :
17634 : /* Output return instrumentation for current function if needed. */
17635 :
17636 : static void
17637 1709774 : output_return_instrumentation (void)
17638 : {
17639 1709774 : if (ix86_instrument_return != instrument_return_none
17640 6 : && flag_fentry
17641 1709780 : && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17642 : {
17643 5 : if (ix86_flag_record_return)
17644 5 : fprintf (asm_out_file, "1:\n");
17645 5 : switch (ix86_instrument_return)
17646 : {
17647 2 : case instrument_return_call:
17648 2 : fprintf (asm_out_file, "\tcall\t__return__\n");
17649 2 : break;
17650 3 : case instrument_return_nop5:
17651 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17652 3 : fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17653 3 : break;
17654 : case instrument_return_none:
17655 : break;
17656 : }
17657 :
17658 5 : if (ix86_flag_record_return)
17659 : {
17660 5 : fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17661 5 : fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17662 5 : fprintf (asm_out_file, "\t.previous\n");
17663 : }
17664 : }
17665 1709774 : }
17666 :
17667 : /* Output function return. CALL_OP is the jump target. Add a REP
17668 : prefix to RET if LONG_P is true and function return is kept. */
17669 :
17670 : const char *
17671 1580582 : ix86_output_function_return (bool long_p)
17672 : {
17673 1580582 : output_return_instrumentation ();
17674 :
17675 1580582 : if (cfun->machine->function_return_type != indirect_branch_keep)
17676 : {
17677 17 : char thunk_name[32];
17678 17 : enum indirect_thunk_prefix need_prefix
17679 17 : = indirect_thunk_need_prefix (current_output_insn);
17680 :
17681 17 : if (cfun->machine->function_return_type
17682 17 : != indirect_branch_thunk_inline)
17683 : {
17684 12 : bool need_thunk = (cfun->machine->function_return_type
17685 : == indirect_branch_thunk);
17686 12 : indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17687 : true);
17688 12 : indirect_return_needed |= need_thunk;
17689 12 : fprintf (asm_out_file, "\tjmp\t");
17690 12 : assemble_name (asm_out_file, thunk_name);
17691 12 : putc ('\n', asm_out_file);
17692 : }
17693 : else
17694 5 : output_indirect_thunk (INVALID_REGNUM);
17695 :
17696 17 : return "";
17697 : }
17698 :
17699 3160641 : output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17700 1580565 : return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17701 : }
17702 :
17703 : /* Output indirect function return. RET_OP is the function return
17704 : target. */
17705 :
17706 : const char *
17707 17 : ix86_output_indirect_function_return (rtx ret_op)
17708 : {
17709 17 : if (cfun->machine->function_return_type != indirect_branch_keep)
17710 : {
17711 0 : char thunk_name[32];
17712 0 : enum indirect_thunk_prefix need_prefix
17713 0 : = indirect_thunk_need_prefix (current_output_insn);
17714 0 : unsigned int regno = REGNO (ret_op);
17715 0 : gcc_assert (regno == CX_REG);
17716 :
17717 0 : if (cfun->machine->function_return_type
17718 0 : != indirect_branch_thunk_inline)
17719 : {
17720 0 : bool need_thunk = (cfun->machine->function_return_type
17721 : == indirect_branch_thunk);
17722 0 : indirect_thunk_name (thunk_name, regno, need_prefix, true);
17723 :
17724 0 : if (need_thunk)
17725 : {
17726 0 : indirect_return_via_cx = true;
17727 0 : SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17728 : }
17729 0 : fprintf (asm_out_file, "\tjmp\t");
17730 0 : assemble_name (asm_out_file, thunk_name);
17731 0 : putc ('\n', asm_out_file);
17732 : }
17733 : else
17734 0 : output_indirect_thunk (regno);
17735 : }
17736 : else
17737 : {
17738 17 : output_asm_insn ("%!jmp\t%A0", &ret_op);
17739 17 : if (ix86_harden_sls & harden_sls_indirect_jmp)
17740 1 : fputs ("\tint3\n", asm_out_file);
17741 : }
17742 17 : return "";
17743 : }
17744 :
17745 : /* Output the assembly for a call instruction. */
17746 :
17747 : const char *
17748 6106824 : ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17749 : {
17750 6106824 : bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17751 6106824 : bool output_indirect_p
17752 : = (!TARGET_SEH
17753 6106824 : && cfun->machine->indirect_branch_type != indirect_branch_keep);
17754 6106824 : bool seh_nop_p = false;
17755 6106824 : const char *xasm;
17756 :
17757 6106824 : if (SIBLING_CALL_P (insn))
17758 : {
17759 129192 : output_return_instrumentation ();
17760 129192 : if (direct_p)
17761 : {
17762 119610 : if (ix86_nopic_noplt_attribute_p (call_op))
17763 : {
17764 4 : direct_p = false;
17765 4 : if (TARGET_64BIT)
17766 : {
17767 4 : if (output_indirect_p)
17768 : xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17769 : else
17770 4 : xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17771 : }
17772 : else
17773 : {
17774 0 : if (output_indirect_p)
17775 : xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17776 : else
17777 0 : xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17778 : }
17779 : }
17780 : else
17781 : xasm = "%!jmp\t%P0";
17782 : }
17783 : /* SEH epilogue detection requires the indirect branch case
17784 : to include REX.W. */
17785 9582 : else if (TARGET_SEH)
17786 : xasm = "%!rex.W jmp\t%A0";
17787 : else
17788 : {
17789 9582 : if (output_indirect_p)
17790 : xasm = "%0";
17791 : else
17792 9559 : xasm = "%!jmp\t%A0";
17793 : }
17794 :
17795 129192 : if (output_indirect_p && !direct_p)
17796 23 : ix86_output_indirect_branch (call_op, xasm, true);
17797 : else
17798 : {
17799 129169 : output_asm_insn (xasm, &call_op);
17800 129169 : if (!direct_p
17801 9563 : && (ix86_harden_sls & harden_sls_indirect_jmp))
17802 : return "int3";
17803 : }
17804 129191 : return "";
17805 : }
17806 :
17807 : /* SEH unwinding can require an extra nop to be emitted in several
17808 : circumstances. Determine if we have one of those. */
17809 5977632 : if (TARGET_SEH)
17810 : {
17811 : rtx_insn *i;
17812 :
17813 : for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17814 : {
17815 : /* Prevent a catch region from being adjacent to a jump that would
17816 : be interpreted as an epilogue sequence by the unwinder. */
17817 : if (JUMP_P(i) && CROSSING_JUMP_P (i))
17818 : {
17819 : seh_nop_p = true;
17820 : break;
17821 : }
17822 :
17823 : /* If we get to another real insn, we don't need the nop. */
17824 : if (INSN_P (i))
17825 : break;
17826 :
17827 : /* If we get to the epilogue note, prevent a catch region from
17828 : being adjacent to the standard epilogue sequence. Note that,
17829 : if non-call exceptions are enabled, we already did it during
17830 : epilogue expansion, or else, if the insn can throw internally,
17831 : we already did it during the reorg pass. */
17832 : if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17833 : && !flag_non_call_exceptions
17834 : && !can_throw_internal (insn))
17835 : {
17836 : seh_nop_p = true;
17837 : break;
17838 : }
17839 : }
17840 :
17841 : /* If we didn't find a real insn following the call, prevent the
17842 : unwinder from looking into the next function. */
17843 : if (i == NULL)
17844 : seh_nop_p = true;
17845 : }
17846 :
17847 5977632 : if (direct_p)
17848 : {
17849 5804828 : if (ix86_nopic_noplt_attribute_p (call_op))
17850 : {
17851 6 : direct_p = false;
17852 6 : if (TARGET_64BIT)
17853 : {
17854 6 : if (output_indirect_p)
17855 : xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17856 : else
17857 6 : xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17858 : }
17859 : else
17860 : {
17861 0 : if (output_indirect_p)
17862 : xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17863 : else
17864 0 : xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17865 : }
17866 : }
17867 : else
17868 : xasm = "%!call\t%P0";
17869 : }
17870 : else
17871 : {
17872 172804 : if (output_indirect_p)
17873 : xasm = "%0";
17874 : else
17875 172781 : xasm = "%!call\t%A0";
17876 : }
17877 :
17878 5977632 : if (output_indirect_p && !direct_p)
17879 23 : ix86_output_indirect_branch (call_op, xasm, false);
17880 : else
17881 5977609 : output_asm_insn (xasm, &call_op);
17882 :
17883 : if (seh_nop_p)
17884 : return "nop";
17885 :
17886 : return "";
17887 : }
17888 :
17889 : /* Return a MEM corresponding to a stack slot with mode MODE.
17890 : Allocate a new slot if necessary.
17891 :
17892 : The RTL for a function can have several slots available: N is
17893 : which slot to use. */
17894 :
17895 : rtx
17896 22370 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17897 : {
17898 22370 : struct stack_local_entry *s;
17899 :
17900 22370 : gcc_assert (n < MAX_386_STACK_LOCALS);
17901 :
17902 33733 : for (s = ix86_stack_locals; s; s = s->next)
17903 31150 : if (s->mode == mode && s->n == n)
17904 19787 : return validize_mem (copy_rtx (s->rtl));
17905 :
17906 2583 : int align = 0;
17907 : /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17908 : alignment with -m32 -mpreferred-stack-boundary=2. */
17909 2583 : if (mode == DImode
17910 329 : && !TARGET_64BIT
17911 329 : && n == SLOT_FLOATxFDI_387
17912 2912 : && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17913 : align = 32;
17914 2583 : s = ggc_alloc<stack_local_entry> ();
17915 2583 : s->n = n;
17916 2583 : s->mode = mode;
17917 5166 : s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17918 :
17919 2583 : s->next = ix86_stack_locals;
17920 2583 : ix86_stack_locals = s;
17921 2583 : return validize_mem (copy_rtx (s->rtl));
17922 : }
17923 :
17924 : static void
17925 1481478 : ix86_instantiate_decls (void)
17926 : {
17927 1481478 : struct stack_local_entry *s;
17928 :
17929 1481478 : for (s = ix86_stack_locals; s; s = s->next)
17930 0 : if (s->rtl != NULL_RTX)
17931 0 : instantiate_decl_rtl (s->rtl);
17932 1481478 : }
17933 :
17934 : /* Check whether x86 address PARTS is a pc-relative address. */
17935 :
17936 : bool
17937 27343073 : ix86_rip_relative_addr_p (struct ix86_address *parts)
17938 : {
17939 27343073 : rtx base, index, disp;
17940 :
17941 27343073 : base = parts->base;
17942 27343073 : index = parts->index;
17943 27343073 : disp = parts->disp;
17944 :
17945 27343073 : if (disp && !base && !index)
17946 : {
17947 25571187 : if (TARGET_64BIT)
17948 : {
17949 23906482 : rtx symbol = disp;
17950 :
17951 23906482 : if (GET_CODE (disp) == CONST)
17952 7848612 : symbol = XEXP (disp, 0);
17953 23906482 : if (GET_CODE (symbol) == PLUS
17954 7333810 : && CONST_INT_P (XEXP (symbol, 1)))
17955 7333810 : symbol = XEXP (symbol, 0);
17956 :
17957 23906482 : if (LABEL_REF_P (symbol)
17958 23898955 : || (SYMBOL_REF_P (symbol)
17959 22633741 : && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17960 25171696 : || (GET_CODE (symbol) == UNSPEC
17961 533703 : && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17962 : || XINT (symbol, 1) == UNSPEC_PCREL
17963 : || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17964 23147488 : return true;
17965 : }
17966 : }
17967 : return false;
17968 : }
17969 :
17970 : /* Calculate the length of the memory address in the instruction encoding.
17971 : Includes addr32 prefix, does not include the one-byte modrm, opcode,
17972 : or other prefixes. We never generate addr32 prefix for LEA insn. */
17973 :
17974 : int
17975 272086731 : memory_address_length (rtx addr, bool lea)
17976 : {
17977 272086731 : struct ix86_address parts;
17978 272086731 : rtx base, index, disp;
17979 272086731 : int len;
17980 272086731 : int ok;
17981 :
17982 272086731 : if (GET_CODE (addr) == PRE_DEC
17983 263522485 : || GET_CODE (addr) == POST_INC
17984 259011414 : || GET_CODE (addr) == PRE_MODIFY
17985 259011414 : || GET_CODE (addr) == POST_MODIFY)
17986 : return 0;
17987 :
17988 259011414 : ok = ix86_decompose_address (addr, &parts);
17989 259011414 : gcc_assert (ok);
17990 :
17991 259011414 : len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17992 :
17993 : /* If this is not LEA instruction, add the length of addr32 prefix. */
17994 220504876 : if (TARGET_64BIT && !lea
17995 454466819 : && (SImode_address_operand (addr, VOIDmode)
17996 195455252 : || (parts.base && GET_MODE (parts.base) == SImode)
17997 195445022 : || (parts.index && GET_MODE (parts.index) == SImode)))
17998 10383 : len++;
17999 :
18000 259011414 : base = parts.base;
18001 259011414 : index = parts.index;
18002 259011414 : disp = parts.disp;
18003 :
18004 259011414 : if (base && SUBREG_P (base))
18005 2 : base = SUBREG_REG (base);
18006 259011414 : if (index && SUBREG_P (index))
18007 0 : index = SUBREG_REG (index);
18008 :
18009 259011414 : gcc_assert (base == NULL_RTX || REG_P (base));
18010 259011414 : gcc_assert (index == NULL_RTX || REG_P (index));
18011 :
18012 : /* Rule of thumb:
18013 : - esp as the base always wants an index,
18014 : - ebp as the base always wants a displacement,
18015 : - r12 as the base always wants an index,
18016 : - r13 as the base always wants a displacement. */
18017 :
18018 : /* Register Indirect. */
18019 259011414 : if (base && !index && !disp)
18020 : {
18021 : /* esp (for its index) and ebp (for its displacement) need
18022 : the two-byte modrm form. Similarly for r12 and r13 in 64-bit
18023 : code. */
18024 16806469 : if (base == arg_pointer_rtx
18025 16806469 : || base == frame_pointer_rtx
18026 16806469 : || REGNO (base) == SP_REG
18027 10031527 : || REGNO (base) == BP_REG
18028 10031527 : || REGNO (base) == R12_REG
18029 26380625 : || REGNO (base) == R13_REG)
18030 7232313 : len++;
18031 : }
18032 :
18033 : /* Direct Addressing. In 64-bit mode mod 00 r/m 5
18034 : is not disp32, but disp32(%rip), so for disp32
18035 : SIB byte is needed, unless print_operand_address
18036 : optimizes it into disp32(%rip) or (%rip) is implied
18037 : by UNSPEC. */
18038 242204945 : else if (disp && !base && !index)
18039 : {
18040 24595615 : len += 4;
18041 24595615 : if (!ix86_rip_relative_addr_p (&parts))
18042 1850984 : len++;
18043 : }
18044 : else
18045 : {
18046 : /* Find the length of the displacement constant. */
18047 217609330 : if (disp)
18048 : {
18049 213530792 : if (base && satisfies_constraint_K (disp))
18050 123956006 : len += 1;
18051 : else
18052 89574786 : len += 4;
18053 : }
18054 : /* ebp always wants a displacement. Similarly r13. */
18055 4078538 : else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
18056 7909 : len++;
18057 :
18058 : /* An index requires the two-byte modrm form.... */
18059 217609330 : if (index
18060 : /* ...like esp (or r12), which always wants an index. */
18061 206783812 : || base == arg_pointer_rtx
18062 206783812 : || base == frame_pointer_rtx
18063 424393142 : || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
18064 155798873 : len++;
18065 : }
18066 :
18067 : return len;
18068 : }
18069 :
18070 : /* Compute default value for "length_immediate" attribute. When SHORTFORM
18071 : is set, expect that insn have 8bit immediate alternative. */
18072 : int
18073 317372903 : ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
18074 : {
18075 317372903 : int len = 0;
18076 317372903 : int i;
18077 317372903 : extract_insn_cached (insn);
18078 990246595 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18079 672873692 : if (CONSTANT_P (recog_data.operand[i]))
18080 : {
18081 139933145 : enum attr_mode mode = get_attr_mode (insn);
18082 :
18083 139933145 : gcc_assert (!len);
18084 139933145 : if (shortform && CONST_INT_P (recog_data.operand[i]))
18085 : {
18086 37492769 : HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
18087 37492769 : switch (mode)
18088 : {
18089 1274037 : case MODE_QI:
18090 1274037 : len = 1;
18091 1274037 : continue;
18092 437496 : case MODE_HI:
18093 437496 : ival = trunc_int_for_mode (ival, HImode);
18094 437496 : break;
18095 15884976 : case MODE_SI:
18096 15884976 : ival = trunc_int_for_mode (ival, SImode);
18097 15884976 : break;
18098 : default:
18099 : break;
18100 : }
18101 36218732 : if (IN_RANGE (ival, -128, 127))
18102 : {
18103 32121221 : len = 1;
18104 32121221 : continue;
18105 : }
18106 : }
18107 106537887 : switch (mode)
18108 : {
18109 : case MODE_QI:
18110 : len = 1;
18111 : break;
18112 : case MODE_HI:
18113 672873692 : len = 2;
18114 : break;
18115 : case MODE_SI:
18116 100686468 : len = 4;
18117 : break;
18118 : /* Immediates for DImode instructions are encoded
18119 : as 32bit sign extended values. */
18120 : case MODE_DI:
18121 100686468 : len = 4;
18122 : break;
18123 0 : default:
18124 0 : fatal_insn ("unknown insn mode", insn);
18125 : }
18126 : }
18127 317372903 : return len;
18128 : }
18129 :
18130 : /* Compute default value for "length_address" attribute. */
18131 : int
18132 445219408 : ix86_attr_length_address_default (rtx_insn *insn)
18133 : {
18134 445219408 : int i;
18135 :
18136 445219408 : if (get_attr_type (insn) == TYPE_LEA)
18137 : {
18138 27768778 : rtx set = PATTERN (insn), addr;
18139 :
18140 27768778 : if (GET_CODE (set) == PARALLEL)
18141 86753 : set = XVECEXP (set, 0, 0);
18142 :
18143 27768778 : gcc_assert (GET_CODE (set) == SET);
18144 :
18145 27768778 : addr = SET_SRC (set);
18146 :
18147 27768778 : return memory_address_length (addr, true);
18148 : }
18149 :
18150 417450630 : extract_insn_cached (insn);
18151 958266058 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18152 : {
18153 784847669 : rtx op = recog_data.operand[i];
18154 784847669 : if (MEM_P (op))
18155 : {
18156 244311252 : constrain_operands_cached (insn, reload_completed);
18157 244311252 : if (which_alternative != -1)
18158 : {
18159 244311252 : const char *constraints = recog_data.constraints[i];
18160 244311252 : int alt = which_alternative;
18161 :
18162 387598041 : while (*constraints == '=' || *constraints == '+')
18163 143286789 : constraints++;
18164 1111732793 : while (alt-- > 0)
18165 2127061670 : while (*constraints++ != ',')
18166 : ;
18167 : /* Skip ignored operands. */
18168 244311252 : if (*constraints == 'X')
18169 279011 : continue;
18170 : }
18171 :
18172 244032241 : int len = memory_address_length (XEXP (op, 0), false);
18173 :
18174 : /* Account for segment prefix for non-default addr spaces. */
18175 257419134 : if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
18176 783906 : len++;
18177 :
18178 244032241 : return len;
18179 : }
18180 : }
18181 : return 0;
18182 : }
18183 :
18184 : /* Compute default value for "length_vex" attribute. It includes
18185 : 2 or 3 byte VEX prefix and 1 opcode byte. */
18186 :
18187 : int
18188 5050849 : ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
18189 : bool has_vex_w)
18190 : {
18191 5050849 : int i, reg_only = 2 + 1;
18192 5050849 : bool has_mem = false;
18193 :
18194 : /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18195 : byte VEX prefix. */
18196 5050849 : if (!has_0f_opcode || has_vex_w)
18197 : return 3 + 1;
18198 :
18199 : /* We can always use 2 byte VEX prefix in 32bit. */
18200 4616094 : if (!TARGET_64BIT)
18201 : return 2 + 1;
18202 :
18203 3524662 : extract_insn_cached (insn);
18204 :
18205 10995720 : for (i = recog_data.n_operands - 1; i >= 0; --i)
18206 7777781 : if (REG_P (recog_data.operand[i]))
18207 : {
18208 : /* REX.W bit uses 3 byte VEX prefix.
18209 : REX2 with vex use extended EVEX prefix length is 4-byte. */
18210 5124758 : if (GET_MODE (recog_data.operand[i]) == DImode
18211 5124758 : && GENERAL_REG_P (recog_data.operand[i]))
18212 : return 3 + 1;
18213 :
18214 : /* REX.B bit requires 3-byte VEX. Right here we don't know which
18215 : operand will be encoded using VEX.B, so be conservative.
18216 : REX2 with vex use extended EVEX prefix length is 4-byte. */
18217 5112670 : if (REX_INT_REGNO_P (recog_data.operand[i])
18218 5112670 : || REX2_INT_REGNO_P (recog_data.operand[i])
18219 5112670 : || REX_SSE_REGNO_P (recog_data.operand[i]))
18220 0 : reg_only = 3 + 1;
18221 : }
18222 2653023 : else if (MEM_P (recog_data.operand[i]))
18223 : {
18224 : /* REX2.X or REX2.B bits use 3 byte VEX prefix. */
18225 2066312 : if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
18226 : return 4;
18227 :
18228 : /* REX.X or REX.B bits use 3 byte VEX prefix. */
18229 2066053 : if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
18230 : return 3 + 1;
18231 :
18232 : has_mem = true;
18233 : }
18234 :
18235 3217939 : return has_mem ? 2 + 1 : reg_only;
18236 : }
18237 :
18238 :
18239 : static bool
18240 : ix86_class_likely_spilled_p (reg_class_t);
18241 :
18242 : /* Returns true if lhs of insn is HW function argument register and set up
18243 : is_spilled to true if it is likely spilled HW register. */
18244 : static bool
18245 1149 : insn_is_function_arg (rtx insn, bool* is_spilled)
18246 : {
18247 1149 : rtx dst;
18248 :
18249 1149 : if (!NONDEBUG_INSN_P (insn))
18250 : return false;
18251 : /* Call instructions are not movable, ignore it. */
18252 1149 : if (CALL_P (insn))
18253 : return false;
18254 1075 : insn = PATTERN (insn);
18255 1075 : if (GET_CODE (insn) == PARALLEL)
18256 73 : insn = XVECEXP (insn, 0, 0);
18257 1075 : if (GET_CODE (insn) != SET)
18258 : return false;
18259 1075 : dst = SET_DEST (insn);
18260 979 : if (REG_P (dst) && HARD_REGISTER_P (dst)
18261 1948 : && ix86_function_arg_regno_p (REGNO (dst)))
18262 : {
18263 : /* Is it likely spilled HW register? */
18264 873 : if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
18265 873 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
18266 829 : *is_spilled = true;
18267 873 : return true;
18268 : }
18269 : return false;
18270 : }
18271 :
18272 : /* Add output dependencies for chain of function adjacent arguments if only
18273 : there is a move to likely spilled HW register. Return first argument
18274 : if at least one dependence was added or NULL otherwise. */
18275 : static rtx_insn *
18276 415 : add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
18277 : {
18278 415 : rtx_insn *insn;
18279 415 : rtx_insn *last = call;
18280 415 : rtx_insn *first_arg = NULL;
18281 415 : bool is_spilled = false;
18282 :
18283 415 : head = PREV_INSN (head);
18284 :
18285 : /* Find nearest to call argument passing instruction. */
18286 415 : while (true)
18287 : {
18288 415 : last = PREV_INSN (last);
18289 415 : if (last == head)
18290 : return NULL;
18291 415 : if (!NONDEBUG_INSN_P (last))
18292 0 : continue;
18293 415 : if (insn_is_function_arg (last, &is_spilled))
18294 : break;
18295 : return NULL;
18296 : }
18297 :
18298 : first_arg = last;
18299 1054 : while (true)
18300 : {
18301 1054 : insn = PREV_INSN (last);
18302 1054 : if (!INSN_P (insn))
18303 : break;
18304 956 : if (insn == head)
18305 : break;
18306 915 : if (!NONDEBUG_INSN_P (insn))
18307 : {
18308 181 : last = insn;
18309 181 : continue;
18310 : }
18311 734 : if (insn_is_function_arg (insn, &is_spilled))
18312 : {
18313 : /* Add output depdendence between two function arguments if chain
18314 : of output arguments contains likely spilled HW registers. */
18315 466 : if (is_spilled)
18316 466 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18317 : first_arg = last = insn;
18318 : }
18319 : else
18320 : break;
18321 : }
18322 407 : if (!is_spilled)
18323 : return NULL;
18324 : return first_arg;
18325 : }
18326 :
18327 : /* Add output or anti dependency from insn to first_arg to restrict its code
18328 : motion. */
18329 : static void
18330 2333 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
18331 : {
18332 2333 : rtx set;
18333 2333 : rtx tmp;
18334 :
18335 2333 : set = single_set (insn);
18336 2333 : if (!set)
18337 : return;
18338 1453 : tmp = SET_DEST (set);
18339 1453 : if (REG_P (tmp))
18340 : {
18341 : /* Add output dependency to the first function argument. */
18342 1258 : add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18343 1258 : return;
18344 : }
18345 : /* Add anti dependency. */
18346 195 : add_dependence (first_arg, insn, REG_DEP_ANTI);
18347 : }
18348 :
18349 : /* Avoid cross block motion of function argument through adding dependency
18350 : from the first non-jump instruction in bb. */
18351 : static void
18352 68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
18353 : {
18354 68 : rtx_insn *insn = BB_END (bb);
18355 :
18356 134 : while (insn)
18357 : {
18358 134 : if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
18359 : {
18360 67 : rtx set = single_set (insn);
18361 67 : if (set)
18362 : {
18363 67 : avoid_func_arg_motion (arg, insn);
18364 67 : return;
18365 : }
18366 : }
18367 67 : if (insn == BB_HEAD (bb))
18368 : return;
18369 66 : insn = PREV_INSN (insn);
18370 : }
18371 : }
18372 :
18373 : /* Hook for pre-reload schedule - avoid motion of function arguments
18374 : passed in likely spilled HW registers. */
18375 : static void
18376 10267590 : ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
18377 : {
18378 10267590 : rtx_insn *insn;
18379 10267590 : rtx_insn *first_arg = NULL;
18380 10267590 : if (reload_completed)
18381 : return;
18382 2268 : while (head != tail && DEBUG_INSN_P (head))
18383 766 : head = NEXT_INSN (head);
18384 10883 : for (insn = tail; insn != head; insn = PREV_INSN (insn))
18385 9517 : if (INSN_P (insn) && CALL_P (insn))
18386 : {
18387 415 : first_arg = add_parameter_dependencies (insn, head);
18388 415 : if (first_arg)
18389 : {
18390 : /* Add dependee for first argument to predecessors if only
18391 : region contains more than one block. */
18392 407 : basic_block bb = BLOCK_FOR_INSN (insn);
18393 407 : int rgn = CONTAINING_RGN (bb->index);
18394 407 : int nr_blks = RGN_NR_BLOCKS (rgn);
18395 : /* Skip trivial regions and region head blocks that can have
18396 : predecessors outside of region. */
18397 407 : if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
18398 : {
18399 67 : edge e;
18400 67 : edge_iterator ei;
18401 :
18402 : /* Regions are SCCs with the exception of selective
18403 : scheduling with pipelining of outer blocks enabled.
18404 : So also check that immediate predecessors of a non-head
18405 : block are in the same region. */
18406 137 : FOR_EACH_EDGE (e, ei, bb->preds)
18407 : {
18408 : /* Avoid creating of loop-carried dependencies through
18409 : using topological ordering in the region. */
18410 70 : if (rgn == CONTAINING_RGN (e->src->index)
18411 69 : && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
18412 68 : add_dependee_for_func_arg (first_arg, e->src);
18413 : }
18414 : }
18415 407 : insn = first_arg;
18416 407 : if (insn == head)
18417 : break;
18418 : }
18419 : }
18420 9102 : else if (first_arg)
18421 2266 : avoid_func_arg_motion (first_arg, insn);
18422 : }
18423 :
18424 : /* Hook for pre-reload schedule - set priority of moves from likely spilled
18425 : HW registers to maximum, to schedule them at soon as possible. These are
18426 : moves from function argument registers at the top of the function entry
18427 : and moves from function return value registers after call. */
18428 : static int
18429 108819635 : ix86_adjust_priority (rtx_insn *insn, int priority)
18430 : {
18431 108819635 : rtx set;
18432 :
18433 108819635 : if (reload_completed)
18434 : return priority;
18435 :
18436 14748 : if (!NONDEBUG_INSN_P (insn))
18437 : return priority;
18438 :
18439 12690 : set = single_set (insn);
18440 12690 : if (set)
18441 : {
18442 12082 : rtx tmp = SET_SRC (set);
18443 12082 : if (REG_P (tmp)
18444 2532 : && HARD_REGISTER_P (tmp)
18445 500 : && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18446 12082 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18447 449 : return current_sched_info->sched_max_insns_priority;
18448 : }
18449 :
18450 : return priority;
18451 : }
18452 :
18453 : /* Prepare for scheduling pass. */
18454 : static void
18455 966549 : ix86_sched_init_global (FILE *, int, int)
18456 : {
18457 : /* Install scheduling hooks for current CPU. Some of these hooks are used
18458 : in time-critical parts of the scheduler, so we only set them up when
18459 : they are actually used. */
18460 966549 : switch (ix86_tune)
18461 : {
18462 920005 : case PROCESSOR_CORE2:
18463 920005 : case PROCESSOR_NEHALEM:
18464 920005 : case PROCESSOR_SANDYBRIDGE:
18465 920005 : case PROCESSOR_HASWELL:
18466 920005 : case PROCESSOR_TREMONT:
18467 920005 : case PROCESSOR_ALDERLAKE:
18468 920005 : case PROCESSOR_GENERIC:
18469 : /* Do not perform multipass scheduling for pre-reload schedule
18470 : to save compile time. */
18471 920005 : if (reload_completed)
18472 : {
18473 919518 : ix86_core2i7_init_hooks ();
18474 919518 : break;
18475 : }
18476 : /* Fall through. */
18477 47031 : default:
18478 47031 : targetm.sched.dfa_post_advance_cycle = NULL;
18479 47031 : targetm.sched.first_cycle_multipass_init = NULL;
18480 47031 : targetm.sched.first_cycle_multipass_begin = NULL;
18481 47031 : targetm.sched.first_cycle_multipass_issue = NULL;
18482 47031 : targetm.sched.first_cycle_multipass_backtrack = NULL;
18483 47031 : targetm.sched.first_cycle_multipass_end = NULL;
18484 47031 : targetm.sched.first_cycle_multipass_fini = NULL;
18485 47031 : break;
18486 : }
18487 966549 : }
18488 :
18489 :
18490 : /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18491 :
18492 : static HOST_WIDE_INT
18493 722836 : ix86_static_rtx_alignment (machine_mode mode)
18494 : {
18495 722836 : if (mode == DFmode)
18496 : return 64;
18497 : if (ALIGN_MODE_128 (mode))
18498 155308 : return MAX (128, GET_MODE_ALIGNMENT (mode));
18499 482507 : return GET_MODE_ALIGNMENT (mode);
18500 : }
18501 :
18502 : /* Implement TARGET_CONSTANT_ALIGNMENT. */
18503 :
18504 : static HOST_WIDE_INT
18505 6872667 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18506 : {
18507 6872667 : if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18508 : || TREE_CODE (exp) == INTEGER_CST)
18509 : {
18510 368397 : machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18511 368397 : HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18512 368397 : return MAX (mode_align, align);
18513 : }
18514 6362450 : else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18515 9599673 : && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18516 : return BITS_PER_WORD;
18517 :
18518 : return align;
18519 : }
18520 :
18521 : /* Implement TARGET_EMPTY_RECORD_P. */
18522 :
18523 : static bool
18524 1444410249 : ix86_is_empty_record (const_tree type)
18525 : {
18526 1444410249 : if (!TARGET_64BIT)
18527 : return false;
18528 1413506134 : return default_is_empty_record (type);
18529 : }
18530 :
18531 : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
18532 :
18533 : static void
18534 15211455 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
18535 : {
18536 15211455 : CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
18537 :
18538 15211455 : if (!cum->warn_empty)
18539 : return;
18540 :
18541 13041017 : if (!TYPE_EMPTY_P (type))
18542 : return;
18543 :
18544 : /* Don't warn if the function isn't visible outside of the TU. */
18545 14636 : if (cum->decl && !TREE_PUBLIC (cum->decl))
18546 : return;
18547 :
18548 13188 : tree decl = cum->decl;
18549 13188 : if (!decl)
18550 : /* If we don't know the target, look at the current TU. */
18551 39 : decl = current_function_decl;
18552 :
18553 13188 : const_tree ctx = get_ultimate_context (decl);
18554 13188 : if (ctx == NULL_TREE
18555 26342 : || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
18556 : return;
18557 :
18558 : /* If the actual size of the type is zero, then there is no change
18559 : in how objects of this size are passed. */
18560 72 : if (int_size_in_bytes (type) == 0)
18561 : return;
18562 :
18563 66 : warning (OPT_Wabi, "empty class %qT parameter passing ABI "
18564 : "changes in %<-fabi-version=12%> (GCC 8)", type);
18565 :
18566 : /* Only warn once. */
18567 66 : cum->warn_empty = false;
18568 : }
18569 :
18570 : /* This hook returns name of multilib ABI. */
18571 :
18572 : static const char *
18573 3407835 : ix86_get_multilib_abi_name (void)
18574 : {
18575 3407835 : if (!(TARGET_64BIT_P (ix86_isa_flags)))
18576 : return "i386";
18577 3363879 : else if (TARGET_X32_P (ix86_isa_flags))
18578 : return "x32";
18579 : else
18580 3363879 : return "x86_64";
18581 : }
18582 :
18583 : /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18584 : the data type, and ALIGN is the alignment that the object would
18585 : ordinarily have. */
18586 :
18587 : static int
18588 0 : iamcu_alignment (tree type, int align)
18589 : {
18590 0 : machine_mode mode;
18591 :
18592 0 : if (align < 32 || TYPE_USER_ALIGN (type))
18593 : return align;
18594 :
18595 : /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18596 : bytes. */
18597 0 : type = strip_array_types (type);
18598 0 : if (TYPE_ATOMIC (type))
18599 : return align;
18600 :
18601 0 : mode = TYPE_MODE (type);
18602 0 : switch (GET_MODE_CLASS (mode))
18603 : {
18604 : case MODE_INT:
18605 : case MODE_COMPLEX_INT:
18606 : case MODE_COMPLEX_FLOAT:
18607 : case MODE_FLOAT:
18608 : case MODE_DECIMAL_FLOAT:
18609 : return 32;
18610 : default:
18611 : return align;
18612 : }
18613 : }
18614 :
18615 : /* Compute the alignment for a static variable.
18616 : TYPE is the data type, and ALIGN is the alignment that
18617 : the object would ordinarily have. The value of this function is used
18618 : instead of that alignment to align the object. */
18619 :
18620 : int
18621 12041939 : ix86_data_alignment (tree type, unsigned int align, bool opt)
18622 : {
18623 : /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18624 : for symbols from other compilation units or symbols that don't need
18625 : to bind locally. In order to preserve some ABI compatibility with
18626 : those compilers, ensure we don't decrease alignment from what we
18627 : used to assume. */
18628 :
18629 12041939 : unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18630 :
18631 : /* A data structure, equal or greater than the size of a cache line
18632 : (64 bytes in the Pentium 4 and other recent Intel processors, including
18633 : processors based on Intel Core microarchitecture) should be aligned
18634 : so that its base address is a multiple of a cache line size. */
18635 :
18636 24083878 : unsigned int max_align
18637 12041939 : = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
18638 :
18639 14631717 : if (max_align < BITS_PER_WORD)
18640 0 : max_align = BITS_PER_WORD;
18641 :
18642 12041939 : switch (ix86_align_data_type)
18643 : {
18644 12041939 : case ix86_align_data_type_abi: opt = false; break;
18645 12041919 : case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18646 : case ix86_align_data_type_cacheline: break;
18647 : }
18648 :
18649 12041939 : if (TARGET_IAMCU)
18650 0 : align = iamcu_alignment (type, align);
18651 :
18652 12041939 : if (opt
18653 5794613 : && AGGREGATE_TYPE_P (type)
18654 3716409 : && TYPE_SIZE (type)
18655 15758296 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18656 : {
18657 6731441 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18658 3716357 : && align < max_align_compat)
18659 701273 : align = max_align_compat;
18660 7369909 : if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18661 3716357 : && align < max_align)
18662 62805 : align = max_align;
18663 : }
18664 :
18665 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18666 : to 16byte boundary. */
18667 12041939 : if (TARGET_64BIT)
18668 : {
18669 4951679 : if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18670 3264940 : && TYPE_SIZE (type)
18671 3264878 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18672 10887676 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18673 11503014 : && align < 128)
18674 615338 : return 128;
18675 : }
18676 :
18677 11426601 : if (!opt)
18678 6052168 : return align;
18679 :
18680 5374433 : if (TREE_CODE (type) == ARRAY_TYPE)
18681 : {
18682 1102403 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18683 : return 64;
18684 1102403 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18685 : return 128;
18686 : }
18687 4272030 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18688 : {
18689 :
18690 12972 : if (TYPE_MODE (type) == DCmode && align < 64)
18691 : return 64;
18692 12972 : if ((TYPE_MODE (type) == XCmode
18693 12972 : || TYPE_MODE (type) == TCmode) && align < 128)
18694 : return 128;
18695 : }
18696 4259058 : else if (RECORD_OR_UNION_TYPE_P (type)
18697 4259058 : && TYPE_FIELDS (type))
18698 : {
18699 2192083 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18700 : return 64;
18701 2192083 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18702 : return 128;
18703 : }
18704 2066975 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18705 : || TREE_CODE (type) == INTEGER_TYPE)
18706 : {
18707 1918494 : if (TYPE_MODE (type) == DFmode && align < 64)
18708 : return 64;
18709 1918494 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18710 : return 128;
18711 : }
18712 :
18713 5374320 : return align;
18714 : }
18715 :
18716 : /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18717 : static void
18718 31539727 : ix86_lower_local_decl_alignment (tree decl)
18719 : {
18720 31539727 : unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18721 31539727 : DECL_ALIGN (decl), true);
18722 31539727 : if (new_align < DECL_ALIGN (decl))
18723 0 : SET_DECL_ALIGN (decl, new_align);
18724 31539727 : }
18725 :
18726 : /* Compute the alignment for a local variable or a stack slot. EXP is
18727 : the data type or decl itself, MODE is the widest mode available and
18728 : ALIGN is the alignment that the object would ordinarily have. The
18729 : value of this macro is used instead of that alignment to align the
18730 : object. */
18731 :
18732 : unsigned int
18733 49009384 : ix86_local_alignment (tree exp, machine_mode mode,
18734 : unsigned int align, bool may_lower)
18735 : {
18736 49009384 : tree type, decl;
18737 :
18738 49009384 : if (exp && DECL_P (exp))
18739 : {
18740 46852850 : type = TREE_TYPE (exp);
18741 46852850 : decl = exp;
18742 : }
18743 : else
18744 : {
18745 : type = exp;
18746 : decl = NULL;
18747 : }
18748 :
18749 : /* Don't do dynamic stack realignment for long long objects with
18750 : -mpreferred-stack-boundary=2. */
18751 49009384 : if (may_lower
18752 31539727 : && !TARGET_64BIT
18753 249231 : && align == 64
18754 38964 : && ix86_preferred_stack_boundary < 64
18755 0 : && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18756 0 : && (!type || (!TYPE_USER_ALIGN (type)
18757 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18758 49009384 : && (!decl || !DECL_USER_ALIGN (decl)))
18759 : align = 32;
18760 :
18761 : /* If TYPE is NULL, we are allocating a stack slot for caller-save
18762 : register in MODE. We will return the largest alignment of XF
18763 : and DF. */
18764 49009384 : if (!type)
18765 : {
18766 1408878 : if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18767 1479 : align = GET_MODE_ALIGNMENT (DFmode);
18768 1408878 : return align;
18769 : }
18770 :
18771 : /* Don't increase alignment for Intel MCU psABI. */
18772 47600506 : if (TARGET_IAMCU)
18773 : return align;
18774 :
18775 : /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18776 : to 16byte boundary. Exact wording is:
18777 :
18778 : An array uses the same alignment as its elements, except that a local or
18779 : global array variable of length at least 16 bytes or
18780 : a C99 variable-length array variable always has alignment of at least 16 bytes.
18781 :
18782 : This was added to allow use of aligned SSE instructions at arrays. This
18783 : rule is meant for static storage (where compiler cannot do the analysis
18784 : by itself). We follow it for automatic variables only when convenient.
18785 : We fully control everything in the function compiled and functions from
18786 : other unit cannot rely on the alignment.
18787 :
18788 : Exclude va_list type. It is the common case of local array where
18789 : we cannot benefit from the alignment.
18790 :
18791 : TODO: Probably one should optimize for size only when var is not escaping. */
18792 44766247 : if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18793 92011320 : && TARGET_SSE)
18794 : {
18795 44371143 : if (AGGREGATE_TYPE_P (type)
18796 9350305 : && (va_list_type_node == NULL_TREE
18797 9350305 : || (TYPE_MAIN_VARIANT (type)
18798 9350305 : != TYPE_MAIN_VARIANT (va_list_type_node)))
18799 9251674 : && TYPE_SIZE (type)
18800 9251674 : && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18801 45438313 : && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18802 51209506 : && align < 128)
18803 5771193 : return 128;
18804 : }
18805 41829313 : if (TREE_CODE (type) == ARRAY_TYPE)
18806 : {
18807 793242 : if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18808 : return 64;
18809 793242 : if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18810 : return 128;
18811 : }
18812 41036071 : else if (TREE_CODE (type) == COMPLEX_TYPE)
18813 : {
18814 154326 : if (TYPE_MODE (type) == DCmode && align < 64)
18815 : return 64;
18816 154326 : if ((TYPE_MODE (type) == XCmode
18817 154326 : || TYPE_MODE (type) == TCmode) && align < 128)
18818 : return 128;
18819 : }
18820 40881745 : else if (RECORD_OR_UNION_TYPE_P (type)
18821 40881745 : && TYPE_FIELDS (type))
18822 : {
18823 4760068 : if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18824 : return 64;
18825 4756963 : if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18826 : return 128;
18827 : }
18828 36121677 : else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18829 : || TREE_CODE (type) == INTEGER_TYPE)
18830 : {
18831 :
18832 29774278 : if (TYPE_MODE (type) == DFmode && align < 64)
18833 : return 64;
18834 29774278 : if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18835 : return 128;
18836 : }
18837 : return align;
18838 : }
18839 :
18840 : /* Compute the minimum required alignment for dynamic stack realignment
18841 : purposes for a local variable, parameter or a stack slot. EXP is
18842 : the data type or decl itself, MODE is its mode and ALIGN is the
18843 : alignment that the object would ordinarily have. */
18844 :
18845 : unsigned int
18846 47929092 : ix86_minimum_alignment (tree exp, machine_mode mode,
18847 : unsigned int align)
18848 : {
18849 47929092 : tree type, decl;
18850 :
18851 47929092 : if (exp && DECL_P (exp))
18852 : {
18853 15128780 : type = TREE_TYPE (exp);
18854 15128780 : decl = exp;
18855 : }
18856 : else
18857 : {
18858 : type = exp;
18859 : decl = NULL;
18860 : }
18861 :
18862 47929092 : if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18863 : return align;
18864 :
18865 : /* Don't do dynamic stack realignment for long long objects with
18866 : -mpreferred-stack-boundary=2. */
18867 0 : if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18868 0 : && (!type || (!TYPE_USER_ALIGN (type)
18869 0 : && !TYPE_ATOMIC (strip_array_types (type))))
18870 0 : && (!decl || !DECL_USER_ALIGN (decl)))
18871 : {
18872 0 : gcc_checking_assert (!TARGET_STV);
18873 : return 32;
18874 : }
18875 :
18876 : return align;
18877 : }
18878 :
18879 : /* Find a location for the static chain incoming to a nested function.
18880 : This is a register, unless all free registers are used by arguments. */
18881 :
18882 : static rtx
18883 269897 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18884 : {
18885 269897 : unsigned regno;
18886 :
18887 269897 : if (TARGET_64BIT)
18888 : {
18889 : /* We always use R10 in 64-bit mode. */
18890 : regno = R10_REG;
18891 : }
18892 : else
18893 : {
18894 88535 : const_tree fntype, fndecl;
18895 88535 : unsigned int ccvt;
18896 :
18897 : /* By default in 32-bit mode we use ECX to pass the static chain. */
18898 88535 : regno = CX_REG;
18899 :
18900 88535 : if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18901 : {
18902 78559 : fntype = TREE_TYPE (fndecl_or_type);
18903 78559 : fndecl = fndecl_or_type;
18904 : }
18905 : else
18906 : {
18907 : fntype = fndecl_or_type;
18908 : fndecl = NULL;
18909 : }
18910 :
18911 88535 : ccvt = ix86_get_callcvt (fntype);
18912 88535 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18913 : {
18914 : /* Fastcall functions use ecx/edx for arguments, which leaves
18915 : us with EAX for the static chain.
18916 : Thiscall functions use ecx for arguments, which also
18917 : leaves us with EAX for the static chain. */
18918 : regno = AX_REG;
18919 : }
18920 88535 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18921 : {
18922 : /* Thiscall functions use ecx for arguments, which leaves
18923 : us with EAX and EDX for the static chain.
18924 : We are using for abi-compatibility EAX. */
18925 : regno = AX_REG;
18926 : }
18927 88535 : else if (ix86_function_regparm (fntype, fndecl) == 3)
18928 : {
18929 : /* For regparm 3, we have no free call-clobbered registers in
18930 : which to store the static chain. In order to implement this,
18931 : we have the trampoline push the static chain to the stack.
18932 : However, we can't push a value below the return address when
18933 : we call the nested function directly, so we have to use an
18934 : alternate entry point. For this we use ESI, and have the
18935 : alternate entry point push ESI, so that things appear the
18936 : same once we're executing the nested function. */
18937 0 : if (incoming_p)
18938 : {
18939 0 : if (fndecl == current_function_decl
18940 0 : && !ix86_static_chain_on_stack)
18941 : {
18942 0 : gcc_assert (!reload_completed);
18943 0 : ix86_static_chain_on_stack = true;
18944 : }
18945 0 : return gen_frame_mem (SImode,
18946 0 : plus_constant (Pmode,
18947 : arg_pointer_rtx, -8));
18948 : }
18949 : regno = SI_REG;
18950 : }
18951 : }
18952 :
18953 358445 : return gen_rtx_REG (Pmode, regno);
18954 : }
18955 :
18956 : /* Emit RTL insns to initialize the variable parts of a trampoline.
18957 : FNDECL is the decl of the target address; M_TRAMP is a MEM for
18958 : the trampoline, and CHAIN_VALUE is an RTX for the static chain
18959 : to be passed to the target function. */
18960 :
18961 : static void
18962 303 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18963 : {
18964 303 : rtx mem, fnaddr;
18965 303 : int opcode;
18966 303 : int offset = 0;
18967 303 : bool need_endbr = (flag_cf_protection & CF_BRANCH);
18968 :
18969 303 : fnaddr = XEXP (DECL_RTL (fndecl), 0);
18970 :
18971 303 : if (TARGET_64BIT)
18972 : {
18973 303 : int size;
18974 :
18975 303 : if (need_endbr)
18976 : {
18977 : /* Insert ENDBR64. */
18978 1 : mem = adjust_address (m_tramp, SImode, offset);
18979 1 : emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18980 1 : offset += 4;
18981 : }
18982 :
18983 : /* Load the function address to r11. Try to load address using
18984 : the shorter movl instead of movabs. We may want to support
18985 : movq for kernel mode, but kernel does not use trampolines at
18986 : the moment. FNADDR is a 32bit address and may not be in
18987 : DImode when ptr_mode == SImode. Always use movl in this
18988 : case. */
18989 303 : if (ptr_mode == SImode
18990 303 : || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18991 : {
18992 271 : fnaddr = copy_addr_to_reg (fnaddr);
18993 :
18994 271 : mem = adjust_address (m_tramp, HImode, offset);
18995 271 : emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18996 :
18997 271 : mem = adjust_address (m_tramp, SImode, offset + 2);
18998 271 : emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18999 271 : offset += 6;
19000 : }
19001 : else
19002 : {
19003 32 : mem = adjust_address (m_tramp, HImode, offset);
19004 32 : emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
19005 :
19006 32 : mem = adjust_address (m_tramp, DImode, offset + 2);
19007 32 : emit_move_insn (mem, fnaddr);
19008 32 : offset += 10;
19009 : }
19010 :
19011 : /* Load static chain using movabs to r10. Use the shorter movl
19012 : instead of movabs when ptr_mode == SImode. */
19013 303 : if (ptr_mode == SImode)
19014 : {
19015 : opcode = 0xba41;
19016 : size = 6;
19017 : }
19018 : else
19019 : {
19020 303 : opcode = 0xba49;
19021 303 : size = 10;
19022 : }
19023 :
19024 303 : mem = adjust_address (m_tramp, HImode, offset);
19025 303 : emit_move_insn (mem, gen_int_mode (opcode, HImode));
19026 :
19027 303 : mem = adjust_address (m_tramp, ptr_mode, offset + 2);
19028 303 : emit_move_insn (mem, chain_value);
19029 303 : offset += size;
19030 :
19031 : /* Jump to r11; the last (unused) byte is a nop, only there to
19032 : pad the write out to a single 32-bit store. */
19033 303 : mem = adjust_address (m_tramp, SImode, offset);
19034 303 : emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
19035 303 : offset += 4;
19036 : }
19037 : else
19038 : {
19039 0 : rtx disp, chain;
19040 :
19041 : /* Depending on the static chain location, either load a register
19042 : with a constant, or push the constant to the stack. All of the
19043 : instructions are the same size. */
19044 0 : chain = ix86_static_chain (fndecl, true);
19045 0 : if (REG_P (chain))
19046 : {
19047 0 : switch (REGNO (chain))
19048 : {
19049 : case AX_REG:
19050 : opcode = 0xb8; break;
19051 0 : case CX_REG:
19052 0 : opcode = 0xb9; break;
19053 0 : default:
19054 0 : gcc_unreachable ();
19055 : }
19056 : }
19057 : else
19058 : opcode = 0x68;
19059 :
19060 0 : if (need_endbr)
19061 : {
19062 : /* Insert ENDBR32. */
19063 0 : mem = adjust_address (m_tramp, SImode, offset);
19064 0 : emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
19065 0 : offset += 4;
19066 : }
19067 :
19068 0 : mem = adjust_address (m_tramp, QImode, offset);
19069 0 : emit_move_insn (mem, gen_int_mode (opcode, QImode));
19070 :
19071 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
19072 0 : emit_move_insn (mem, chain_value);
19073 0 : offset += 5;
19074 :
19075 0 : mem = adjust_address (m_tramp, QImode, offset);
19076 0 : emit_move_insn (mem, gen_int_mode (0xe9, QImode));
19077 :
19078 0 : mem = adjust_address (m_tramp, SImode, offset + 1);
19079 :
19080 : /* Compute offset from the end of the jmp to the target function.
19081 : In the case in which the trampoline stores the static chain on
19082 : the stack, we need to skip the first insn which pushes the
19083 : (call-saved) register static chain; this push is 1 byte. */
19084 0 : offset += 5;
19085 0 : int skip = MEM_P (chain) ? 1 : 0;
19086 : /* Skip ENDBR32 at the entry of the target function. */
19087 0 : if (need_endbr
19088 0 : && !cgraph_node::get (fndecl)->only_called_directly_p ())
19089 0 : skip += 4;
19090 0 : disp = expand_binop (SImode, sub_optab, fnaddr,
19091 0 : plus_constant (Pmode, XEXP (m_tramp, 0),
19092 0 : offset - skip),
19093 : NULL_RTX, 1, OPTAB_DIRECT);
19094 0 : emit_move_insn (mem, disp);
19095 : }
19096 :
19097 303 : gcc_assert (offset <= TRAMPOLINE_SIZE);
19098 :
19099 : #ifdef HAVE_ENABLE_EXECUTE_STACK
19100 : #ifdef CHECK_EXECUTE_STACK_ENABLED
19101 : if (CHECK_EXECUTE_STACK_ENABLED)
19102 : #endif
19103 : emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19104 : LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
19105 : #endif
19106 303 : }
19107 :
19108 : static bool
19109 54047059 : ix86_allocate_stack_slots_for_args (void)
19110 : {
19111 : /* Naked functions should not allocate stack slots for arguments. */
19112 54047059 : return !ix86_function_naked (current_function_decl);
19113 : }
19114 :
19115 : static bool
19116 37899932 : ix86_warn_func_return (tree decl)
19117 : {
19118 : /* Naked functions are implemented entirely in assembly, including the
19119 : return sequence, so suppress warnings about this. */
19120 37899932 : return !ix86_function_naked (decl);
19121 : }
19122 :
19123 : /* Return the shift count of a vector by scalar shift builtin second argument
19124 : ARG1. */
19125 : static tree
19126 14142 : ix86_vector_shift_count (tree arg1)
19127 : {
19128 14142 : if (tree_fits_uhwi_p (arg1))
19129 : return arg1;
19130 8316 : else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
19131 : {
19132 : /* The count argument is weird, passed in as various 128-bit
19133 : (or 64-bit) vectors, the low 64 bits from it are the count. */
19134 162 : unsigned char buf[16];
19135 162 : int len = native_encode_expr (arg1, buf, 16);
19136 162 : if (len == 0)
19137 162 : return NULL_TREE;
19138 162 : tree t = native_interpret_expr (uint64_type_node, buf, len);
19139 162 : if (t && tree_fits_uhwi_p (t))
19140 : return t;
19141 : }
19142 : return NULL_TREE;
19143 : }
19144 :
19145 : /* Return true if arg_mask is all ones, ELEMS is elements number of
19146 : corresponding vector. */
19147 : static bool
19148 25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
19149 : {
19150 25042 : if (TREE_CODE (arg_mask) != INTEGER_CST)
19151 : return false;
19152 :
19153 7462 : unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
19154 7462 : if (elems == HOST_BITS_PER_WIDE_INT)
19155 33 : return mask == HOST_WIDE_INT_M1U;
19156 7429 : if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
19157 2681 : return false;
19158 :
19159 : return true;
19160 : }
19161 :
19162 : static tree
19163 68146688 : ix86_fold_builtin (tree fndecl, int n_args,
19164 : tree *args, bool ignore ATTRIBUTE_UNUSED)
19165 : {
19166 68146688 : if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
19167 : {
19168 68146688 : enum ix86_builtins fn_code
19169 68146688 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19170 68146688 : enum rtx_code rcode;
19171 68146688 : bool is_vshift;
19172 68146688 : enum tree_code tcode;
19173 68146688 : bool is_scalar;
19174 68146688 : unsigned HOST_WIDE_INT mask;
19175 :
19176 68146688 : switch (fn_code)
19177 : {
19178 8883 : case IX86_BUILTIN_CPU_IS:
19179 8883 : case IX86_BUILTIN_CPU_SUPPORTS:
19180 8883 : gcc_assert (n_args == 1);
19181 8883 : return fold_builtin_cpu (fndecl, args);
19182 :
19183 24859 : case IX86_BUILTIN_NANQ:
19184 24859 : case IX86_BUILTIN_NANSQ:
19185 24859 : {
19186 24859 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19187 24859 : const char *str = c_getstr (*args);
19188 24859 : int quiet = fn_code == IX86_BUILTIN_NANQ;
19189 24859 : REAL_VALUE_TYPE real;
19190 :
19191 24859 : if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
19192 24859 : return build_real (type, real);
19193 0 : return NULL_TREE;
19194 : }
19195 :
19196 108 : case IX86_BUILTIN_INFQ:
19197 108 : case IX86_BUILTIN_HUGE_VALQ:
19198 108 : {
19199 108 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19200 108 : REAL_VALUE_TYPE inf;
19201 108 : real_inf (&inf);
19202 108 : return build_real (type, inf);
19203 : }
19204 :
19205 62447 : case IX86_BUILTIN_TZCNT16:
19206 62447 : case IX86_BUILTIN_CTZS:
19207 62447 : case IX86_BUILTIN_TZCNT32:
19208 62447 : case IX86_BUILTIN_TZCNT64:
19209 62447 : gcc_assert (n_args == 1);
19210 62447 : if (TREE_CODE (args[0]) == INTEGER_CST)
19211 : {
19212 45 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19213 45 : tree arg = args[0];
19214 45 : if (fn_code == IX86_BUILTIN_TZCNT16
19215 45 : || fn_code == IX86_BUILTIN_CTZS)
19216 3 : arg = fold_convert (short_unsigned_type_node, arg);
19217 45 : if (integer_zerop (arg))
19218 6 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19219 : else
19220 39 : return fold_const_call (CFN_CTZ, type, arg);
19221 : }
19222 : break;
19223 :
19224 52002 : case IX86_BUILTIN_LZCNT16:
19225 52002 : case IX86_BUILTIN_CLZS:
19226 52002 : case IX86_BUILTIN_LZCNT32:
19227 52002 : case IX86_BUILTIN_LZCNT64:
19228 52002 : gcc_assert (n_args == 1);
19229 52002 : if (TREE_CODE (args[0]) == INTEGER_CST)
19230 : {
19231 54 : tree type = TREE_TYPE (TREE_TYPE (fndecl));
19232 54 : tree arg = args[0];
19233 54 : if (fn_code == IX86_BUILTIN_LZCNT16
19234 54 : || fn_code == IX86_BUILTIN_CLZS)
19235 18 : arg = fold_convert (short_unsigned_type_node, arg);
19236 54 : if (integer_zerop (arg))
19237 3 : return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
19238 : else
19239 51 : return fold_const_call (CFN_CLZ, type, arg);
19240 : }
19241 : break;
19242 :
19243 61227 : case IX86_BUILTIN_BEXTR32:
19244 61227 : case IX86_BUILTIN_BEXTR64:
19245 61227 : case IX86_BUILTIN_BEXTRI32:
19246 61227 : case IX86_BUILTIN_BEXTRI64:
19247 61227 : gcc_assert (n_args == 2);
19248 61227 : if (tree_fits_uhwi_p (args[1]))
19249 : {
19250 152 : unsigned HOST_WIDE_INT res = 0;
19251 152 : unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
19252 152 : unsigned int start = tree_to_uhwi (args[1]);
19253 152 : unsigned int len = (start & 0xff00) >> 8;
19254 152 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19255 152 : start &= 0xff;
19256 152 : if (start >= prec || len == 0)
19257 111 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19258 : args[0]);
19259 41 : else if (!tree_fits_uhwi_p (args[0]))
19260 : break;
19261 : else
19262 24 : res = tree_to_uhwi (args[0]) >> start;
19263 24 : if (len > prec)
19264 : len = prec;
19265 24 : if (len < HOST_BITS_PER_WIDE_INT)
19266 15 : res &= (HOST_WIDE_INT_1U << len) - 1;
19267 24 : return build_int_cstu (lhs_type, res);
19268 : }
19269 : break;
19270 :
19271 21034 : case IX86_BUILTIN_BZHI32:
19272 21034 : case IX86_BUILTIN_BZHI64:
19273 21034 : gcc_assert (n_args == 2);
19274 21034 : if (tree_fits_uhwi_p (args[1]))
19275 : {
19276 190 : unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
19277 190 : tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
19278 190 : if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
19279 : return args[0];
19280 190 : if (idx == 0)
19281 52 : return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
19282 : args[0]);
19283 138 : if (!tree_fits_uhwi_p (args[0]))
19284 : break;
19285 12 : unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
19286 12 : res &= ~(HOST_WIDE_INT_M1U << idx);
19287 12 : return build_int_cstu (lhs_type, res);
19288 : }
19289 : break;
19290 :
19291 20792 : case IX86_BUILTIN_PDEP32:
19292 20792 : case IX86_BUILTIN_PDEP64:
19293 20792 : gcc_assert (n_args == 2);
19294 20792 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19295 : {
19296 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19297 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19298 46 : unsigned HOST_WIDE_INT res = 0;
19299 46 : unsigned HOST_WIDE_INT m, k = 1;
19300 2990 : for (m = 1; m; m <<= 1)
19301 2944 : if ((mask & m) != 0)
19302 : {
19303 1440 : if ((src & k) != 0)
19304 789 : res |= m;
19305 1440 : k <<= 1;
19306 : }
19307 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19308 : }
19309 : break;
19310 :
19311 20794 : case IX86_BUILTIN_PEXT32:
19312 20794 : case IX86_BUILTIN_PEXT64:
19313 20794 : gcc_assert (n_args == 2);
19314 20794 : if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
19315 : {
19316 46 : unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
19317 46 : unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
19318 46 : unsigned HOST_WIDE_INT res = 0;
19319 46 : unsigned HOST_WIDE_INT m, k = 1;
19320 2990 : for (m = 1; m; m <<= 1)
19321 2944 : if ((mask & m) != 0)
19322 : {
19323 2016 : if ((src & m) != 0)
19324 1063 : res |= k;
19325 2016 : k <<= 1;
19326 : }
19327 46 : return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
19328 : }
19329 : break;
19330 :
19331 100841 : case IX86_BUILTIN_MOVMSKPS:
19332 100841 : case IX86_BUILTIN_PMOVMSKB:
19333 100841 : case IX86_BUILTIN_MOVMSKPD:
19334 100841 : case IX86_BUILTIN_PMOVMSKB128:
19335 100841 : case IX86_BUILTIN_MOVMSKPD256:
19336 100841 : case IX86_BUILTIN_MOVMSKPS256:
19337 100841 : case IX86_BUILTIN_PMOVMSKB256:
19338 100841 : gcc_assert (n_args == 1);
19339 100841 : if (TREE_CODE (args[0]) == VECTOR_CST)
19340 : {
19341 : HOST_WIDE_INT res = 0;
19342 1492 : for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
19343 : {
19344 1242 : tree e = VECTOR_CST_ELT (args[0], i);
19345 1242 : if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
19346 : {
19347 624 : if (wi::neg_p (wi::to_wide (e)))
19348 575 : res |= HOST_WIDE_INT_1 << i;
19349 : }
19350 618 : else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
19351 : {
19352 618 : if (TREE_REAL_CST (e).sign)
19353 517 : res |= HOST_WIDE_INT_1 << i;
19354 : }
19355 : else
19356 : return NULL_TREE;
19357 : }
19358 250 : return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
19359 : }
19360 : break;
19361 :
19362 659648 : case IX86_BUILTIN_PSLLD:
19363 659648 : case IX86_BUILTIN_PSLLD128:
19364 659648 : case IX86_BUILTIN_PSLLD128_MASK:
19365 659648 : case IX86_BUILTIN_PSLLD256:
19366 659648 : case IX86_BUILTIN_PSLLD256_MASK:
19367 659648 : case IX86_BUILTIN_PSLLD512:
19368 659648 : case IX86_BUILTIN_PSLLDI:
19369 659648 : case IX86_BUILTIN_PSLLDI128:
19370 659648 : case IX86_BUILTIN_PSLLDI128_MASK:
19371 659648 : case IX86_BUILTIN_PSLLDI256:
19372 659648 : case IX86_BUILTIN_PSLLDI256_MASK:
19373 659648 : case IX86_BUILTIN_PSLLDI512:
19374 659648 : case IX86_BUILTIN_PSLLQ:
19375 659648 : case IX86_BUILTIN_PSLLQ128:
19376 659648 : case IX86_BUILTIN_PSLLQ128_MASK:
19377 659648 : case IX86_BUILTIN_PSLLQ256:
19378 659648 : case IX86_BUILTIN_PSLLQ256_MASK:
19379 659648 : case IX86_BUILTIN_PSLLQ512:
19380 659648 : case IX86_BUILTIN_PSLLQI:
19381 659648 : case IX86_BUILTIN_PSLLQI128:
19382 659648 : case IX86_BUILTIN_PSLLQI128_MASK:
19383 659648 : case IX86_BUILTIN_PSLLQI256:
19384 659648 : case IX86_BUILTIN_PSLLQI256_MASK:
19385 659648 : case IX86_BUILTIN_PSLLQI512:
19386 659648 : case IX86_BUILTIN_PSLLW:
19387 659648 : case IX86_BUILTIN_PSLLW128:
19388 659648 : case IX86_BUILTIN_PSLLW128_MASK:
19389 659648 : case IX86_BUILTIN_PSLLW256:
19390 659648 : case IX86_BUILTIN_PSLLW256_MASK:
19391 659648 : case IX86_BUILTIN_PSLLW512_MASK:
19392 659648 : case IX86_BUILTIN_PSLLWI:
19393 659648 : case IX86_BUILTIN_PSLLWI128:
19394 659648 : case IX86_BUILTIN_PSLLWI128_MASK:
19395 659648 : case IX86_BUILTIN_PSLLWI256:
19396 659648 : case IX86_BUILTIN_PSLLWI256_MASK:
19397 659648 : case IX86_BUILTIN_PSLLWI512_MASK:
19398 659648 : rcode = ASHIFT;
19399 659648 : is_vshift = false;
19400 659648 : goto do_shift;
19401 601251 : case IX86_BUILTIN_PSRAD:
19402 601251 : case IX86_BUILTIN_PSRAD128:
19403 601251 : case IX86_BUILTIN_PSRAD128_MASK:
19404 601251 : case IX86_BUILTIN_PSRAD256:
19405 601251 : case IX86_BUILTIN_PSRAD256_MASK:
19406 601251 : case IX86_BUILTIN_PSRAD512:
19407 601251 : case IX86_BUILTIN_PSRADI:
19408 601251 : case IX86_BUILTIN_PSRADI128:
19409 601251 : case IX86_BUILTIN_PSRADI128_MASK:
19410 601251 : case IX86_BUILTIN_PSRADI256:
19411 601251 : case IX86_BUILTIN_PSRADI256_MASK:
19412 601251 : case IX86_BUILTIN_PSRADI512:
19413 601251 : case IX86_BUILTIN_PSRAQ128_MASK:
19414 601251 : case IX86_BUILTIN_PSRAQ256_MASK:
19415 601251 : case IX86_BUILTIN_PSRAQ512:
19416 601251 : case IX86_BUILTIN_PSRAQI128_MASK:
19417 601251 : case IX86_BUILTIN_PSRAQI256_MASK:
19418 601251 : case IX86_BUILTIN_PSRAQI512:
19419 601251 : case IX86_BUILTIN_PSRAW:
19420 601251 : case IX86_BUILTIN_PSRAW128:
19421 601251 : case IX86_BUILTIN_PSRAW128_MASK:
19422 601251 : case IX86_BUILTIN_PSRAW256:
19423 601251 : case IX86_BUILTIN_PSRAW256_MASK:
19424 601251 : case IX86_BUILTIN_PSRAW512:
19425 601251 : case IX86_BUILTIN_PSRAWI:
19426 601251 : case IX86_BUILTIN_PSRAWI128:
19427 601251 : case IX86_BUILTIN_PSRAWI128_MASK:
19428 601251 : case IX86_BUILTIN_PSRAWI256:
19429 601251 : case IX86_BUILTIN_PSRAWI256_MASK:
19430 601251 : case IX86_BUILTIN_PSRAWI512:
19431 601251 : rcode = ASHIFTRT;
19432 601251 : is_vshift = false;
19433 601251 : goto do_shift;
19434 633525 : case IX86_BUILTIN_PSRLD:
19435 633525 : case IX86_BUILTIN_PSRLD128:
19436 633525 : case IX86_BUILTIN_PSRLD128_MASK:
19437 633525 : case IX86_BUILTIN_PSRLD256:
19438 633525 : case IX86_BUILTIN_PSRLD256_MASK:
19439 633525 : case IX86_BUILTIN_PSRLD512:
19440 633525 : case IX86_BUILTIN_PSRLDI:
19441 633525 : case IX86_BUILTIN_PSRLDI128:
19442 633525 : case IX86_BUILTIN_PSRLDI128_MASK:
19443 633525 : case IX86_BUILTIN_PSRLDI256:
19444 633525 : case IX86_BUILTIN_PSRLDI256_MASK:
19445 633525 : case IX86_BUILTIN_PSRLDI512:
19446 633525 : case IX86_BUILTIN_PSRLQ:
19447 633525 : case IX86_BUILTIN_PSRLQ128:
19448 633525 : case IX86_BUILTIN_PSRLQ128_MASK:
19449 633525 : case IX86_BUILTIN_PSRLQ256:
19450 633525 : case IX86_BUILTIN_PSRLQ256_MASK:
19451 633525 : case IX86_BUILTIN_PSRLQ512:
19452 633525 : case IX86_BUILTIN_PSRLQI:
19453 633525 : case IX86_BUILTIN_PSRLQI128:
19454 633525 : case IX86_BUILTIN_PSRLQI128_MASK:
19455 633525 : case IX86_BUILTIN_PSRLQI256:
19456 633525 : case IX86_BUILTIN_PSRLQI256_MASK:
19457 633525 : case IX86_BUILTIN_PSRLQI512:
19458 633525 : case IX86_BUILTIN_PSRLW:
19459 633525 : case IX86_BUILTIN_PSRLW128:
19460 633525 : case IX86_BUILTIN_PSRLW128_MASK:
19461 633525 : case IX86_BUILTIN_PSRLW256:
19462 633525 : case IX86_BUILTIN_PSRLW256_MASK:
19463 633525 : case IX86_BUILTIN_PSRLW512:
19464 633525 : case IX86_BUILTIN_PSRLWI:
19465 633525 : case IX86_BUILTIN_PSRLWI128:
19466 633525 : case IX86_BUILTIN_PSRLWI128_MASK:
19467 633525 : case IX86_BUILTIN_PSRLWI256:
19468 633525 : case IX86_BUILTIN_PSRLWI256_MASK:
19469 633525 : case IX86_BUILTIN_PSRLWI512:
19470 633525 : rcode = LSHIFTRT;
19471 633525 : is_vshift = false;
19472 633525 : goto do_shift;
19473 276009 : case IX86_BUILTIN_PSLLVV16HI:
19474 276009 : case IX86_BUILTIN_PSLLVV16SI:
19475 276009 : case IX86_BUILTIN_PSLLVV2DI:
19476 276009 : case IX86_BUILTIN_PSLLVV2DI_MASK:
19477 276009 : case IX86_BUILTIN_PSLLVV32HI:
19478 276009 : case IX86_BUILTIN_PSLLVV4DI:
19479 276009 : case IX86_BUILTIN_PSLLVV4DI_MASK:
19480 276009 : case IX86_BUILTIN_PSLLVV4SI:
19481 276009 : case IX86_BUILTIN_PSLLVV4SI_MASK:
19482 276009 : case IX86_BUILTIN_PSLLVV8DI:
19483 276009 : case IX86_BUILTIN_PSLLVV8HI:
19484 276009 : case IX86_BUILTIN_PSLLVV8SI:
19485 276009 : case IX86_BUILTIN_PSLLVV8SI_MASK:
19486 276009 : rcode = ASHIFT;
19487 276009 : is_vshift = true;
19488 276009 : goto do_shift;
19489 275588 : case IX86_BUILTIN_PSRAVQ128:
19490 275588 : case IX86_BUILTIN_PSRAVQ256:
19491 275588 : case IX86_BUILTIN_PSRAVV16HI:
19492 275588 : case IX86_BUILTIN_PSRAVV16SI:
19493 275588 : case IX86_BUILTIN_PSRAVV32HI:
19494 275588 : case IX86_BUILTIN_PSRAVV4SI:
19495 275588 : case IX86_BUILTIN_PSRAVV4SI_MASK:
19496 275588 : case IX86_BUILTIN_PSRAVV8DI:
19497 275588 : case IX86_BUILTIN_PSRAVV8HI:
19498 275588 : case IX86_BUILTIN_PSRAVV8SI:
19499 275588 : case IX86_BUILTIN_PSRAVV8SI_MASK:
19500 275588 : rcode = ASHIFTRT;
19501 275588 : is_vshift = true;
19502 275588 : goto do_shift;
19503 276000 : case IX86_BUILTIN_PSRLVV16HI:
19504 276000 : case IX86_BUILTIN_PSRLVV16SI:
19505 276000 : case IX86_BUILTIN_PSRLVV2DI:
19506 276000 : case IX86_BUILTIN_PSRLVV2DI_MASK:
19507 276000 : case IX86_BUILTIN_PSRLVV32HI:
19508 276000 : case IX86_BUILTIN_PSRLVV4DI:
19509 276000 : case IX86_BUILTIN_PSRLVV4DI_MASK:
19510 276000 : case IX86_BUILTIN_PSRLVV4SI:
19511 276000 : case IX86_BUILTIN_PSRLVV4SI_MASK:
19512 276000 : case IX86_BUILTIN_PSRLVV8DI:
19513 276000 : case IX86_BUILTIN_PSRLVV8HI:
19514 276000 : case IX86_BUILTIN_PSRLVV8SI:
19515 276000 : case IX86_BUILTIN_PSRLVV8SI_MASK:
19516 276000 : rcode = LSHIFTRT;
19517 276000 : is_vshift = true;
19518 276000 : goto do_shift;
19519 :
19520 2722021 : do_shift:
19521 2722021 : gcc_assert (n_args >= 2);
19522 2722021 : if (TREE_CODE (args[0]) != VECTOR_CST)
19523 : break;
19524 927 : mask = HOST_WIDE_INT_M1U;
19525 927 : if (n_args > 2)
19526 : {
19527 : /* This is masked shift. */
19528 678 : if (!tree_fits_uhwi_p (args[n_args - 1])
19529 678 : || TREE_SIDE_EFFECTS (args[n_args - 2]))
19530 : break;
19531 678 : mask = tree_to_uhwi (args[n_args - 1]);
19532 678 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19533 678 : mask |= HOST_WIDE_INT_M1U << elems;
19534 678 : if (mask != HOST_WIDE_INT_M1U
19535 567 : && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
19536 : break;
19537 633 : if (mask == (HOST_WIDE_INT_M1U << elems))
19538 : return args[n_args - 2];
19539 : }
19540 879 : if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
19541 : break;
19542 879 : if (tree tem = (is_vshift ? integer_one_node
19543 879 : : ix86_vector_shift_count (args[1])))
19544 : {
19545 558 : unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
19546 558 : unsigned HOST_WIDE_INT prec
19547 558 : = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
19548 558 : if (count == 0 && mask == HOST_WIDE_INT_M1U)
19549 : return args[0];
19550 558 : if (count >= prec)
19551 : {
19552 72 : if (rcode == ASHIFTRT)
19553 27 : count = prec - 1;
19554 45 : else if (mask == HOST_WIDE_INT_M1U)
19555 3 : return build_zero_cst (TREE_TYPE (args[0]));
19556 : }
19557 555 : tree countt = NULL_TREE;
19558 555 : if (!is_vshift)
19559 : {
19560 377 : if (count >= prec)
19561 42 : countt = integer_zero_node;
19562 : else
19563 335 : countt = build_int_cst (integer_type_node, count);
19564 : }
19565 555 : tree_vector_builder builder;
19566 555 : if (mask != HOST_WIDE_INT_M1U || is_vshift)
19567 392 : builder.new_vector (TREE_TYPE (args[0]),
19568 784 : TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
19569 : 1);
19570 : else
19571 163 : builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
19572 : false);
19573 555 : unsigned int cnt = builder.encoded_nelts ();
19574 5967 : for (unsigned int i = 0; i < cnt; ++i)
19575 : {
19576 5412 : tree elt = VECTOR_CST_ELT (args[0], i);
19577 5412 : if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19578 0 : return NULL_TREE;
19579 5412 : tree type = TREE_TYPE (elt);
19580 5412 : if (rcode == LSHIFTRT)
19581 2040 : elt = fold_convert (unsigned_type_for (type), elt);
19582 5412 : if (is_vshift)
19583 : {
19584 1846 : countt = VECTOR_CST_ELT (args[1], i);
19585 1846 : if (TREE_CODE (countt) != INTEGER_CST
19586 1846 : || TREE_OVERFLOW (countt))
19587 : return NULL_TREE;
19588 1846 : if (wi::neg_p (wi::to_wide (countt))
19589 3610 : || wi::to_widest (countt) >= prec)
19590 : {
19591 325 : if (rcode == ASHIFTRT)
19592 108 : countt = build_int_cst (TREE_TYPE (countt),
19593 108 : prec - 1);
19594 : else
19595 : {
19596 217 : elt = build_zero_cst (TREE_TYPE (elt));
19597 217 : countt = build_zero_cst (TREE_TYPE (countt));
19598 : }
19599 : }
19600 : }
19601 3566 : else if (count >= prec)
19602 504 : elt = build_zero_cst (TREE_TYPE (elt));
19603 8950 : elt = const_binop (rcode == ASHIFT
19604 : ? LSHIFT_EXPR : RSHIFT_EXPR,
19605 5412 : TREE_TYPE (elt), elt, countt);
19606 5412 : if (!elt || TREE_CODE (elt) != INTEGER_CST)
19607 : return NULL_TREE;
19608 5412 : if (rcode == LSHIFTRT)
19609 2040 : elt = fold_convert (type, elt);
19610 5412 : if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19611 : {
19612 1566 : elt = VECTOR_CST_ELT (args[n_args - 2], i);
19613 1566 : if (TREE_CODE (elt) != INTEGER_CST
19614 1566 : || TREE_OVERFLOW (elt))
19615 : return NULL_TREE;
19616 : }
19617 5412 : builder.quick_push (elt);
19618 : }
19619 555 : return builder.build ();
19620 555 : }
19621 : break;
19622 :
19623 32718 : case IX86_BUILTIN_MINSS:
19624 32718 : case IX86_BUILTIN_MINSH_MASK:
19625 32718 : tcode = LT_EXPR;
19626 32718 : is_scalar = true;
19627 32718 : goto do_minmax;
19628 :
19629 32718 : case IX86_BUILTIN_MAXSS:
19630 32718 : case IX86_BUILTIN_MAXSH_MASK:
19631 32718 : tcode = GT_EXPR;
19632 32718 : is_scalar = true;
19633 32718 : goto do_minmax;
19634 :
19635 350576 : case IX86_BUILTIN_MINPS:
19636 350576 : case IX86_BUILTIN_MINPD:
19637 350576 : case IX86_BUILTIN_MINPS256:
19638 350576 : case IX86_BUILTIN_MINPD256:
19639 350576 : case IX86_BUILTIN_MINPS512:
19640 350576 : case IX86_BUILTIN_MINPD512:
19641 350576 : case IX86_BUILTIN_MINPS128_MASK:
19642 350576 : case IX86_BUILTIN_MINPD128_MASK:
19643 350576 : case IX86_BUILTIN_MINPS256_MASK:
19644 350576 : case IX86_BUILTIN_MINPD256_MASK:
19645 350576 : case IX86_BUILTIN_MINPH128_MASK:
19646 350576 : case IX86_BUILTIN_MINPH256_MASK:
19647 350576 : case IX86_BUILTIN_MINPH512_MASK:
19648 350576 : tcode = LT_EXPR;
19649 350576 : is_scalar = false;
19650 350576 : goto do_minmax;
19651 :
19652 : case IX86_BUILTIN_MAXPS:
19653 : case IX86_BUILTIN_MAXPD:
19654 : case IX86_BUILTIN_MAXPS256:
19655 : case IX86_BUILTIN_MAXPD256:
19656 : case IX86_BUILTIN_MAXPS512:
19657 : case IX86_BUILTIN_MAXPD512:
19658 : case IX86_BUILTIN_MAXPS128_MASK:
19659 : case IX86_BUILTIN_MAXPD128_MASK:
19660 : case IX86_BUILTIN_MAXPS256_MASK:
19661 : case IX86_BUILTIN_MAXPD256_MASK:
19662 : case IX86_BUILTIN_MAXPH128_MASK:
19663 : case IX86_BUILTIN_MAXPH256_MASK:
19664 : case IX86_BUILTIN_MAXPH512_MASK:
19665 : tcode = GT_EXPR;
19666 : is_scalar = false;
19667 766608 : do_minmax:
19668 766608 : gcc_assert (n_args >= 2);
19669 766608 : if (TREE_CODE (args[0]) != VECTOR_CST
19670 76 : || TREE_CODE (args[1]) != VECTOR_CST)
19671 : break;
19672 76 : mask = HOST_WIDE_INT_M1U;
19673 76 : if (n_args > 2)
19674 : {
19675 36 : gcc_assert (n_args >= 4);
19676 : /* This is masked minmax. */
19677 36 : if (TREE_CODE (args[3]) != INTEGER_CST
19678 36 : || TREE_SIDE_EFFECTS (args[2]))
19679 : break;
19680 36 : mask = TREE_INT_CST_LOW (args[3]);
19681 36 : unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19682 36 : mask |= HOST_WIDE_INT_M1U << elems;
19683 36 : if (mask != HOST_WIDE_INT_M1U
19684 32 : && TREE_CODE (args[2]) != VECTOR_CST)
19685 : break;
19686 36 : if (n_args >= 5)
19687 : {
19688 20 : if (!tree_fits_uhwi_p (args[4]))
19689 : break;
19690 20 : if (tree_to_uhwi (args[4]) != 4
19691 0 : && tree_to_uhwi (args[4]) != 8)
19692 : break;
19693 : }
19694 36 : if (mask == (HOST_WIDE_INT_M1U << elems))
19695 : return args[2];
19696 : }
19697 : /* Punt on NaNs, unless exceptions are disabled. */
19698 76 : if (HONOR_NANS (args[0])
19699 76 : && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19700 184 : for (int i = 0; i < 2; ++i)
19701 : {
19702 134 : unsigned count = vector_cst_encoded_nelts (args[i]);
19703 957 : for (unsigned j = 0; j < count; ++j)
19704 849 : if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19705 : return NULL_TREE;
19706 : }
19707 50 : {
19708 50 : tree res = const_binop (tcode,
19709 50 : truth_type_for (TREE_TYPE (args[0])),
19710 : args[0], args[1]);
19711 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19712 : break;
19713 50 : res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19714 : args[0], args[1]);
19715 50 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19716 : break;
19717 50 : if (mask != HOST_WIDE_INT_M1U)
19718 : {
19719 32 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19720 32 : vec_perm_builder sel (nelts, nelts, 1);
19721 328 : for (unsigned int i = 0; i < nelts; i++)
19722 296 : if (mask & (HOST_WIDE_INT_1U << i))
19723 160 : sel.quick_push (i);
19724 : else
19725 136 : sel.quick_push (nelts + i);
19726 32 : vec_perm_indices indices (sel, 2, nelts);
19727 32 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19728 : indices);
19729 32 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19730 : break;
19731 32 : }
19732 50 : if (is_scalar)
19733 : {
19734 10 : unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19735 10 : vec_perm_builder sel (nelts, nelts, 1);
19736 10 : sel.quick_push (0);
19737 40 : for (unsigned int i = 1; i < nelts; i++)
19738 30 : sel.quick_push (nelts + i);
19739 10 : vec_perm_indices indices (sel, 2, nelts);
19740 10 : res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19741 : indices);
19742 10 : if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19743 : break;
19744 10 : }
19745 50 : return res;
19746 : }
19747 :
19748 : default:
19749 : break;
19750 : }
19751 : }
19752 :
19753 : #ifdef SUBTARGET_FOLD_BUILTIN
19754 : return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19755 : #endif
19756 :
19757 : return NULL_TREE;
19758 : }
19759 :
19760 : /* Fold an MD builtin (use ix86_fold_builtin for folding into a
19761 : constant) in GIMPLE. */
19762 :
19763 : bool
19764 1121511 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19765 : {
19766 1121511 : gimple *stmt = gsi_stmt (*gsi), *g;
19767 1121511 : gimple_seq stmts = NULL;
19768 1121511 : tree fndecl = gimple_call_fndecl (stmt);
19769 1121511 : gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19770 1121511 : int n_args = gimple_call_num_args (stmt);
19771 1121511 : enum ix86_builtins fn_code
19772 1121511 : = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19773 1121511 : tree decl = NULL_TREE;
19774 1121511 : tree arg0, arg1, arg2;
19775 1121511 : enum rtx_code rcode;
19776 1121511 : enum tree_code tcode;
19777 1121511 : unsigned HOST_WIDE_INT count;
19778 1121511 : bool is_vshift;
19779 1121511 : unsigned HOST_WIDE_INT elems;
19780 1121511 : location_t loc;
19781 :
19782 : /* Don't fold when there's isa mismatch. */
19783 1121511 : if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19784 : return false;
19785 :
19786 1121384 : switch (fn_code)
19787 : {
19788 288 : case IX86_BUILTIN_TZCNT32:
19789 288 : decl = builtin_decl_implicit (BUILT_IN_CTZ);
19790 288 : goto fold_tzcnt_lzcnt;
19791 :
19792 237 : case IX86_BUILTIN_TZCNT64:
19793 237 : decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19794 237 : goto fold_tzcnt_lzcnt;
19795 :
19796 215 : case IX86_BUILTIN_LZCNT32:
19797 215 : decl = builtin_decl_implicit (BUILT_IN_CLZ);
19798 215 : goto fold_tzcnt_lzcnt;
19799 :
19800 224 : case IX86_BUILTIN_LZCNT64:
19801 224 : decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19802 224 : goto fold_tzcnt_lzcnt;
19803 :
19804 964 : fold_tzcnt_lzcnt:
19805 964 : gcc_assert (n_args == 1);
19806 964 : arg0 = gimple_call_arg (stmt, 0);
19807 964 : if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19808 : {
19809 799 : int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19810 : /* If arg0 is provably non-zero, optimize into generic
19811 : __builtin_c[tl]z{,ll} function the middle-end handles
19812 : better. */
19813 799 : if (!expr_not_equal_to (arg0, wi::zero (prec)))
19814 : return false;
19815 :
19816 9 : loc = gimple_location (stmt);
19817 9 : g = gimple_build_call (decl, 1, arg0);
19818 9 : gimple_set_location (g, loc);
19819 9 : tree lhs = make_ssa_name (integer_type_node);
19820 9 : gimple_call_set_lhs (g, lhs);
19821 9 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
19822 9 : g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19823 9 : gimple_set_location (g, loc);
19824 9 : gsi_replace (gsi, g, false);
19825 9 : return true;
19826 : }
19827 : break;
19828 :
19829 491 : case IX86_BUILTIN_BZHI32:
19830 491 : case IX86_BUILTIN_BZHI64:
19831 491 : gcc_assert (n_args == 2);
19832 491 : arg1 = gimple_call_arg (stmt, 1);
19833 491 : if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19834 : {
19835 195 : unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19836 195 : arg0 = gimple_call_arg (stmt, 0);
19837 195 : if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19838 : break;
19839 31 : loc = gimple_location (stmt);
19840 31 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19841 31 : gimple_set_location (g, loc);
19842 31 : gsi_replace (gsi, g, false);
19843 31 : return true;
19844 : }
19845 : break;
19846 :
19847 502 : case IX86_BUILTIN_PDEP32:
19848 502 : case IX86_BUILTIN_PDEP64:
19849 502 : case IX86_BUILTIN_PEXT32:
19850 502 : case IX86_BUILTIN_PEXT64:
19851 502 : gcc_assert (n_args == 2);
19852 502 : arg1 = gimple_call_arg (stmt, 1);
19853 502 : if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19854 : {
19855 4 : loc = gimple_location (stmt);
19856 4 : arg0 = gimple_call_arg (stmt, 0);
19857 4 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19858 4 : gimple_set_location (g, loc);
19859 4 : gsi_replace (gsi, g, false);
19860 4 : return true;
19861 : }
19862 : break;
19863 :
19864 145 : case IX86_BUILTIN_PBLENDVB256:
19865 145 : case IX86_BUILTIN_BLENDVPS256:
19866 145 : case IX86_BUILTIN_BLENDVPD256:
19867 : /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
19868 : to scalar operations and not combined back. */
19869 145 : if (!TARGET_AVX2)
19870 : break;
19871 :
19872 : /* FALLTHRU. */
19873 112 : case IX86_BUILTIN_BLENDVPD:
19874 : /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19875 : w/o sse4.2, it's veclowered to scalar operations and
19876 : not combined back. */
19877 112 : if (!TARGET_SSE4_2)
19878 : break;
19879 : /* FALLTHRU. */
19880 166 : case IX86_BUILTIN_PBLENDVB128:
19881 166 : case IX86_BUILTIN_BLENDVPS:
19882 166 : gcc_assert (n_args == 3);
19883 166 : arg0 = gimple_call_arg (stmt, 0);
19884 166 : arg1 = gimple_call_arg (stmt, 1);
19885 166 : arg2 = gimple_call_arg (stmt, 2);
19886 166 : if (gimple_call_lhs (stmt))
19887 : {
19888 166 : loc = gimple_location (stmt);
19889 166 : tree type = TREE_TYPE (arg2);
19890 166 : if (VECTOR_FLOAT_TYPE_P (type))
19891 : {
19892 73 : tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19893 73 : ? intSI_type_node : intDI_type_node;
19894 73 : type = get_same_sized_vectype (itype, type);
19895 : }
19896 : else
19897 93 : type = signed_type_for (type);
19898 166 : arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19899 166 : tree zero_vec = build_zero_cst (type);
19900 166 : tree cmp_type = truth_type_for (type);
19901 166 : tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19902 166 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19903 166 : g = gimple_build_assign (gimple_call_lhs (stmt),
19904 : VEC_COND_EXPR, cmp,
19905 : arg1, arg0);
19906 166 : gimple_set_location (g, loc);
19907 166 : gsi_replace (gsi, g, false);
19908 : }
19909 : else
19910 0 : gsi_replace (gsi, gimple_build_nop (), false);
19911 : return true;
19912 :
19913 :
19914 16 : case IX86_BUILTIN_PCMPEQB128:
19915 16 : case IX86_BUILTIN_PCMPEQW128:
19916 16 : case IX86_BUILTIN_PCMPEQD128:
19917 16 : case IX86_BUILTIN_PCMPEQQ:
19918 16 : case IX86_BUILTIN_PCMPEQB256:
19919 16 : case IX86_BUILTIN_PCMPEQW256:
19920 16 : case IX86_BUILTIN_PCMPEQD256:
19921 16 : case IX86_BUILTIN_PCMPEQQ256:
19922 16 : tcode = EQ_EXPR;
19923 16 : goto do_cmp;
19924 :
19925 : case IX86_BUILTIN_PCMPGTB128:
19926 : case IX86_BUILTIN_PCMPGTW128:
19927 : case IX86_BUILTIN_PCMPGTD128:
19928 : case IX86_BUILTIN_PCMPGTQ:
19929 : case IX86_BUILTIN_PCMPGTB256:
19930 : case IX86_BUILTIN_PCMPGTW256:
19931 : case IX86_BUILTIN_PCMPGTD256:
19932 : case IX86_BUILTIN_PCMPGTQ256:
19933 : tcode = GT_EXPR;
19934 :
19935 33 : do_cmp:
19936 33 : gcc_assert (n_args == 2);
19937 33 : arg0 = gimple_call_arg (stmt, 0);
19938 33 : arg1 = gimple_call_arg (stmt, 1);
19939 33 : if (gimple_call_lhs (stmt))
19940 : {
19941 32 : loc = gimple_location (stmt);
19942 32 : tree type = TREE_TYPE (arg0);
19943 32 : tree zero_vec = build_zero_cst (type);
19944 32 : tree minus_one_vec = build_minus_one_cst (type);
19945 32 : tree cmp_type = truth_type_for (type);
19946 32 : tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19947 32 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19948 32 : g = gimple_build_assign (gimple_call_lhs (stmt),
19949 : VEC_COND_EXPR, cmp,
19950 : minus_one_vec, zero_vec);
19951 32 : gimple_set_location (g, loc);
19952 32 : gsi_replace (gsi, g, false);
19953 : }
19954 : else
19955 1 : gsi_replace (gsi, gimple_build_nop (), false);
19956 : return true;
19957 :
19958 9297 : case IX86_BUILTIN_PSLLD:
19959 9297 : case IX86_BUILTIN_PSLLD128:
19960 9297 : case IX86_BUILTIN_PSLLD128_MASK:
19961 9297 : case IX86_BUILTIN_PSLLD256:
19962 9297 : case IX86_BUILTIN_PSLLD256_MASK:
19963 9297 : case IX86_BUILTIN_PSLLD512:
19964 9297 : case IX86_BUILTIN_PSLLDI:
19965 9297 : case IX86_BUILTIN_PSLLDI128:
19966 9297 : case IX86_BUILTIN_PSLLDI128_MASK:
19967 9297 : case IX86_BUILTIN_PSLLDI256:
19968 9297 : case IX86_BUILTIN_PSLLDI256_MASK:
19969 9297 : case IX86_BUILTIN_PSLLDI512:
19970 9297 : case IX86_BUILTIN_PSLLQ:
19971 9297 : case IX86_BUILTIN_PSLLQ128:
19972 9297 : case IX86_BUILTIN_PSLLQ128_MASK:
19973 9297 : case IX86_BUILTIN_PSLLQ256:
19974 9297 : case IX86_BUILTIN_PSLLQ256_MASK:
19975 9297 : case IX86_BUILTIN_PSLLQ512:
19976 9297 : case IX86_BUILTIN_PSLLQI:
19977 9297 : case IX86_BUILTIN_PSLLQI128:
19978 9297 : case IX86_BUILTIN_PSLLQI128_MASK:
19979 9297 : case IX86_BUILTIN_PSLLQI256:
19980 9297 : case IX86_BUILTIN_PSLLQI256_MASK:
19981 9297 : case IX86_BUILTIN_PSLLQI512:
19982 9297 : case IX86_BUILTIN_PSLLW:
19983 9297 : case IX86_BUILTIN_PSLLW128:
19984 9297 : case IX86_BUILTIN_PSLLW128_MASK:
19985 9297 : case IX86_BUILTIN_PSLLW256:
19986 9297 : case IX86_BUILTIN_PSLLW256_MASK:
19987 9297 : case IX86_BUILTIN_PSLLW512_MASK:
19988 9297 : case IX86_BUILTIN_PSLLWI:
19989 9297 : case IX86_BUILTIN_PSLLWI128:
19990 9297 : case IX86_BUILTIN_PSLLWI128_MASK:
19991 9297 : case IX86_BUILTIN_PSLLWI256:
19992 9297 : case IX86_BUILTIN_PSLLWI256_MASK:
19993 9297 : case IX86_BUILTIN_PSLLWI512_MASK:
19994 9297 : rcode = ASHIFT;
19995 9297 : is_vshift = false;
19996 9297 : goto do_shift;
19997 6495 : case IX86_BUILTIN_PSRAD:
19998 6495 : case IX86_BUILTIN_PSRAD128:
19999 6495 : case IX86_BUILTIN_PSRAD128_MASK:
20000 6495 : case IX86_BUILTIN_PSRAD256:
20001 6495 : case IX86_BUILTIN_PSRAD256_MASK:
20002 6495 : case IX86_BUILTIN_PSRAD512:
20003 6495 : case IX86_BUILTIN_PSRADI:
20004 6495 : case IX86_BUILTIN_PSRADI128:
20005 6495 : case IX86_BUILTIN_PSRADI128_MASK:
20006 6495 : case IX86_BUILTIN_PSRADI256:
20007 6495 : case IX86_BUILTIN_PSRADI256_MASK:
20008 6495 : case IX86_BUILTIN_PSRADI512:
20009 6495 : case IX86_BUILTIN_PSRAQ128_MASK:
20010 6495 : case IX86_BUILTIN_PSRAQ256_MASK:
20011 6495 : case IX86_BUILTIN_PSRAQ512:
20012 6495 : case IX86_BUILTIN_PSRAQI128_MASK:
20013 6495 : case IX86_BUILTIN_PSRAQI256_MASK:
20014 6495 : case IX86_BUILTIN_PSRAQI512:
20015 6495 : case IX86_BUILTIN_PSRAW:
20016 6495 : case IX86_BUILTIN_PSRAW128:
20017 6495 : case IX86_BUILTIN_PSRAW128_MASK:
20018 6495 : case IX86_BUILTIN_PSRAW256:
20019 6495 : case IX86_BUILTIN_PSRAW256_MASK:
20020 6495 : case IX86_BUILTIN_PSRAW512:
20021 6495 : case IX86_BUILTIN_PSRAWI:
20022 6495 : case IX86_BUILTIN_PSRAWI128:
20023 6495 : case IX86_BUILTIN_PSRAWI128_MASK:
20024 6495 : case IX86_BUILTIN_PSRAWI256:
20025 6495 : case IX86_BUILTIN_PSRAWI256_MASK:
20026 6495 : case IX86_BUILTIN_PSRAWI512:
20027 6495 : rcode = ASHIFTRT;
20028 6495 : is_vshift = false;
20029 6495 : goto do_shift;
20030 7960 : case IX86_BUILTIN_PSRLD:
20031 7960 : case IX86_BUILTIN_PSRLD128:
20032 7960 : case IX86_BUILTIN_PSRLD128_MASK:
20033 7960 : case IX86_BUILTIN_PSRLD256:
20034 7960 : case IX86_BUILTIN_PSRLD256_MASK:
20035 7960 : case IX86_BUILTIN_PSRLD512:
20036 7960 : case IX86_BUILTIN_PSRLDI:
20037 7960 : case IX86_BUILTIN_PSRLDI128:
20038 7960 : case IX86_BUILTIN_PSRLDI128_MASK:
20039 7960 : case IX86_BUILTIN_PSRLDI256:
20040 7960 : case IX86_BUILTIN_PSRLDI256_MASK:
20041 7960 : case IX86_BUILTIN_PSRLDI512:
20042 7960 : case IX86_BUILTIN_PSRLQ:
20043 7960 : case IX86_BUILTIN_PSRLQ128:
20044 7960 : case IX86_BUILTIN_PSRLQ128_MASK:
20045 7960 : case IX86_BUILTIN_PSRLQ256:
20046 7960 : case IX86_BUILTIN_PSRLQ256_MASK:
20047 7960 : case IX86_BUILTIN_PSRLQ512:
20048 7960 : case IX86_BUILTIN_PSRLQI:
20049 7960 : case IX86_BUILTIN_PSRLQI128:
20050 7960 : case IX86_BUILTIN_PSRLQI128_MASK:
20051 7960 : case IX86_BUILTIN_PSRLQI256:
20052 7960 : case IX86_BUILTIN_PSRLQI256_MASK:
20053 7960 : case IX86_BUILTIN_PSRLQI512:
20054 7960 : case IX86_BUILTIN_PSRLW:
20055 7960 : case IX86_BUILTIN_PSRLW128:
20056 7960 : case IX86_BUILTIN_PSRLW128_MASK:
20057 7960 : case IX86_BUILTIN_PSRLW256:
20058 7960 : case IX86_BUILTIN_PSRLW256_MASK:
20059 7960 : case IX86_BUILTIN_PSRLW512:
20060 7960 : case IX86_BUILTIN_PSRLWI:
20061 7960 : case IX86_BUILTIN_PSRLWI128:
20062 7960 : case IX86_BUILTIN_PSRLWI128_MASK:
20063 7960 : case IX86_BUILTIN_PSRLWI256:
20064 7960 : case IX86_BUILTIN_PSRLWI256_MASK:
20065 7960 : case IX86_BUILTIN_PSRLWI512:
20066 7960 : rcode = LSHIFTRT;
20067 7960 : is_vshift = false;
20068 7960 : goto do_shift;
20069 2384 : case IX86_BUILTIN_PSLLVV16HI:
20070 2384 : case IX86_BUILTIN_PSLLVV16SI:
20071 2384 : case IX86_BUILTIN_PSLLVV2DI:
20072 2384 : case IX86_BUILTIN_PSLLVV2DI_MASK:
20073 2384 : case IX86_BUILTIN_PSLLVV32HI:
20074 2384 : case IX86_BUILTIN_PSLLVV4DI:
20075 2384 : case IX86_BUILTIN_PSLLVV4DI_MASK:
20076 2384 : case IX86_BUILTIN_PSLLVV4SI:
20077 2384 : case IX86_BUILTIN_PSLLVV4SI_MASK:
20078 2384 : case IX86_BUILTIN_PSLLVV8DI:
20079 2384 : case IX86_BUILTIN_PSLLVV8HI:
20080 2384 : case IX86_BUILTIN_PSLLVV8SI:
20081 2384 : case IX86_BUILTIN_PSLLVV8SI_MASK:
20082 2384 : rcode = ASHIFT;
20083 2384 : is_vshift = true;
20084 2384 : goto do_shift;
20085 2341 : case IX86_BUILTIN_PSRAVQ128:
20086 2341 : case IX86_BUILTIN_PSRAVQ256:
20087 2341 : case IX86_BUILTIN_PSRAVV16HI:
20088 2341 : case IX86_BUILTIN_PSRAVV16SI:
20089 2341 : case IX86_BUILTIN_PSRAVV32HI:
20090 2341 : case IX86_BUILTIN_PSRAVV4SI:
20091 2341 : case IX86_BUILTIN_PSRAVV4SI_MASK:
20092 2341 : case IX86_BUILTIN_PSRAVV8DI:
20093 2341 : case IX86_BUILTIN_PSRAVV8HI:
20094 2341 : case IX86_BUILTIN_PSRAVV8SI:
20095 2341 : case IX86_BUILTIN_PSRAVV8SI_MASK:
20096 2341 : rcode = ASHIFTRT;
20097 2341 : is_vshift = true;
20098 2341 : goto do_shift;
20099 2380 : case IX86_BUILTIN_PSRLVV16HI:
20100 2380 : case IX86_BUILTIN_PSRLVV16SI:
20101 2380 : case IX86_BUILTIN_PSRLVV2DI:
20102 2380 : case IX86_BUILTIN_PSRLVV2DI_MASK:
20103 2380 : case IX86_BUILTIN_PSRLVV32HI:
20104 2380 : case IX86_BUILTIN_PSRLVV4DI:
20105 2380 : case IX86_BUILTIN_PSRLVV4DI_MASK:
20106 2380 : case IX86_BUILTIN_PSRLVV4SI:
20107 2380 : case IX86_BUILTIN_PSRLVV4SI_MASK:
20108 2380 : case IX86_BUILTIN_PSRLVV8DI:
20109 2380 : case IX86_BUILTIN_PSRLVV8HI:
20110 2380 : case IX86_BUILTIN_PSRLVV8SI:
20111 2380 : case IX86_BUILTIN_PSRLVV8SI_MASK:
20112 2380 : rcode = LSHIFTRT;
20113 2380 : is_vshift = true;
20114 2380 : goto do_shift;
20115 :
20116 30857 : do_shift:
20117 30857 : gcc_assert (n_args >= 2);
20118 30857 : if (!gimple_call_lhs (stmt))
20119 : {
20120 1 : gsi_replace (gsi, gimple_build_nop (), false);
20121 1 : return true;
20122 : }
20123 30856 : arg0 = gimple_call_arg (stmt, 0);
20124 30856 : arg1 = gimple_call_arg (stmt, 1);
20125 30856 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20126 : /* For masked shift, only optimize if the mask is all ones. */
20127 30856 : if (n_args > 2
20128 30856 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20129 : break;
20130 16081 : if (is_vshift)
20131 : {
20132 2640 : if (TREE_CODE (arg1) != VECTOR_CST)
20133 : break;
20134 69 : count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
20135 69 : if (integer_zerop (arg1))
20136 27 : count = 0;
20137 42 : else if (rcode == ASHIFTRT)
20138 : break;
20139 : else
20140 230 : for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
20141 : {
20142 212 : tree elt = VECTOR_CST_ELT (arg1, i);
20143 212 : if (!wi::neg_p (wi::to_wide (elt))
20144 375 : && wi::to_widest (elt) < count)
20145 16 : return false;
20146 : }
20147 : }
20148 : else
20149 : {
20150 13441 : arg1 = ix86_vector_shift_count (arg1);
20151 13441 : if (!arg1)
20152 : break;
20153 5608 : count = tree_to_uhwi (arg1);
20154 : }
20155 5653 : if (count == 0)
20156 : {
20157 : /* Just return the first argument for shift by 0. */
20158 93 : loc = gimple_location (stmt);
20159 93 : g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
20160 93 : gimple_set_location (g, loc);
20161 93 : gsi_replace (gsi, g, false);
20162 93 : return true;
20163 : }
20164 5560 : if (rcode != ASHIFTRT
20165 5560 : && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
20166 : {
20167 : /* For shift counts equal or greater than precision, except for
20168 : arithmetic right shift the result is zero. */
20169 78 : loc = gimple_location (stmt);
20170 78 : g = gimple_build_assign (gimple_call_lhs (stmt),
20171 78 : build_zero_cst (TREE_TYPE (arg0)));
20172 78 : gimple_set_location (g, loc);
20173 78 : gsi_replace (gsi, g, false);
20174 78 : return true;
20175 : }
20176 : break;
20177 :
20178 531 : case IX86_BUILTIN_SHUFPD512:
20179 531 : case IX86_BUILTIN_SHUFPS512:
20180 531 : case IX86_BUILTIN_SHUFPD:
20181 531 : case IX86_BUILTIN_SHUFPD256:
20182 531 : case IX86_BUILTIN_SHUFPS:
20183 531 : case IX86_BUILTIN_SHUFPS256:
20184 531 : arg0 = gimple_call_arg (stmt, 0);
20185 531 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20186 : /* This is masked shuffle. Only optimize if the mask is all ones. */
20187 531 : if (n_args > 3
20188 895 : && !ix86_masked_all_ones (elems,
20189 364 : gimple_call_arg (stmt, n_args - 1)))
20190 : break;
20191 203 : arg2 = gimple_call_arg (stmt, 2);
20192 203 : if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
20193 : {
20194 146 : unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
20195 : /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
20196 146 : if (shuffle_mask > 255)
20197 : return false;
20198 :
20199 144 : machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
20200 144 : loc = gimple_location (stmt);
20201 144 : tree itype = (imode == E_DFmode
20202 144 : ? long_long_integer_type_node : integer_type_node);
20203 144 : tree vtype = build_vector_type (itype, elems);
20204 144 : tree_vector_builder elts (vtype, elems, 1);
20205 :
20206 :
20207 : /* Transform integer shuffle_mask to vector perm_mask which
20208 : is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
20209 840 : for (unsigned i = 0; i != elems; i++)
20210 : {
20211 696 : unsigned sel_idx;
20212 : /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
20213 : provide 2 select controls for each element of the
20214 : destination. */
20215 696 : if (imode == E_DFmode)
20216 240 : sel_idx = (i & 1) * elems + (i & ~1)
20217 240 : + ((shuffle_mask >> i) & 1);
20218 : else
20219 : {
20220 : /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
20221 : controls for each element of the destination. */
20222 456 : unsigned j = i % 4;
20223 456 : sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
20224 456 : + ((shuffle_mask >> 2 * j) & 3);
20225 : }
20226 696 : elts.quick_push (build_int_cst (itype, sel_idx));
20227 : }
20228 :
20229 144 : tree perm_mask = elts.build ();
20230 144 : arg1 = gimple_call_arg (stmt, 1);
20231 144 : g = gimple_build_assign (gimple_call_lhs (stmt),
20232 : VEC_PERM_EXPR,
20233 : arg0, arg1, perm_mask);
20234 144 : gimple_set_location (g, loc);
20235 144 : gsi_replace (gsi, g, false);
20236 144 : return true;
20237 144 : }
20238 : // Do not error yet, the constant could be propagated later?
20239 : break;
20240 :
20241 48 : case IX86_BUILTIN_PABSB:
20242 48 : case IX86_BUILTIN_PABSW:
20243 48 : case IX86_BUILTIN_PABSD:
20244 : /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
20245 48 : if (!TARGET_MMX_WITH_SSE)
20246 : break;
20247 : /* FALLTHRU. */
20248 2189 : case IX86_BUILTIN_PABSB128:
20249 2189 : case IX86_BUILTIN_PABSB256:
20250 2189 : case IX86_BUILTIN_PABSB512:
20251 2189 : case IX86_BUILTIN_PABSW128:
20252 2189 : case IX86_BUILTIN_PABSW256:
20253 2189 : case IX86_BUILTIN_PABSW512:
20254 2189 : case IX86_BUILTIN_PABSD128:
20255 2189 : case IX86_BUILTIN_PABSD256:
20256 2189 : case IX86_BUILTIN_PABSD512:
20257 2189 : case IX86_BUILTIN_PABSQ128:
20258 2189 : case IX86_BUILTIN_PABSQ256:
20259 2189 : case IX86_BUILTIN_PABSQ512:
20260 2189 : case IX86_BUILTIN_PABSB128_MASK:
20261 2189 : case IX86_BUILTIN_PABSB256_MASK:
20262 2189 : case IX86_BUILTIN_PABSW128_MASK:
20263 2189 : case IX86_BUILTIN_PABSW256_MASK:
20264 2189 : case IX86_BUILTIN_PABSD128_MASK:
20265 2189 : case IX86_BUILTIN_PABSD256_MASK:
20266 2189 : gcc_assert (n_args >= 1);
20267 2189 : if (!gimple_call_lhs (stmt))
20268 : {
20269 1 : gsi_replace (gsi, gimple_build_nop (), false);
20270 1 : return true;
20271 : }
20272 2188 : arg0 = gimple_call_arg (stmt, 0);
20273 2188 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20274 : /* For masked ABS, only optimize if the mask is all ones. */
20275 2188 : if (n_args > 1
20276 2188 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
20277 : break;
20278 228 : {
20279 228 : tree utype, ures, vce;
20280 228 : utype = unsigned_type_for (TREE_TYPE (arg0));
20281 : /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
20282 : instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
20283 228 : ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
20284 228 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20285 228 : loc = gimple_location (stmt);
20286 228 : vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
20287 228 : g = gimple_build_assign (gimple_call_lhs (stmt),
20288 : VIEW_CONVERT_EXPR, vce);
20289 228 : gsi_replace (gsi, g, false);
20290 : }
20291 228 : return true;
20292 :
20293 2225 : case IX86_BUILTIN_MINPS:
20294 2225 : case IX86_BUILTIN_MINPD:
20295 2225 : case IX86_BUILTIN_MINPS256:
20296 2225 : case IX86_BUILTIN_MINPD256:
20297 2225 : case IX86_BUILTIN_MINPS512:
20298 2225 : case IX86_BUILTIN_MINPD512:
20299 2225 : case IX86_BUILTIN_MINPS128_MASK:
20300 2225 : case IX86_BUILTIN_MINPD128_MASK:
20301 2225 : case IX86_BUILTIN_MINPS256_MASK:
20302 2225 : case IX86_BUILTIN_MINPD256_MASK:
20303 2225 : case IX86_BUILTIN_MINPH128_MASK:
20304 2225 : case IX86_BUILTIN_MINPH256_MASK:
20305 2225 : case IX86_BUILTIN_MINPH512_MASK:
20306 2225 : tcode = LT_EXPR;
20307 2225 : goto do_minmax;
20308 :
20309 : case IX86_BUILTIN_MAXPS:
20310 : case IX86_BUILTIN_MAXPD:
20311 : case IX86_BUILTIN_MAXPS256:
20312 : case IX86_BUILTIN_MAXPD256:
20313 : case IX86_BUILTIN_MAXPS512:
20314 : case IX86_BUILTIN_MAXPD512:
20315 : case IX86_BUILTIN_MAXPS128_MASK:
20316 : case IX86_BUILTIN_MAXPD128_MASK:
20317 : case IX86_BUILTIN_MAXPS256_MASK:
20318 : case IX86_BUILTIN_MAXPD256_MASK:
20319 : case IX86_BUILTIN_MAXPH128_MASK:
20320 : case IX86_BUILTIN_MAXPH256_MASK:
20321 : case IX86_BUILTIN_MAXPH512_MASK:
20322 : tcode = GT_EXPR;
20323 4435 : do_minmax:
20324 4435 : gcc_assert (n_args >= 2);
20325 : /* Without SSE4.1 we often aren't able to pattern match it back to the
20326 : desired instruction. */
20327 4435 : if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
20328 : break;
20329 3865 : arg0 = gimple_call_arg (stmt, 0);
20330 3865 : arg1 = gimple_call_arg (stmt, 1);
20331 3865 : elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20332 : /* For masked minmax, only optimize if the mask is all ones. */
20333 3865 : if (n_args > 2
20334 3865 : && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
20335 : break;
20336 647 : if (n_args >= 5)
20337 : {
20338 436 : tree arg4 = gimple_call_arg (stmt, 4);
20339 436 : if (!tree_fits_uhwi_p (arg4))
20340 : break;
20341 424 : if (tree_to_uhwi (arg4) == 4)
20342 : /* Ok. */;
20343 416 : else if (tree_to_uhwi (arg4) != 8)
20344 : /* Invalid round argument. */
20345 : break;
20346 416 : else if (HONOR_NANS (arg0))
20347 : /* Lowering to comparison would raise exceptions which
20348 : shouldn't be raised. */
20349 : break;
20350 : }
20351 219 : {
20352 219 : tree type = truth_type_for (TREE_TYPE (arg0));
20353 219 : tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
20354 219 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20355 219 : g = gimple_build_assign (gimple_call_lhs (stmt),
20356 : VEC_COND_EXPR, cmpres, arg0, arg1);
20357 219 : gsi_replace (gsi, g, false);
20358 : }
20359 219 : return true;
20360 :
20361 : default:
20362 : break;
20363 : }
20364 :
20365 : return false;
20366 : }
20367 :
20368 : /* Handler for an SVML-style interface to
20369 : a library with vectorized intrinsics. */
20370 :
20371 : tree
20372 10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
20373 : {
20374 10 : char name[20];
20375 10 : tree fntype, new_fndecl, args;
20376 10 : unsigned arity;
20377 10 : const char *bname;
20378 10 : machine_mode el_mode, in_mode;
20379 10 : int n, in_n;
20380 :
20381 : /* The SVML is suitable for unsafe math only. */
20382 10 : if (!flag_unsafe_math_optimizations)
20383 : return NULL_TREE;
20384 :
20385 10 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20386 10 : n = TYPE_VECTOR_SUBPARTS (type_out);
20387 10 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20388 10 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20389 10 : if (el_mode != in_mode
20390 10 : || n != in_n)
20391 : return NULL_TREE;
20392 :
20393 10 : switch (fn)
20394 : {
20395 10 : CASE_CFN_EXP:
20396 10 : CASE_CFN_LOG:
20397 10 : CASE_CFN_LOG10:
20398 10 : CASE_CFN_POW:
20399 10 : CASE_CFN_TANH:
20400 10 : CASE_CFN_TAN:
20401 10 : CASE_CFN_ATAN:
20402 10 : CASE_CFN_ATAN2:
20403 10 : CASE_CFN_ATANH:
20404 10 : CASE_CFN_CBRT:
20405 10 : CASE_CFN_SINH:
20406 10 : CASE_CFN_SIN:
20407 10 : CASE_CFN_ASINH:
20408 10 : CASE_CFN_ASIN:
20409 10 : CASE_CFN_COSH:
20410 10 : CASE_CFN_COS:
20411 10 : CASE_CFN_ACOSH:
20412 10 : CASE_CFN_ACOS:
20413 10 : if ((el_mode != DFmode || n != 2)
20414 8 : && (el_mode != SFmode || n != 4))
20415 : return NULL_TREE;
20416 6 : break;
20417 :
20418 : default:
20419 : return NULL_TREE;
20420 : }
20421 :
20422 6 : tree fndecl = mathfn_built_in (el_mode == DFmode
20423 : ? double_type_node : float_type_node, fn);
20424 6 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20425 :
20426 6 : if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
20427 2 : strcpy (name, "vmlsLn4");
20428 4 : else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
20429 0 : strcpy (name, "vmldLn2");
20430 4 : else if (n == 4)
20431 : {
20432 2 : sprintf (name, "vmls%s", bname+10);
20433 2 : name[strlen (name)-1] = '4';
20434 : }
20435 : else
20436 2 : sprintf (name, "vmld%s2", bname+10);
20437 :
20438 : /* Convert to uppercase. */
20439 6 : name[4] &= ~0x20;
20440 :
20441 6 : arity = 0;
20442 6 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20443 0 : arity++;
20444 :
20445 6 : if (arity == 1)
20446 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20447 : else
20448 6 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20449 :
20450 : /* Build a function declaration for the vectorized function. */
20451 6 : new_fndecl = build_decl (BUILTINS_LOCATION,
20452 : FUNCTION_DECL, get_identifier (name), fntype);
20453 6 : TREE_PUBLIC (new_fndecl) = 1;
20454 6 : DECL_EXTERNAL (new_fndecl) = 1;
20455 6 : DECL_IS_NOVOPS (new_fndecl) = 1;
20456 6 : TREE_READONLY (new_fndecl) = 1;
20457 :
20458 6 : return new_fndecl;
20459 : }
20460 :
20461 : /* Handler for an ACML-style interface to
20462 : a library with vectorized intrinsics. */
20463 :
20464 : tree
20465 3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
20466 : {
20467 3 : char name[20] = "__vr.._";
20468 3 : tree fntype, new_fndecl, args;
20469 3 : unsigned arity;
20470 3 : const char *bname;
20471 3 : machine_mode el_mode, in_mode;
20472 3 : int n, in_n;
20473 :
20474 : /* The ACML is 64bits only and suitable for unsafe math only as
20475 : it does not correctly support parts of IEEE with the required
20476 : precision such as denormals. */
20477 3 : if (!TARGET_64BIT
20478 3 : || !flag_unsafe_math_optimizations)
20479 : return NULL_TREE;
20480 :
20481 3 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20482 3 : n = TYPE_VECTOR_SUBPARTS (type_out);
20483 3 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20484 3 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20485 3 : if (el_mode != in_mode
20486 3 : || n != in_n)
20487 : return NULL_TREE;
20488 :
20489 3 : switch (fn)
20490 : {
20491 3 : CASE_CFN_SIN:
20492 3 : CASE_CFN_COS:
20493 3 : CASE_CFN_EXP:
20494 3 : CASE_CFN_LOG:
20495 3 : CASE_CFN_LOG2:
20496 3 : CASE_CFN_LOG10:
20497 3 : if (el_mode == DFmode && n == 2)
20498 : {
20499 3 : name[4] = 'd';
20500 3 : name[5] = '2';
20501 : }
20502 0 : else if (el_mode == SFmode && n == 4)
20503 : {
20504 0 : name[4] = 's';
20505 0 : name[5] = '4';
20506 : }
20507 : else
20508 : return NULL_TREE;
20509 3 : break;
20510 :
20511 : default:
20512 : return NULL_TREE;
20513 : }
20514 :
20515 3 : tree fndecl = mathfn_built_in (el_mode == DFmode
20516 : ? double_type_node : float_type_node, fn);
20517 3 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20518 3 : sprintf (name + 7, "%s", bname+10);
20519 :
20520 3 : arity = 0;
20521 3 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20522 0 : arity++;
20523 :
20524 3 : if (arity == 1)
20525 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20526 : else
20527 3 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20528 :
20529 : /* Build a function declaration for the vectorized function. */
20530 3 : new_fndecl = build_decl (BUILTINS_LOCATION,
20531 : FUNCTION_DECL, get_identifier (name), fntype);
20532 3 : TREE_PUBLIC (new_fndecl) = 1;
20533 3 : DECL_EXTERNAL (new_fndecl) = 1;
20534 3 : DECL_IS_NOVOPS (new_fndecl) = 1;
20535 3 : TREE_READONLY (new_fndecl) = 1;
20536 :
20537 3 : return new_fndecl;
20538 : }
20539 :
20540 : /* Handler for an AOCL-LibM-style interface to
20541 : a library with vectorized intrinsics. */
20542 :
20543 : tree
20544 386 : ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
20545 : {
20546 386 : char name[20] = "amd_vr";
20547 386 : int name_len = 6;
20548 386 : tree fntype, new_fndecl, args;
20549 386 : unsigned arity;
20550 386 : const char *bname;
20551 386 : machine_mode el_mode, in_mode;
20552 386 : int n, in_n;
20553 :
20554 : /* AOCL-LibM is 64bits only. It is also only suitable for unsafe math only
20555 : as it trades off some accuracy for increased performance. */
20556 386 : if (!TARGET_64BIT
20557 386 : || !flag_unsafe_math_optimizations)
20558 : return NULL_TREE;
20559 :
20560 386 : el_mode = TYPE_MODE (TREE_TYPE (type_out));
20561 386 : n = TYPE_VECTOR_SUBPARTS (type_out);
20562 386 : in_mode = TYPE_MODE (TREE_TYPE (type_in));
20563 386 : in_n = TYPE_VECTOR_SUBPARTS (type_in);
20564 386 : if (el_mode != in_mode
20565 386 : || n != in_n)
20566 : return NULL_TREE;
20567 :
20568 386 : gcc_checking_assert (n > 0);
20569 :
20570 : /* Decide whether there exists a function for the combination of FN, the mode
20571 : and the vector width. Return early if it doesn't. */
20572 :
20573 386 : if (el_mode != DFmode && el_mode != SFmode)
20574 : return NULL_TREE;
20575 :
20576 : /* Supported vector widths for given FN and single/double precision. Zeros
20577 : are used to fill out unused positions in the arrays. */
20578 386 : static const int supported_n[][2][3] = {
20579 : /* Single prec. , Double prec. */
20580 : { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */
20581 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */
20582 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */
20583 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */
20584 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */
20585 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */
20586 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */
20587 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */
20588 : { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */
20589 : { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */
20590 : { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */
20591 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */
20592 : { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */
20593 : { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. */
20594 : { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */
20595 : { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */
20596 : { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */
20597 : { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */
20598 : };
20599 :
20600 : /* We cannot simply index the supported_n array with FN since multiple FNs
20601 : may correspond to a single operation (see the definitions of these
20602 : CASE_CFN_* macros). */
20603 386 : int i;
20604 386 : switch (fn)
20605 : {
20606 : CASE_CFN_TAN : i = 0; break;
20607 28 : CASE_CFN_EXP : i = 1; break;
20608 28 : CASE_CFN_EXP2 : i = 2; break;
20609 28 : CASE_CFN_LOG : i = 3; break;
20610 28 : CASE_CFN_LOG2 : i = 4; break;
20611 28 : CASE_CFN_COS : i = 5; break;
20612 28 : CASE_CFN_SIN : i = 6; break;
20613 28 : CASE_CFN_POW : i = 7; break;
20614 28 : CASE_CFN_ERF : i = 8; break;
20615 25 : CASE_CFN_ATAN : i = 9; break;
20616 20 : CASE_CFN_LOG10 : i = 10; break;
20617 10 : CASE_CFN_EXP10 : i = 11; break;
20618 10 : CASE_CFN_LOG1P : i = 12; break;
20619 24 : CASE_CFN_ASIN : i = 13; break;
20620 14 : CASE_CFN_ACOS : i = 14; break;
20621 18 : CASE_CFN_TANH : i = 15; break;
20622 9 : CASE_CFN_EXPM1 : i = 16; break;
20623 14 : CASE_CFN_COSH : i = 17; break;
20624 : default: return NULL_TREE;
20625 : }
20626 :
20627 386 : int j = el_mode == DFmode;
20628 386 : bool n_is_supported = false;
20629 976 : for (unsigned k = 0; k < 3; k++)
20630 857 : if (supported_n[i][j][k] == n)
20631 : {
20632 : n_is_supported = true;
20633 : break;
20634 : }
20635 386 : if (!n_is_supported)
20636 : return NULL_TREE;
20637 :
20638 : /* Append the precision and the vector width to the function name we are
20639 : constructing. */
20640 267 : name[name_len++] = el_mode == DFmode ? 'd' : 's';
20641 267 : switch (n)
20642 : {
20643 214 : case 2:
20644 214 : case 4:
20645 214 : case 8:
20646 214 : name[name_len++] = '0' + n;
20647 214 : break;
20648 53 : case 16:
20649 53 : name[name_len++] = '1';
20650 53 : name[name_len++] = '6';
20651 53 : break;
20652 0 : default:
20653 0 : gcc_unreachable ();
20654 : }
20655 267 : name[name_len++] = '_';
20656 :
20657 : /* Append the operation name (steal it from the name of a builtin). */
20658 267 : tree fndecl = mathfn_built_in (el_mode == DFmode
20659 : ? double_type_node : float_type_node, fn);
20660 267 : bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20661 267 : sprintf (name + name_len, "%s", bname + 10);
20662 :
20663 267 : arity = 0;
20664 267 : for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20665 0 : arity++;
20666 :
20667 267 : if (arity == 1)
20668 0 : fntype = build_function_type_list (type_out, type_in, NULL);
20669 : else
20670 267 : fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20671 :
20672 : /* Build a function declaration for the vectorized function. */
20673 267 : new_fndecl = build_decl (BUILTINS_LOCATION,
20674 : FUNCTION_DECL, get_identifier (name), fntype);
20675 267 : TREE_PUBLIC (new_fndecl) = 1;
20676 267 : DECL_EXTERNAL (new_fndecl) = 1;
20677 267 : TREE_READONLY (new_fndecl) = 1;
20678 :
20679 267 : return new_fndecl;
20680 : }
20681 :
20682 : /* Returns a decl of a function that implements scatter store with
20683 : register type VECTYPE and index type INDEX_TYPE and SCALE.
20684 : Return NULL_TREE if it is not available. */
20685 :
20686 : static tree
20687 130448 : ix86_vectorize_builtin_scatter (const_tree vectype,
20688 : const_tree index_type, int scale)
20689 : {
20690 130448 : bool si;
20691 130448 : enum ix86_builtins code;
20692 :
20693 130448 : if (!TARGET_AVX512F)
20694 : return NULL_TREE;
20695 :
20696 4215 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
20697 7389 : ? !TARGET_USE_SCATTER_2PARTS
20698 7389 : : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
20699 3174 : ? !TARGET_USE_SCATTER_4PARTS
20700 2057 : : !TARGET_USE_SCATTER_8PARTS))
20701 : return NULL_TREE;
20702 :
20703 4215 : if ((TREE_CODE (index_type) != INTEGER_TYPE
20704 463 : && !POINTER_TYPE_P (index_type))
20705 4678 : || (TYPE_MODE (index_type) != SImode
20706 1783 : && TYPE_MODE (index_type) != DImode))
20707 0 : return NULL_TREE;
20708 :
20709 4445 : if (TYPE_PRECISION (index_type) > POINTER_SIZE)
20710 : return NULL_TREE;
20711 :
20712 : /* v*scatter* insn sign extends index to pointer mode. */
20713 4215 : if (TYPE_PRECISION (index_type) < POINTER_SIZE
20714 4215 : && TYPE_UNSIGNED (index_type))
20715 : return NULL_TREE;
20716 :
20717 : /* Scale can be 1, 2, 4 or 8. */
20718 4215 : if (scale <= 0
20719 4215 : || scale > 8
20720 4199 : || (scale & (scale - 1)) != 0)
20721 : return NULL_TREE;
20722 :
20723 4199 : si = TYPE_MODE (index_type) == SImode;
20724 4199 : switch (TYPE_MODE (vectype))
20725 : {
20726 169 : case E_V8DFmode:
20727 169 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
20728 : break;
20729 104 : case E_V8DImode:
20730 104 : code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
20731 : break;
20732 177 : case E_V16SFmode:
20733 177 : code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
20734 : break;
20735 257 : case E_V16SImode:
20736 257 : code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
20737 : break;
20738 206 : case E_V4DFmode:
20739 206 : if (TARGET_AVX512VL)
20740 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
20741 : else
20742 : return NULL_TREE;
20743 : break;
20744 142 : case E_V4DImode:
20745 142 : if (TARGET_AVX512VL)
20746 34 : code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
20747 : else
20748 : return NULL_TREE;
20749 : break;
20750 248 : case E_V8SFmode:
20751 248 : if (TARGET_AVX512VL)
20752 40 : code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
20753 : else
20754 : return NULL_TREE;
20755 : break;
20756 268 : case E_V8SImode:
20757 268 : if (TARGET_AVX512VL)
20758 82 : code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
20759 : else
20760 : return NULL_TREE;
20761 : break;
20762 254 : case E_V2DFmode:
20763 254 : if (TARGET_AVX512VL)
20764 94 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
20765 : else
20766 : return NULL_TREE;
20767 : break;
20768 196 : case E_V2DImode:
20769 196 : if (TARGET_AVX512VL)
20770 94 : code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
20771 : else
20772 : return NULL_TREE;
20773 : break;
20774 301 : case E_V4SFmode:
20775 301 : if (TARGET_AVX512VL)
20776 96 : code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
20777 : else
20778 : return NULL_TREE;
20779 : break;
20780 324 : case E_V4SImode:
20781 324 : if (TARGET_AVX512VL)
20782 138 : code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
20783 : else
20784 : return NULL_TREE;
20785 : break;
20786 : default:
20787 : return NULL_TREE;
20788 : }
20789 :
20790 1319 : return get_ix86_builtin (code);
20791 : }
20792 :
20793 : /* Return true if it is safe to use the rsqrt optabs to optimize
20794 : 1.0/sqrt. */
20795 :
20796 : static bool
20797 66 : use_rsqrt_p (machine_mode mode)
20798 : {
20799 66 : return ((mode == HFmode
20800 42 : || (TARGET_SSE && TARGET_SSE_MATH))
20801 66 : && flag_finite_math_only
20802 65 : && !flag_trapping_math
20803 119 : && flag_unsafe_math_optimizations);
20804 : }
20805 :
20806 : /* Helper for avx_vpermilps256_operand et al. This is also used by
20807 : the expansion functions to turn the parallel back into a mask.
20808 : The return value is 0 for no match and the imm8+1 for a match. */
20809 :
20810 : int
20811 63524 : avx_vpermilp_parallel (rtx par, machine_mode mode)
20812 : {
20813 63524 : unsigned i, nelt = GET_MODE_NUNITS (mode);
20814 63524 : unsigned mask = 0;
20815 63524 : unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
20816 :
20817 63524 : if (XVECLEN (par, 0) != (int) nelt)
20818 : return 0;
20819 :
20820 : /* Validate that all of the elements are constants, and not totally
20821 : out of range. Copy the data into an integral array to make the
20822 : subsequent checks easier. */
20823 311086 : for (i = 0; i < nelt; ++i)
20824 : {
20825 247562 : rtx er = XVECEXP (par, 0, i);
20826 247562 : unsigned HOST_WIDE_INT ei;
20827 :
20828 247562 : if (!CONST_INT_P (er))
20829 : return 0;
20830 247562 : ei = INTVAL (er);
20831 247562 : if (ei >= nelt)
20832 : return 0;
20833 247562 : ipar[i] = ei;
20834 : }
20835 :
20836 63524 : switch (mode)
20837 : {
20838 : case E_V8DFmode:
20839 : case E_V8DImode:
20840 : /* In the 512-bit DFmode case, we can only move elements within
20841 : a 128-bit lane. First fill the second part of the mask,
20842 : then fallthru. */
20843 4933 : for (i = 4; i < 6; ++i)
20844 : {
20845 3419 : if (!IN_RANGE (ipar[i], 4, 5))
20846 : return 0;
20847 3194 : mask |= (ipar[i] - 4) << i;
20848 : }
20849 3690 : for (i = 6; i < 8; ++i)
20850 : {
20851 2602 : if (!IN_RANGE (ipar[i], 6, 7))
20852 : return 0;
20853 2176 : mask |= (ipar[i] - 6) << i;
20854 : }
20855 : /* FALLTHRU */
20856 :
20857 : case E_V4DFmode:
20858 : case E_V4DImode:
20859 : /* In the 256-bit DFmode case, we can only move elements within
20860 : a 128-bit lane. */
20861 44649 : for (i = 0; i < 2; ++i)
20862 : {
20863 37299 : if (!IN_RANGE (ipar[i], 0, 1))
20864 : return 0;
20865 25156 : mask |= ipar[i] << i;
20866 : }
20867 19352 : for (i = 2; i < 4; ++i)
20868 : {
20869 13356 : if (!IN_RANGE (ipar[i], 2, 3))
20870 : return 0;
20871 12002 : mask |= (ipar[i] - 2) << i;
20872 : }
20873 : break;
20874 :
20875 : case E_V16SFmode:
20876 : case E_V16SImode:
20877 : /* In 512 bit SFmode case, permutation in the upper 256 bits
20878 : must mirror the permutation in the lower 256-bits. */
20879 4326 : for (i = 0; i < 8; ++i)
20880 3854 : if (ipar[i] + 8 != ipar[i + 8])
20881 : return 0;
20882 : /* FALLTHRU */
20883 :
20884 : case E_V8SFmode:
20885 : case E_V8SImode:
20886 : /* In 256 bit SFmode case, we have full freedom of
20887 : movement within the low 128-bit lane, but the high 128-bit
20888 : lane must mirror the exact same pattern. */
20889 35630 : for (i = 0; i < 4; ++i)
20890 30149 : if (ipar[i] + 4 != ipar[i + 4])
20891 : return 0;
20892 : nelt = 4;
20893 : /* FALLTHRU */
20894 :
20895 38663 : case E_V2DFmode:
20896 38663 : case E_V2DImode:
20897 38663 : case E_V4SFmode:
20898 38663 : case E_V4SImode:
20899 : /* In the 128-bit case, we've full freedom in the placement of
20900 : the elements from the source operand. */
20901 134649 : for (i = 0; i < nelt; ++i)
20902 95986 : mask |= ipar[i] << (i * (nelt / 2));
20903 : break;
20904 :
20905 0 : default:
20906 0 : gcc_unreachable ();
20907 : }
20908 :
20909 : /* Make sure success has a non-zero value by adding one. */
20910 44659 : return mask + 1;
20911 : }
20912 :
20913 : /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20914 : the expansion functions to turn the parallel back into a mask.
20915 : The return value is 0 for no match and the imm8+1 for a match. */
20916 :
20917 : int
20918 42146 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
20919 : {
20920 42146 : unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20921 42146 : unsigned mask = 0;
20922 42146 : unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20923 :
20924 42146 : if (XVECLEN (par, 0) != (int) nelt)
20925 : return 0;
20926 :
20927 : /* Validate that all of the elements are constants, and not totally
20928 : out of range. Copy the data into an integral array to make the
20929 : subsequent checks easier. */
20930 342298 : for (i = 0; i < nelt; ++i)
20931 : {
20932 300152 : rtx er = XVECEXP (par, 0, i);
20933 300152 : unsigned HOST_WIDE_INT ei;
20934 :
20935 300152 : if (!CONST_INT_P (er))
20936 : return 0;
20937 300152 : ei = INTVAL (er);
20938 300152 : if (ei >= 2 * nelt)
20939 : return 0;
20940 300152 : ipar[i] = ei;
20941 : }
20942 :
20943 : /* Validate that the halves of the permute are halves. */
20944 81279 : for (i = 0; i < nelt2 - 1; ++i)
20945 65814 : if (ipar[i] + 1 != ipar[i + 1])
20946 : return 0;
20947 49614 : for (i = nelt2; i < nelt - 1; ++i)
20948 34791 : if (ipar[i] + 1 != ipar[i + 1])
20949 : return 0;
20950 :
20951 : /* Reconstruct the mask. */
20952 44373 : for (i = 0; i < 2; ++i)
20953 : {
20954 29600 : unsigned e = ipar[i * nelt2];
20955 29600 : if (e % nelt2)
20956 : return 0;
20957 29550 : e /= nelt2;
20958 29550 : mask |= e << (i * 4);
20959 : }
20960 :
20961 : /* Make sure success has a non-zero value by adding one. */
20962 14773 : return mask + 1;
20963 : }
20964 :
20965 : /* Return a mask of VPTERNLOG operands that do not affect output. */
20966 :
20967 : int
20968 2431 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
20969 : {
20970 2431 : int mask = 0;
20971 2431 : int imm8 = INTVAL (pternlog_imm);
20972 :
20973 2431 : if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20974 6 : mask |= 1;
20975 2431 : if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20976 6 : mask |= 2;
20977 2431 : if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20978 155 : mask |= 4;
20979 :
20980 2431 : return mask;
20981 : }
20982 :
20983 : /* Eliminate false dependencies on operands that do not affect output
20984 : by substituting other operands of a VPTERNLOG. */
20985 :
20986 : void
20987 81 : substitute_vpternlog_operands (rtx *operands)
20988 : {
20989 81 : int mask = vpternlog_redundant_operand_mask (operands[4]);
20990 :
20991 81 : if (mask & 1) /* The first operand is redundant. */
20992 2 : operands[1] = operands[2];
20993 :
20994 81 : if (mask & 2) /* The second operand is redundant. */
20995 2 : operands[2] = operands[1];
20996 :
20997 81 : if (mask & 4) /* The third operand is redundant. */
20998 77 : operands[3] = operands[1];
20999 4 : else if (REG_P (operands[3]))
21000 : {
21001 0 : if (mask & 1)
21002 0 : operands[1] = operands[3];
21003 0 : if (mask & 2)
21004 0 : operands[2] = operands[3];
21005 : }
21006 81 : }
21007 :
21008 : /* Return a register priority for hard reg REGNO. */
21009 : static int
21010 58162992 : ix86_register_priority (int hard_regno)
21011 : {
21012 : /* ebp and r13 as the base always wants a displacement, r12 as the
21013 : base always wants an index. So discourage their usage in an
21014 : address. */
21015 58162992 : if (hard_regno == R12_REG || hard_regno == R13_REG)
21016 : return 0;
21017 53689699 : if (hard_regno == BP_REG)
21018 : return 1;
21019 : /* New x86-64 int registers result in bigger code size. Discourage them. */
21020 51720706 : if (REX_INT_REGNO_P (hard_regno))
21021 : return 2;
21022 35240588 : if (REX2_INT_REGNO_P (hard_regno))
21023 : return 2;
21024 : /* New x86-64 SSE registers result in bigger code size. Discourage them. */
21025 35238148 : if (REX_SSE_REGNO_P (hard_regno))
21026 : return 2;
21027 29120967 : if (EXT_REX_SSE_REGNO_P (hard_regno))
21028 : return 1;
21029 : /* Usage of AX register results in smaller code. Prefer it. */
21030 28844284 : if (hard_regno == AX_REG)
21031 3796141 : return 4;
21032 : return 3;
21033 : }
21034 :
21035 : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
21036 :
21037 : Put float CONST_DOUBLE in the constant pool instead of fp regs.
21038 : QImode must go into class Q_REGS.
21039 : Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21040 : movdf to do mem-to-mem moves through integer regs. */
21041 :
21042 : static reg_class_t
21043 547405141 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
21044 : {
21045 547405141 : machine_mode mode = GET_MODE (x);
21046 :
21047 : /* We're only allowed to return a subclass of CLASS. Many of the
21048 : following checks fail for NO_REGS, so eliminate that early. */
21049 547405141 : if (regclass == NO_REGS)
21050 : return NO_REGS;
21051 :
21052 : /* All classes can load zeros. */
21053 546548470 : if (x == CONST0_RTX (mode))
21054 : return regclass;
21055 :
21056 : /* Force constants into memory if we are loading a (nonzero) constant into
21057 : an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
21058 : instructions to load from a constant. */
21059 521652666 : if (CONSTANT_P (x)
21060 521652666 : && (MAYBE_MMX_CLASS_P (regclass)
21061 152120033 : || MAYBE_SSE_CLASS_P (regclass)
21062 122083034 : || MAYBE_MASK_CLASS_P (regclass)))
21063 30169108 : return NO_REGS;
21064 :
21065 : /* Floating-point constants need more complex checks. */
21066 491483558 : if (CONST_DOUBLE_P (x))
21067 : {
21068 : /* General regs can load everything. */
21069 303387 : if (INTEGER_CLASS_P (regclass))
21070 : return regclass;
21071 :
21072 : /* Floats can load 0 and 1 plus some others. Note that we eliminated
21073 : zero above. We only want to wind up preferring 80387 registers if
21074 : we plan on doing computation with them. */
21075 179712 : if (IS_STACK_MODE (mode)
21076 238096 : && standard_80387_constant_p (x) > 0)
21077 : {
21078 : /* Limit class to FP regs. */
21079 40492 : if (FLOAT_CLASS_P (regclass))
21080 : return FLOAT_REGS;
21081 : }
21082 :
21083 139220 : return NO_REGS;
21084 : }
21085 :
21086 : /* Prefer SSE if we can use them for math. Also allow integer regs
21087 : when moves between register units are cheap. */
21088 491180171 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21089 : {
21090 31132731 : if (TARGET_INTER_UNIT_MOVES_FROM_VEC
21091 31117818 : && TARGET_INTER_UNIT_MOVES_TO_VEC
21092 93358903 : && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
21093 30974502 : return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21094 : else
21095 158229 : return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21096 : }
21097 :
21098 : /* Generally when we see PLUS here, it's the function invariant
21099 : (plus soft-fp const_int). Which can only be computed into general
21100 : regs. */
21101 460047440 : if (GET_CODE (x) == PLUS)
21102 1895625 : return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
21103 :
21104 : /* QImode constants are easy to load, but non-constant QImode data
21105 : must go into Q_REGS or ALL_MASK_REGS. */
21106 458151815 : if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21107 : {
21108 24685449 : if (Q_CLASS_P (regclass))
21109 : return regclass;
21110 19949216 : else if (reg_class_subset_p (Q_REGS, regclass))
21111 : return Q_REGS;
21112 55325 : else if (MASK_CLASS_P (regclass))
21113 : return regclass;
21114 : else
21115 : return NO_REGS;
21116 : }
21117 :
21118 : return regclass;
21119 : }
21120 :
21121 : /* Discourage putting floating-point values in SSE registers unless
21122 : SSE math is being used, and likewise for the 387 registers. */
21123 : static reg_class_t
21124 74480253 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
21125 : {
21126 : /* Restrict the output reload class to the register bank that we are doing
21127 : math on. If we would like not to return a subset of CLASS, reject this
21128 : alternative: if reload cannot do this, it will still use its choice. */
21129 74480253 : machine_mode mode = GET_MODE (x);
21130 74480253 : if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21131 7229401 : return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
21132 :
21133 67250852 : if (IS_STACK_MODE (mode))
21134 207106 : return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21135 :
21136 : return regclass;
21137 : }
21138 :
21139 : static reg_class_t
21140 385987734 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
21141 : machine_mode mode, secondary_reload_info *sri)
21142 : {
21143 : /* Double-word spills from general registers to non-offsettable memory
21144 : references (zero-extended addresses) require special handling. */
21145 385987734 : if (TARGET_64BIT
21146 333024663 : && MEM_P (x)
21147 180891141 : && GET_MODE_SIZE (mode) > UNITS_PER_WORD
21148 18952538 : && INTEGER_CLASS_P (rclass)
21149 388698330 : && !offsettable_memref_p (x))
21150 : {
21151 2459670 : sri->icode = (in_p
21152 1229835 : ? CODE_FOR_reload_noff_load
21153 : : CODE_FOR_reload_noff_store);
21154 : /* Add the cost of moving address to a temporary. */
21155 1229835 : sri->extra_cost = 1;
21156 :
21157 1229835 : return NO_REGS;
21158 : }
21159 :
21160 : /* QImode spills from non-QI registers require
21161 : intermediate register on 32bit targets. */
21162 384757899 : if (mode == QImode
21163 384757899 : && ((!TARGET_64BIT && !in_p
21164 585977 : && INTEGER_CLASS_P (rclass)
21165 585937 : && MAYBE_NON_Q_CLASS_P (rclass))
21166 22174181 : || (!TARGET_AVX512DQ
21167 21975316 : && MAYBE_MASK_CLASS_P (rclass))))
21168 : {
21169 6518 : int regno = true_regnum (x);
21170 :
21171 : /* Return Q_REGS if the operand is in memory. */
21172 6518 : if (regno == -1)
21173 : return Q_REGS;
21174 :
21175 : return NO_REGS;
21176 : }
21177 :
21178 : /* Require movement to gpr, and then store to memory. */
21179 384751381 : if ((mode == HFmode || mode == HImode || mode == V2QImode
21180 : || mode == BFmode)
21181 3990267 : && !TARGET_SSE4_1
21182 3397744 : && SSE_CLASS_P (rclass)
21183 273415 : && !in_p && MEM_P (x))
21184 : {
21185 168976 : sri->extra_cost = 1;
21186 168976 : return GENERAL_REGS;
21187 : }
21188 :
21189 : /* This condition handles corner case where an expression involving
21190 : pointers gets vectorized. We're trying to use the address of a
21191 : stack slot as a vector initializer.
21192 :
21193 : (set (reg:V2DI 74 [ vect_cst_.2 ])
21194 : (vec_duplicate:V2DI (reg/f:DI 20 frame)))
21195 :
21196 : Eventually frame gets turned into sp+offset like this:
21197 :
21198 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21199 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21200 : (const_int 392 [0x188]))))
21201 :
21202 : That later gets turned into:
21203 :
21204 : (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21205 : (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
21206 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
21207 :
21208 : We'll have the following reload recorded:
21209 :
21210 : Reload 0: reload_in (DI) =
21211 : (plus:DI (reg/f:DI 7 sp)
21212 : (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
21213 : reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21214 : SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
21215 : reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
21216 : reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
21217 : reload_reg_rtx: (reg:V2DI 22 xmm1)
21218 :
21219 : Which isn't going to work since SSE instructions can't handle scalar
21220 : additions. Returning GENERAL_REGS forces the addition into integer
21221 : register and reload can handle subsequent reloads without problems. */
21222 :
21223 221276087 : if (in_p && GET_CODE (x) == PLUS
21224 2 : && SSE_CLASS_P (rclass)
21225 384582405 : && SCALAR_INT_MODE_P (mode))
21226 : return GENERAL_REGS;
21227 :
21228 : return NO_REGS;
21229 : }
21230 :
21231 : /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
21232 :
21233 : static bool
21234 716915843 : ix86_class_likely_spilled_p (reg_class_t rclass)
21235 : {
21236 706876065 : switch (rclass)
21237 : {
21238 : case AREG:
21239 : case DREG:
21240 : case CREG:
21241 : case BREG:
21242 : case AD_REGS:
21243 : case SIREG:
21244 : case DIREG:
21245 : case SSE_FIRST_REG:
21246 : case FP_TOP_REG:
21247 : case FP_SECOND_REG:
21248 : return true;
21249 :
21250 685336582 : default:
21251 685336582 : break;
21252 : }
21253 :
21254 685336582 : return false;
21255 : }
21256 :
21257 : /* Implement TARGET_CALLEE_SAVE_COST. */
21258 :
21259 : static int
21260 81881934 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
21261 : unsigned int, int mem_cost, const HARD_REG_SET &, bool)
21262 : {
21263 : /* Account for the fact that push and pop are shorter and do their
21264 : own allocation and deallocation. */
21265 81881934 : if (GENERAL_REGNO_P (hard_regno))
21266 : {
21267 : /* push is 1 byte while typical spill is 4-5 bytes.
21268 : ??? We probably should adjust size costs accordingly.
21269 : Costs are relative to reg-reg move that has 2 bytes for 32bit
21270 : and 3 bytes otherwise. Be sure that no cost table sets cost
21271 : to 2, so we end up with 0. */
21272 81872076 : if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
21273 3572286 : return 1;
21274 78299790 : return mem_cost - 2;
21275 : }
21276 : return mem_cost;
21277 : }
21278 :
21279 : /* Return true if a set of DST by the expression SRC should be allowed.
21280 : This prevents complex sets of likely_spilled hard regs before split1. */
21281 :
21282 : bool
21283 631686272 : ix86_hardreg_mov_ok (rtx dst, rtx src)
21284 : {
21285 : /* Avoid complex sets of likely_spilled hard registers before reload. */
21286 513538201 : if (REG_P (dst) && HARD_REGISTER_P (dst)
21287 304399638 : && !REG_P (src) && !MEM_P (src)
21288 95438421 : && !(VECTOR_MODE_P (GET_MODE (dst))
21289 95438421 : ? standard_sse_constant_p (src, GET_MODE (dst))
21290 47483673 : : x86_64_immediate_operand (src, GET_MODE (dst)))
21291 10039778 : && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
21292 640495352 : && ix86_pre_reload_split ())
21293 : return false;
21294 : return true;
21295 : }
21296 :
21297 : /* If we are copying between registers from different register sets
21298 : (e.g. FP and integer), we may need a memory location.
21299 :
21300 : The function can't work reliably when one of the CLASSES is a class
21301 : containing registers from multiple sets. We avoid this by never combining
21302 : different sets in a single alternative in the machine description.
21303 : Ensure that this constraint holds to avoid unexpected surprises.
21304 :
21305 : When STRICT is false, we are being called from REGISTER_MOVE_COST,
21306 : so do not enforce these sanity checks.
21307 :
21308 : To optimize register_move_cost performance, define inline variant. */
21309 :
21310 : static inline bool
21311 5681960011 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21312 : reg_class_t class2, int strict)
21313 : {
21314 5681960011 : if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
21315 : return false;
21316 :
21317 5649635239 : if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21318 4814465395 : || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21319 4111293083 : || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21320 3922602603 : || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21321 3744032940 : || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21322 3744032940 : || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
21323 3744032940 : || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
21324 9223504441 : || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
21325 : {
21326 2237913128 : gcc_assert (!strict || lra_in_progress);
21327 : return true;
21328 : }
21329 :
21330 3411722111 : if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21331 : return true;
21332 :
21333 : /* ??? This is a lie. We do have moves between mmx/general, and for
21334 : mmx/sse2. But by saying we need secondary memory we discourage the
21335 : register allocator from using the mmx registers unless needed. */
21336 3262553528 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21337 : return true;
21338 :
21339 : /* Between mask and general, we have moves no larger than word size. */
21340 3166237288 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21341 : {
21342 2605806 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
21343 3406186 : || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21344 192451 : return true;
21345 : }
21346 :
21347 3166044837 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21348 : {
21349 : /* SSE1 doesn't have any direct moves from other classes. */
21350 687844117 : if (!TARGET_SSE2)
21351 : return true;
21352 :
21353 685180385 : if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
21354 : return true;
21355 :
21356 : /* If the target says that inter-unit moves are more expensive
21357 : than moving through memory, then don't generate them. */
21358 1027304176 : if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
21359 1026818332 : || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
21360 1321033 : return true;
21361 :
21362 : /* With SSE4.1, *mov{ti,di}_internal supports moves between
21363 : SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */
21364 683859352 : if (TARGET_SSE4_1
21365 36716709 : && (TARGET_64BIT ? mode == TImode : mode == DImode))
21366 : return false;
21367 :
21368 682268072 : int msize = GET_MODE_SIZE (mode);
21369 :
21370 : /* Between SSE and general, we have moves no larger than word size. */
21371 698632701 : if (msize > UNITS_PER_WORD)
21372 : return true;
21373 :
21374 : /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
21375 : Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
21376 590174629 : int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
21377 :
21378 590174629 : if (msize < minsize)
21379 : return true;
21380 : }
21381 :
21382 : return false;
21383 : }
21384 :
21385 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21386 :
21387 : static bool
21388 71088308 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21389 : reg_class_t class2)
21390 : {
21391 71088308 : return inline_secondary_memory_needed (mode, class1, class2, true);
21392 : }
21393 :
21394 : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
21395 :
21396 : get_secondary_mem widens integral modes to BITS_PER_WORD.
21397 : There is no need to emit full 64 bit move on 64 bit targets
21398 : for integral modes that can be moved using 32 bit move. */
21399 :
21400 : static machine_mode
21401 13069 : ix86_secondary_memory_needed_mode (machine_mode mode)
21402 : {
21403 26138 : if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
21404 19 : return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
21405 : return mode;
21406 : }
21407 :
21408 : /* Implement the TARGET_CLASS_MAX_NREGS hook.
21409 :
21410 : On the 80386, this is the size of MODE in words,
21411 : except in the FP regs, where a single reg is always enough. */
21412 :
21413 : static unsigned char
21414 5958990220 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
21415 : {
21416 5958990220 : if (MAYBE_INTEGER_CLASS_P (rclass))
21417 : {
21418 4008509247 : if (mode == XFmode)
21419 145938113 : return (TARGET_64BIT ? 2 : 3);
21420 3862571134 : else if (mode == XCmode)
21421 145937744 : return (TARGET_64BIT ? 4 : 6);
21422 : else
21423 7539180929 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21424 : }
21425 : else
21426 : {
21427 1950480973 : if (COMPLEX_MODE_P (mode))
21428 : return 2;
21429 : else
21430 1665885734 : return 1;
21431 : }
21432 : }
21433 :
21434 : /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21435 :
21436 : static bool
21437 40219858 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
21438 : reg_class_t regclass)
21439 : {
21440 40219858 : if (from == to)
21441 : return true;
21442 :
21443 : /* x87 registers can't do subreg at all, as all values are reformatted
21444 : to extended precision.
21445 :
21446 : ??? middle-end queries mode changes for ALL_REGS and this makes
21447 : vec_series_lowpart_p to always return false. We probably should
21448 : restrict this to modes supported by i387 and check if it is enabled. */
21449 38818955 : if (MAYBE_FLOAT_CLASS_P (regclass))
21450 : return false;
21451 :
21452 34209572 : if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21453 : {
21454 : /* Vector registers do not support QI or HImode loads. If we don't
21455 : disallow a change to these modes, reload will assume it's ok to
21456 : drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21457 : the vec_dupv4hi pattern.
21458 : NB: SSE2 can load 16bit data to sse register via pinsrw. */
21459 16494647 : int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
21460 16494647 : if (GET_MODE_SIZE (from) < mov_size
21461 32988982 : || GET_MODE_SIZE (to) < mov_size)
21462 : return false;
21463 : }
21464 :
21465 : return true;
21466 : }
21467 :
21468 : /* Return index of MODE in the sse load/store tables. */
21469 :
21470 : static inline int
21471 773939717 : sse_store_index (machine_mode mode)
21472 : {
21473 : /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21474 : costs to processor_costs, which requires changes to all entries in
21475 : processor cost table. */
21476 773939717 : if (mode == E_HFmode)
21477 137133734 : mode = E_SFmode;
21478 :
21479 1547879434 : switch (GET_MODE_SIZE (mode))
21480 : {
21481 : case 4:
21482 : return 0;
21483 : case 8:
21484 : return 1;
21485 : case 16:
21486 : return 2;
21487 : case 32:
21488 : return 3;
21489 : case 64:
21490 : return 4;
21491 : default:
21492 : return -1;
21493 : }
21494 : }
21495 :
21496 : /* Return the cost of moving data of mode M between a
21497 : register and memory. A value of 2 is the default; this cost is
21498 : relative to those in `REGISTER_MOVE_COST'.
21499 :
21500 : This function is used extensively by register_move_cost that is used to
21501 : build tables at startup. Make it inline in this case.
21502 : When IN is 2, return maximum of in and out move cost.
21503 :
21504 : If moving between registers and memory is more expensive than
21505 : between two registers, you should define this macro to express the
21506 : relative cost.
21507 :
21508 : Model also increased moving costs of QImode registers in non
21509 : Q_REGS classes.
21510 : */
21511 : static inline int
21512 6915399486 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21513 : {
21514 6915399486 : int cost;
21515 :
21516 6915399486 : if (FLOAT_CLASS_P (regclass))
21517 : {
21518 353164850 : int index;
21519 353164850 : switch (mode)
21520 : {
21521 : case E_SFmode:
21522 : index = 0;
21523 : break;
21524 : case E_DFmode:
21525 : index = 1;
21526 : break;
21527 : case E_XFmode:
21528 : index = 2;
21529 : break;
21530 : default:
21531 : return 100;
21532 : }
21533 105567002 : if (in == 2)
21534 101627446 : return MAX (ix86_cost->hard_register.fp_load [index],
21535 : ix86_cost->hard_register.fp_store [index]);
21536 3939556 : return in ? ix86_cost->hard_register.fp_load [index]
21537 3939556 : : ix86_cost->hard_register.fp_store [index];
21538 : }
21539 6562234636 : if (SSE_CLASS_P (regclass))
21540 : {
21541 644867901 : int index = sse_store_index (mode);
21542 644867901 : if (index == -1)
21543 : return 100;
21544 560816394 : if (in == 2)
21545 397424637 : return MAX (ix86_cost->hard_register.sse_load [index],
21546 : ix86_cost->hard_register.sse_store [index]);
21547 163391757 : return in ? ix86_cost->hard_register.sse_load [index]
21548 163391757 : : ix86_cost->hard_register.sse_store [index];
21549 : }
21550 5917366735 : if (MASK_CLASS_P (regclass))
21551 : {
21552 108289175 : int index;
21553 216578350 : switch (GET_MODE_SIZE (mode))
21554 : {
21555 : case 1:
21556 : index = 0;
21557 : break;
21558 8934069 : case 2:
21559 8934069 : index = 1;
21560 8934069 : break;
21561 : /* DImode loads and stores assumed to cost the same as SImode. */
21562 40187470 : case 4:
21563 40187470 : case 8:
21564 40187470 : index = 2;
21565 40187470 : break;
21566 : default:
21567 : return 100;
21568 : }
21569 :
21570 52699287 : if (in == 2)
21571 583719 : return MAX (ix86_cost->hard_register.mask_load[index],
21572 : ix86_cost->hard_register.mask_store[index]);
21573 52115568 : return in ? ix86_cost->hard_register.mask_load[2]
21574 52115568 : : ix86_cost->hard_register.mask_store[2];
21575 : }
21576 5809077560 : if (MMX_CLASS_P (regclass))
21577 : {
21578 172433024 : int index;
21579 344866048 : switch (GET_MODE_SIZE (mode))
21580 : {
21581 : case 4:
21582 : index = 0;
21583 : break;
21584 101199804 : case 8:
21585 101199804 : index = 1;
21586 101199804 : break;
21587 : default:
21588 : return 100;
21589 : }
21590 138557936 : if (in == 2)
21591 118580298 : return MAX (ix86_cost->hard_register.mmx_load [index],
21592 : ix86_cost->hard_register.mmx_store [index]);
21593 19977638 : return in ? ix86_cost->hard_register.mmx_load [index]
21594 19977638 : : ix86_cost->hard_register.mmx_store [index];
21595 : }
21596 11273289072 : switch (GET_MODE_SIZE (mode))
21597 : {
21598 124774864 : case 1:
21599 124774864 : if (Q_CLASS_P (regclass) || TARGET_64BIT)
21600 : {
21601 122147411 : if (!in)
21602 19594300 : return ix86_cost->hard_register.int_store[0];
21603 102553111 : if (TARGET_PARTIAL_REG_DEPENDENCY
21604 102553111 : && optimize_function_for_speed_p (cfun))
21605 95628137 : cost = ix86_cost->hard_register.movzbl_load;
21606 : else
21607 6924974 : cost = ix86_cost->hard_register.int_load[0];
21608 102553111 : if (in == 2)
21609 82930390 : return MAX (cost, ix86_cost->hard_register.int_store[0]);
21610 : return cost;
21611 : }
21612 : else
21613 : {
21614 2627453 : if (in == 2)
21615 1860710 : return MAX (ix86_cost->hard_register.movzbl_load,
21616 : ix86_cost->hard_register.int_store[0] + 4);
21617 766743 : if (in)
21618 383427 : return ix86_cost->hard_register.movzbl_load;
21619 : else
21620 383316 : return ix86_cost->hard_register.int_store[0] + 4;
21621 : }
21622 644077154 : break;
21623 644077154 : case 2:
21624 644077154 : {
21625 644077154 : int cost;
21626 644077154 : if (in == 2)
21627 544168225 : cost = MAX (ix86_cost->hard_register.int_load[1],
21628 : ix86_cost->hard_register.int_store[1]);
21629 : else
21630 99908929 : cost = in ? ix86_cost->hard_register.int_load[1]
21631 : : ix86_cost->hard_register.int_store[1];
21632 :
21633 644077154 : if (mode == E_HFmode)
21634 : {
21635 : /* Prefer SSE over GPR for HFmode. */
21636 124807302 : int sse_cost;
21637 124807302 : int index = sse_store_index (mode);
21638 124807302 : if (in == 2)
21639 114818432 : sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21640 : ix86_cost->hard_register.sse_store[index]);
21641 : else
21642 19977740 : sse_cost = (in
21643 9988870 : ? ix86_cost->hard_register.sse_load [index]
21644 : : ix86_cost->hard_register.sse_store [index]);
21645 124807302 : if (sse_cost >= cost)
21646 124807302 : cost = sse_cost + 1;
21647 : }
21648 : return cost;
21649 : }
21650 4867792518 : default:
21651 4867792518 : if (in == 2)
21652 3768645032 : cost = MAX (ix86_cost->hard_register.int_load[2],
21653 : ix86_cost->hard_register.int_store[2]);
21654 1099147486 : else if (in)
21655 549763046 : cost = ix86_cost->hard_register.int_load[2];
21656 : else
21657 549384440 : cost = ix86_cost->hard_register.int_store[2];
21658 : /* Multiply with the number of GPR moves needed. */
21659 9854481525 : return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21660 : }
21661 : }
21662 :
21663 : static int
21664 1777785732 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21665 : {
21666 2666355348 : return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
21667 : }
21668 :
21669 :
21670 : /* Return the cost of moving data from a register in class CLASS1 to
21671 : one in class CLASS2.
21672 :
21673 : It is not required that the cost always equal 2 when FROM is the same as TO;
21674 : on some machines it is expensive to move between registers if they are not
21675 : general registers. */
21676 :
21677 : static int
21678 5610871703 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
21679 : reg_class_t class2_i)
21680 : {
21681 5610871703 : enum reg_class class1 = (enum reg_class) class1_i;
21682 5610871703 : enum reg_class class2 = (enum reg_class) class2_i;
21683 :
21684 : /* In case we require secondary memory, compute cost of the store followed
21685 : by load. In order to avoid bad register allocation choices, we need
21686 : for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21687 :
21688 5610871703 : if (inline_secondary_memory_needed (mode, class1, class2, false))
21689 : {
21690 2568806877 : int cost = 1;
21691 :
21692 2568806877 : cost += inline_memory_move_cost (mode, class1, 2);
21693 2568806877 : cost += inline_memory_move_cost (mode, class2, 2);
21694 :
21695 : /* In case of copying from general_purpose_register we may emit multiple
21696 : stores followed by single load causing memory size mismatch stall.
21697 : Count this as arbitrarily high cost of 20. */
21698 5137613754 : if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
21699 768981607 : && TARGET_MEMORY_MISMATCH_STALL
21700 4106770091 : && targetm.class_max_nregs (class1, mode)
21701 768981607 : > targetm.class_max_nregs (class2, mode))
21702 146314735 : cost += 20;
21703 :
21704 : /* In the case of FP/MMX moves, the registers actually overlap, and we
21705 : have to switch modes in order to treat them differently. */
21706 59290185 : if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21707 2618759698 : || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21708 18674728 : cost += 20;
21709 :
21710 2568806877 : return cost;
21711 : }
21712 :
21713 : /* Moves between MMX and non-MMX units require secondary memory. */
21714 3042064826 : if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21715 0 : gcc_unreachable ();
21716 :
21717 3042064826 : if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21718 582314087 : return (SSE_CLASS_P (class1)
21719 582314087 : ? ix86_cost->hard_register.sse_to_integer
21720 582314087 : : ix86_cost->hard_register.integer_to_sse);
21721 :
21722 : /* Moves between mask register and GPR. */
21723 2459750739 : if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21724 : {
21725 1054174 : return (MASK_CLASS_P (class1)
21726 1054174 : ? ix86_cost->hard_register.mask_to_integer
21727 1054174 : : ix86_cost->hard_register.integer_to_mask);
21728 : }
21729 : /* Moving between mask registers. */
21730 2458696565 : if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
21731 101240 : return ix86_cost->hard_register.mask_move;
21732 :
21733 2458595325 : if (MAYBE_FLOAT_CLASS_P (class1))
21734 11801615 : return ix86_cost->hard_register.fp_move;
21735 2446793710 : if (MAYBE_SSE_CLASS_P (class1))
21736 : {
21737 229659204 : if (GET_MODE_BITSIZE (mode) <= 128)
21738 112357278 : return ix86_cost->hard_register.xmm_move;
21739 4944648 : if (GET_MODE_BITSIZE (mode) <= 256)
21740 1571466 : return ix86_cost->hard_register.ymm_move;
21741 900858 : return ix86_cost->hard_register.zmm_move;
21742 : }
21743 2331964108 : if (MAYBE_MMX_CLASS_P (class1))
21744 2171535 : return ix86_cost->hard_register.mmx_move;
21745 : return 2;
21746 : }
21747 :
21748 : /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
21749 : words of a value of mode MODE but can be less for certain modes in
21750 : special long registers.
21751 :
21752 : Actually there are no two word move instructions for consecutive
21753 : registers. And only registers 0-3 may have mov byte instructions
21754 : applied to them. */
21755 :
21756 : static unsigned int
21757 8877363360 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
21758 : {
21759 8877363360 : if (GENERAL_REGNO_P (regno))
21760 : {
21761 3087778560 : if (mode == XFmode)
21762 25379840 : return TARGET_64BIT ? 2 : 3;
21763 3062877120 : if (mode == XCmode)
21764 25379840 : return TARGET_64BIT ? 4 : 6;
21765 6134316160 : return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21766 : }
21767 5789584800 : if (COMPLEX_MODE_P (mode))
21768 : return 2;
21769 : /* Register pair for mask registers. */
21770 5042541600 : if (mode == P2QImode || mode == P2HImode)
21771 93380400 : return 2;
21772 :
21773 : return 1;
21774 : }
21775 :
21776 : /* Implement REGMODE_NATURAL_SIZE(MODE). */
21777 : unsigned int
21778 110801320 : ix86_regmode_natural_size (machine_mode mode)
21779 : {
21780 110801320 : if (mode == P2HImode || mode == P2QImode)
21781 2462 : return GET_MODE_SIZE (mode) / 2;
21782 110800089 : return UNITS_PER_WORD;
21783 : }
21784 :
21785 : /* Implement TARGET_HARD_REGNO_MODE_OK. */
21786 :
21787 : static bool
21788 54241960668 : ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
21789 : {
21790 : /* Flags and only flags can only hold CCmode values. */
21791 54241960668 : if (CC_REGNO_P (regno))
21792 431911877 : return GET_MODE_CLASS (mode) == MODE_CC;
21793 53810048791 : if (GET_MODE_CLASS (mode) == MODE_CC
21794 : || GET_MODE_CLASS (mode) == MODE_RANDOM)
21795 : return false;
21796 48275973560 : if (STACK_REGNO_P (regno))
21797 4698471776 : return VALID_FP_MODE_P (mode);
21798 43577501784 : if (MASK_REGNO_P (regno))
21799 : {
21800 : /* Register pair only starts at even register number. */
21801 3669861411 : if ((mode == P2QImode || mode == P2HImode))
21802 51199960 : return MASK_PAIR_REGNO_P(regno);
21803 :
21804 1001212275 : return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
21805 4599511211 : || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
21806 : }
21807 :
21808 39907640373 : if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21809 : return false;
21810 :
21811 38928630876 : if (SSE_REGNO_P (regno))
21812 : {
21813 : /* We implement the move patterns for all vector modes into and
21814 : out of SSE registers, even when no operation instructions
21815 : are available. */
21816 :
21817 : /* For AVX-512 we allow, regardless of regno:
21818 : - XI mode
21819 : - any of 512-bit wide vector mode
21820 : - any scalar mode. */
21821 16834935974 : if (TARGET_AVX512F
21822 : && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
21823 : || VALID_AVX512F_SCALAR_MODE (mode)))
21824 : return true;
21825 :
21826 : /* TODO check for QI/HI scalars. */
21827 : /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
21828 16144574550 : if (TARGET_AVX512VL
21829 1751120226 : && (VALID_AVX256_REG_OR_OI_MODE (mode)
21830 1538867313 : || VALID_AVX512VL_128_REG_MODE (mode)))
21831 : return true;
21832 :
21833 : /* xmm16-xmm31 are only available for AVX-512. */
21834 15698059518 : if (EXT_REX_SSE_REGNO_P (regno))
21835 : return false;
21836 :
21837 : /* OImode and AVX modes are available only when AVX is enabled. */
21838 9080317741 : return ((TARGET_AVX
21839 1925511921 : && VALID_AVX256_REG_OR_OI_MODE (mode))
21840 : || VALID_SSE_REG_MODE (mode)
21841 : || VALID_SSE2_REG_MODE (mode)
21842 : || VALID_MMX_REG_MODE (mode)
21843 9080317741 : || VALID_MMX_REG_MODE_3DNOW (mode));
21844 : }
21845 22093694902 : if (MMX_REGNO_P (regno))
21846 : {
21847 : /* We implement the move patterns for 3DNOW modes even in MMX mode,
21848 : so if the register is available at all, then we can move data of
21849 : the given mode into or out of it. */
21850 3938543258 : return (VALID_MMX_REG_MODE (mode)
21851 : || VALID_MMX_REG_MODE_3DNOW (mode));
21852 : }
21853 :
21854 18155151644 : if (mode == QImode)
21855 : {
21856 : /* Take care for QImode values - they can be in non-QI regs,
21857 : but then they do cause partial register stalls. */
21858 205822171 : if (ANY_QI_REGNO_P (regno))
21859 : return true;
21860 14265215 : if (!TARGET_PARTIAL_REG_STALL)
21861 : return true;
21862 : /* LRA checks if the hard register is OK for the given mode.
21863 : QImode values can live in non-QI regs, so we allow all
21864 : registers here. */
21865 0 : if (lra_in_progress)
21866 : return true;
21867 0 : return !can_create_pseudo_p ();
21868 : }
21869 : /* We handle both integer and floats in the general purpose registers. */
21870 17949329473 : else if (VALID_INT_MODE_P (mode)
21871 13129317255 : || VALID_FP_MODE_P (mode))
21872 : return true;
21873 : /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21874 : on to use that value in smaller contexts, this can easily force a
21875 : pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21876 : supporting DImode, allow it. */
21877 12071463717 : else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21878 : return true;
21879 :
21880 : return false;
21881 : }
21882 :
21883 : /* Implement TARGET_INSN_CALLEE_ABI. */
21884 :
21885 : const predefined_function_abi &
21886 247949921 : ix86_insn_callee_abi (const rtx_insn *insn)
21887 : {
21888 247949921 : unsigned int abi_id = 0;
21889 247949921 : rtx pat = PATTERN (insn);
21890 247949921 : if (vzeroupper_pattern (pat, VOIDmode))
21891 398632 : abi_id = ABI_VZEROUPPER;
21892 :
21893 247949921 : return function_abis[abi_id];
21894 : }
21895 :
21896 : /* Initialize function_abis with corresponding abi_id,
21897 : currently only handle vzeroupper. */
21898 : void
21899 21895 : ix86_initialize_callee_abi (unsigned int abi_id)
21900 : {
21901 21895 : gcc_assert (abi_id == ABI_VZEROUPPER);
21902 21895 : predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21903 21895 : if (!vzeroupper_abi.initialized_p ())
21904 : {
21905 : HARD_REG_SET full_reg_clobbers;
21906 4274 : CLEAR_HARD_REG_SET (full_reg_clobbers);
21907 4274 : vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21908 : }
21909 21895 : }
21910 :
21911 : void
21912 21895 : ix86_expand_avx_vzeroupper (void)
21913 : {
21914 : /* Initialize vzeroupper_abi here. */
21915 21895 : ix86_initialize_callee_abi (ABI_VZEROUPPER);
21916 21895 : rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21917 : /* Return false for non-local goto in can_nonlocal_goto. */
21918 21895 : make_reg_eh_region_note (insn, 0, INT_MIN);
21919 : /* Flag used for call_insn indicates it's a fake call. */
21920 21895 : RTX_FLAG (insn, used) = 1;
21921 21895 : }
21922 :
21923 :
21924 : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21925 : saves SSE registers across calls is Win64 (thus no need to check the
21926 : current ABI here), and with AVX enabled Win64 only guarantees that
21927 : the low 16 bytes are saved. */
21928 :
21929 : static bool
21930 2045700279 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21931 : machine_mode mode)
21932 : {
21933 : /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21934 2045700279 : if (abi_id == ABI_VZEROUPPER)
21935 30893696 : return (GET_MODE_SIZE (mode) > 16
21936 30893696 : && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21937 4724924 : || LEGACY_SSE_REGNO_P (regno)));
21938 :
21939 2651583135 : return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21940 : }
21941 :
21942 : /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21943 : tieable integer mode. */
21944 :
21945 : static bool
21946 52155810 : ix86_tieable_integer_mode_p (machine_mode mode)
21947 : {
21948 52155810 : switch (mode)
21949 : {
21950 : case E_HImode:
21951 : case E_SImode:
21952 : return true;
21953 :
21954 5263592 : case E_QImode:
21955 5263592 : return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21956 :
21957 10183017 : case E_DImode:
21958 10183017 : return TARGET_64BIT;
21959 :
21960 : default:
21961 : return false;
21962 : }
21963 : }
21964 :
21965 : /* Implement TARGET_MODES_TIEABLE_P.
21966 :
21967 : Return true if MODE1 is accessible in a register that can hold MODE2
21968 : without copying. That is, all register classes that can hold MODE2
21969 : can also hold MODE1. */
21970 :
21971 : static bool
21972 33788604 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21973 : {
21974 33788604 : if (mode1 == mode2)
21975 : return true;
21976 :
21977 33702128 : if (ix86_tieable_integer_mode_p (mode1)
21978 33702128 : && ix86_tieable_integer_mode_p (mode2))
21979 : return true;
21980 :
21981 : /* MODE2 being XFmode implies fp stack or general regs, which means we
21982 : can tie any smaller floating point modes to it. Note that we do not
21983 : tie this with TFmode. */
21984 24678761 : if (mode2 == XFmode)
21985 4314 : return mode1 == SFmode || mode1 == DFmode;
21986 :
21987 : /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21988 : that we can tie it with SFmode. */
21989 24674447 : if (mode2 == DFmode)
21990 249977 : return mode1 == SFmode;
21991 :
21992 : /* If MODE2 is only appropriate for an SSE register, then tie with
21993 : any vector modes or scalar floating point modes acceptable to SSE
21994 : registers, excluding scalar integer modes with SUBREG:
21995 : (subreg:QI (reg:TI 99) 0))
21996 : (subreg:HI (reg:TI 99) 0))
21997 : (subreg:SI (reg:TI 99) 0))
21998 : (subreg:DI (reg:TI 99) 0))
21999 : to avoid unnecessary move from SSE register to integer register.
22000 : */
22001 24424470 : if (GET_MODE_SIZE (mode2) >= 16
22002 38248558 : && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
22003 13474929 : || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
22004 484704 : && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
22005 30315082 : && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22006 5457078 : return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
22007 :
22008 : /* If MODE2 is appropriate for an MMX register, then tie
22009 : with any other mode acceptable to MMX registers. */
22010 18967392 : if (GET_MODE_SIZE (mode2) == 8
22011 18967392 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22012 3289403 : return (GET_MODE_SIZE (mode1) == 8
22013 3289403 : && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22014 :
22015 : /* SCmode and DImode can be tied. */
22016 15677989 : if ((mode1 == E_SCmode && mode2 == E_DImode)
22017 15677989 : || (mode1 == E_DImode && mode2 == E_SCmode))
22018 108 : return TARGET_64BIT;
22019 :
22020 : /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
22021 15677881 : if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
22022 15677881 : || (mode1 == E_V2SFmode && mode2 == E_SCmode)
22023 15677881 : || (mode1 == E_DCmode && mode2 == E_V2DFmode)
22024 15677881 : || (mode1 == E_V2DFmode && mode2 == E_DCmode))
22025 0 : return true;
22026 :
22027 : return false;
22028 : }
22029 :
22030 : /* Return the cost of moving between two registers of mode MODE. */
22031 :
22032 : static int
22033 29251605 : ix86_set_reg_reg_cost (machine_mode mode)
22034 : {
22035 29251605 : unsigned int units = UNITS_PER_WORD;
22036 :
22037 29251605 : switch (GET_MODE_CLASS (mode))
22038 : {
22039 : default:
22040 : break;
22041 :
22042 : case MODE_CC:
22043 29251605 : units = GET_MODE_SIZE (CCmode);
22044 : break;
22045 :
22046 1165229 : case MODE_FLOAT:
22047 1165229 : if ((TARGET_SSE && mode == TFmode)
22048 683238 : || (TARGET_80387 && mode == XFmode)
22049 210599 : || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
22050 142490 : || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
22051 2300542 : units = GET_MODE_SIZE (mode);
22052 : break;
22053 :
22054 1307262 : case MODE_COMPLEX_FLOAT:
22055 1307262 : if ((TARGET_SSE && mode == TCmode)
22056 876438 : || (TARGET_80387 && mode == XCmode)
22057 445492 : || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
22058 14518 : || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
22059 2608036 : units = GET_MODE_SIZE (mode);
22060 : break;
22061 :
22062 18707751 : case MODE_VECTOR_INT:
22063 18707751 : case MODE_VECTOR_FLOAT:
22064 18707751 : if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
22065 18611788 : || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
22066 18440330 : || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22067 15811766 : || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22068 14506114 : || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
22069 14460604 : && VALID_MMX_REG_MODE (mode)))
22070 8511284 : units = GET_MODE_SIZE (mode);
22071 : }
22072 :
22073 : /* Return the cost of moving between two registers of mode MODE,
22074 : assuming that the move will be in pieces of at most UNITS bytes. */
22075 29251605 : return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
22076 : }
22077 :
22078 : /* Return cost of vector operation in MODE given that scalar version has
22079 : COST. */
22080 :
22081 : static int
22082 2831901434 : ix86_vec_cost (machine_mode mode, int cost)
22083 : {
22084 2831901434 : if (!VECTOR_MODE_P (mode))
22085 : return cost;
22086 :
22087 2831667923 : if (GET_MODE_BITSIZE (mode) == 128
22088 2831667923 : && TARGET_SSE_SPLIT_REGS)
22089 2861998 : return cost * GET_MODE_BITSIZE (mode) / 64;
22090 2830236924 : else if (GET_MODE_BITSIZE (mode) > 128
22091 2830236924 : && TARGET_AVX256_SPLIT_REGS)
22092 1674620 : return cost * GET_MODE_BITSIZE (mode) / 128;
22093 2829399614 : else if (GET_MODE_BITSIZE (mode) > 256
22094 2829399614 : && TARGET_AVX512_SPLIT_REGS)
22095 224056 : return cost * GET_MODE_BITSIZE (mode) / 256;
22096 : return cost;
22097 : }
22098 :
22099 : /* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
22100 : vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */
22101 : static int
22102 1076 : ix86_widen_mult_cost (const struct processor_costs *cost,
22103 : enum machine_mode mode, bool uns_p)
22104 : {
22105 1076 : gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
22106 1076 : int extra_cost = 0;
22107 1076 : int basic_cost = 0;
22108 1076 : switch (mode)
22109 : {
22110 124 : case V8HImode:
22111 124 : case V16HImode:
22112 124 : if (!uns_p || mode == V16HImode)
22113 53 : extra_cost = cost->sse_op * 2;
22114 124 : basic_cost = cost->mulss * 2 + cost->sse_op * 4;
22115 124 : break;
22116 203 : case V4SImode:
22117 203 : case V8SImode:
22118 : /* pmulhw/pmullw can be used. */
22119 203 : basic_cost = cost->mulss * 2 + cost->sse_op * 2;
22120 203 : break;
22121 681 : case V2DImode:
22122 : /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
22123 : require extra 4 mul, 4 add, 4 cmp and 2 shift. */
22124 681 : if (!TARGET_SSE4_1 && !uns_p)
22125 403 : extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
22126 403 : + cost->sse_op * 2;
22127 : /* Fallthru. */
22128 737 : case V4DImode:
22129 737 : basic_cost = cost->mulss * 2 + cost->sse_op * 4;
22130 737 : break;
22131 : default:
22132 : /* Not implemented. */
22133 : return 100;
22134 : }
22135 1064 : return ix86_vec_cost (mode, basic_cost + extra_cost);
22136 : }
22137 :
22138 : /* Return cost of multiplication in MODE. */
22139 :
22140 : static int
22141 1208506312 : ix86_multiplication_cost (const struct processor_costs *cost,
22142 : enum machine_mode mode)
22143 : {
22144 1208506312 : machine_mode inner_mode = mode;
22145 1208506312 : if (VECTOR_MODE_P (mode))
22146 1207482968 : inner_mode = GET_MODE_INNER (mode);
22147 :
22148 1208506312 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22149 753467 : return inner_mode == DFmode ? cost->mulsd : cost->mulss;
22150 1207752845 : else if (X87_FLOAT_MODE_P (mode))
22151 162427 : return cost->fmul;
22152 1207590418 : else if (FLOAT_MODE_P (mode))
22153 230434 : return ix86_vec_cost (mode,
22154 230434 : inner_mode == DFmode ? cost->mulsd : cost->mulss);
22155 1207359984 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22156 : {
22157 1207276780 : int nmults, nops;
22158 : /* Cost of reading the memory. */
22159 1207276780 : int extra;
22160 :
22161 1207276780 : switch (mode)
22162 : {
22163 19011938 : case V4QImode:
22164 19011938 : case V8QImode:
22165 : /* Partial V*QImode is emulated with 4-6 insns. */
22166 19011938 : nmults = 1;
22167 19011938 : nops = 3;
22168 19011938 : extra = 0;
22169 :
22170 19011938 : if (TARGET_AVX512BW && TARGET_AVX512VL)
22171 : ;
22172 18902720 : else if (TARGET_AVX2)
22173 : nops += 2;
22174 18395372 : else if (TARGET_XOP)
22175 10216 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22176 : else
22177 : {
22178 18385156 : nops += 1;
22179 18385156 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22180 : }
22181 19011938 : goto do_qimode;
22182 :
22183 9506444 : case V16QImode:
22184 : /* V*QImode is emulated with 4-11 insns. */
22185 9506444 : nmults = 1;
22186 9506444 : nops = 3;
22187 9506444 : extra = 0;
22188 :
22189 9506444 : if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
22190 : {
22191 306247 : if (!(TARGET_AVX512BW && TARGET_AVX512VL))
22192 251924 : nops += 3;
22193 : }
22194 9200197 : else if (TARGET_XOP)
22195 : {
22196 5552 : nmults += 1;
22197 5552 : nops += 2;
22198 5552 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22199 : }
22200 : else
22201 : {
22202 9194645 : nmults += 1;
22203 9194645 : nops += 4;
22204 9194645 : extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
22205 : }
22206 9506444 : goto do_qimode;
22207 :
22208 9504824 : case V32QImode:
22209 9504824 : nmults = 1;
22210 9504824 : nops = 3;
22211 9504824 : extra = 0;
22212 :
22213 9504824 : if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
22214 : {
22215 9422224 : nmults += 1;
22216 9422224 : nops += 4;
22217 : /* 2 loads, so no division by 2. */
22218 9422224 : extra += COSTS_N_INSNS (cost->sse_load[3]);
22219 : }
22220 9504824 : goto do_qimode;
22221 :
22222 9504345 : case V64QImode:
22223 9504345 : nmults = 2;
22224 9504345 : nops = 9;
22225 : /* 2 loads of each size, so no division by 2. */
22226 9504345 : extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
22227 :
22228 47527551 : do_qimode:
22229 47527551 : return ix86_vec_cost (mode, cost->mulss * nmults
22230 47527551 : + cost->sse_op * nops) + extra;
22231 :
22232 40647006 : case V4SImode:
22233 : /* pmulld is used in this case. No emulation is needed. */
22234 40647006 : if (TARGET_SSE4_1)
22235 2245249 : goto do_native;
22236 : /* V4SImode is emulated with 7 insns. */
22237 : else
22238 38401757 : return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
22239 :
22240 164286955 : case V2DImode:
22241 164286955 : case V4DImode:
22242 : /* vpmullq is used in this case. No emulation is needed. */
22243 164286955 : if (TARGET_AVX512DQ && TARGET_AVX512VL)
22244 590707 : goto do_native;
22245 : /* V*DImode is emulated with 6-8 insns. */
22246 163696248 : else if (TARGET_XOP && mode == V2DImode)
22247 55860 : return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
22248 : /* FALLTHRU */
22249 245724302 : case V8DImode:
22250 : /* vpmullq is used in this case. No emulation is needed. */
22251 245724302 : if (TARGET_AVX512DQ && mode == V8DImode)
22252 388450 : goto do_native;
22253 : else
22254 245335852 : return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
22255 :
22256 875955760 : default:
22257 875955760 : do_native:
22258 875955760 : return ix86_vec_cost (mode, cost->mulss);
22259 : }
22260 : }
22261 : else
22262 166400 : return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
22263 : }
22264 :
22265 : /* Return cost of multiplication in MODE. */
22266 :
22267 : static int
22268 72629201 : ix86_division_cost (const struct processor_costs *cost,
22269 : enum machine_mode mode)
22270 : {
22271 72629201 : machine_mode inner_mode = mode;
22272 72629201 : if (VECTOR_MODE_P (mode))
22273 53583417 : inner_mode = GET_MODE_INNER (mode);
22274 :
22275 72629201 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22276 248436 : return inner_mode == DFmode ? cost->divsd : cost->divss;
22277 72380765 : else if (X87_FLOAT_MODE_P (mode))
22278 44842 : return cost->fdiv;
22279 72335923 : else if (FLOAT_MODE_P (mode))
22280 17522 : return ix86_vec_cost (mode,
22281 17522 : inner_mode == DFmode ? cost->divsd : cost->divss);
22282 : else
22283 80701128 : return cost->divide[MODE_INDEX (mode)];
22284 : }
22285 :
22286 : /* Return cost of shift in MODE.
22287 : If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
22288 : AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
22289 : if op1 is a result of subreg.
22290 :
22291 : SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
22292 :
22293 : static int
22294 775750941 : ix86_shift_rotate_cost (const struct processor_costs *cost,
22295 : enum rtx_code code,
22296 : enum machine_mode mode, bool constant_op1,
22297 : HOST_WIDE_INT op1_val,
22298 : bool and_in_op1,
22299 : bool shift_and_truncate,
22300 : bool *skip_op0, bool *skip_op1)
22301 : {
22302 775750941 : if (skip_op0)
22303 775678479 : *skip_op0 = *skip_op1 = false;
22304 :
22305 775750941 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22306 : {
22307 398515494 : int count;
22308 : /* Cost of reading the memory. */
22309 398515494 : int extra;
22310 :
22311 398515494 : switch (mode)
22312 : {
22313 6049261 : case V4QImode:
22314 6049261 : case V8QImode:
22315 6049261 : if (TARGET_AVX2)
22316 : /* Use vpbroadcast. */
22317 196243 : extra = cost->sse_op;
22318 : else
22319 5853018 : extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
22320 :
22321 6049261 : if (constant_op1)
22322 : {
22323 6049231 : if (code == ASHIFTRT)
22324 : {
22325 190 : count = 4;
22326 190 : extra *= 2;
22327 : }
22328 : else
22329 : count = 2;
22330 : }
22331 30 : else if (TARGET_AVX512BW && TARGET_AVX512VL)
22332 30 : return ix86_vec_cost (mode, cost->sse_op * 4);
22333 0 : else if (TARGET_SSE4_1)
22334 : count = 5;
22335 0 : else if (code == ASHIFTRT)
22336 : count = 6;
22337 : else
22338 0 : count = 5;
22339 6049231 : return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22340 :
22341 3027676 : case V16QImode:
22342 3027676 : if (TARGET_XOP)
22343 : {
22344 : /* For XOP we use vpshab, which requires a broadcast of the
22345 : value to the variable shift insn. For constants this
22346 : means a V16Q const in mem; even when we can perform the
22347 : shift with one insn set the cost to prefer paddb. */
22348 3601 : if (constant_op1)
22349 : {
22350 2642 : extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
22351 2642 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22352 : }
22353 : else
22354 : {
22355 959 : count = (code == ASHIFT) ? 3 : 4;
22356 959 : return ix86_vec_cost (mode, cost->sse_op * count);
22357 : }
22358 : }
22359 : /* FALLTHRU */
22360 6048400 : case V32QImode:
22361 6048400 : if (TARGET_GFNI && constant_op1)
22362 : {
22363 : /* Use vgf2p8affine. One extra load for the mask, but in a loop
22364 : with enough registers it will be moved out. So for now don't
22365 : account the constant mask load. This is not quite right
22366 : for non loop vectorization. */
22367 11878 : extra = 0;
22368 11878 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22369 : }
22370 6036522 : if (TARGET_AVX2)
22371 : /* Use vpbroadcast. */
22372 189190 : extra = cost->sse_op;
22373 : else
22374 5847332 : extra = COSTS_N_INSNS (mode == V16QImode
22375 : ? cost->sse_load[2]
22376 5847332 : : cost->sse_load[3]) / 2;
22377 :
22378 6036522 : if (constant_op1)
22379 : {
22380 6036334 : if (code == ASHIFTRT)
22381 : {
22382 198 : count = 4;
22383 198 : extra *= 2;
22384 : }
22385 : else
22386 : count = 2;
22387 : }
22388 188 : else if (TARGET_AVX512BW
22389 76 : && ((mode == V32QImode && !TARGET_PREFER_AVX256)
22390 38 : || (mode == V16QImode && TARGET_AVX512VL
22391 38 : && !TARGET_PREFER_AVX128)))
22392 76 : return ix86_vec_cost (mode, cost->sse_op * 4);
22393 112 : else if (TARGET_AVX2
22394 0 : && mode == V16QImode && !TARGET_PREFER_AVX128)
22395 : count = 6;
22396 112 : else if (TARGET_SSE4_1)
22397 : count = 9;
22398 112 : else if (code == ASHIFTRT)
22399 : count = 10;
22400 : else
22401 76 : count = 9;
22402 6036446 : return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22403 :
22404 3024574 : case V64QImode:
22405 : /* Ignore the mask load for GF2P8AFFINEQB. */
22406 3024574 : extra = 0;
22407 3024574 : return ix86_vec_cost (mode, cost->sse_op) + extra;
22408 :
22409 54523373 : case V2DImode:
22410 54523373 : case V4DImode:
22411 : /* V*DImode arithmetic right shift is emulated. */
22412 54523373 : if (code == ASHIFTRT && !TARGET_AVX512VL)
22413 : {
22414 1389 : if (constant_op1)
22415 : {
22416 650 : if (op1_val == 63)
22417 440 : count = TARGET_SSE4_2 ? 1 : 2;
22418 509 : else if (TARGET_XOP)
22419 : count = 2;
22420 210 : else if (TARGET_SSE4_1)
22421 : count = 3;
22422 : else
22423 230 : count = 4;
22424 : }
22425 739 : else if (TARGET_XOP)
22426 : count = 3;
22427 74 : else if (TARGET_SSE4_2)
22428 : count = 4;
22429 : else
22430 1389 : count = 5;
22431 :
22432 1389 : return ix86_vec_cost (mode, cost->sse_op * count);
22433 : }
22434 : /* FALLTHRU */
22435 383388269 : default:
22436 383388269 : return ix86_vec_cost (mode, cost->sse_op);
22437 : }
22438 : }
22439 :
22440 763166031 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22441 : {
22442 193071946 : if (constant_op1)
22443 : {
22444 193037309 : if (op1_val > 32)
22445 137148697 : return cost->shift_const + COSTS_N_INSNS (2);
22446 : else
22447 55888612 : return cost->shift_const * 2;
22448 : }
22449 : else
22450 : {
22451 34637 : if (and_in_op1)
22452 63 : return cost->shift_var * 2;
22453 : else
22454 34574 : return cost->shift_var * 6 + COSTS_N_INSNS (2);
22455 : }
22456 : }
22457 : else
22458 : {
22459 184163501 : if (constant_op1)
22460 183427052 : return cost->shift_const;
22461 736449 : else if (shift_and_truncate)
22462 : {
22463 22905 : if (skip_op0)
22464 22905 : *skip_op0 = *skip_op1 = true;
22465 : /* Return the cost after shift-and truncation. */
22466 22905 : return cost->shift_var;
22467 : }
22468 : else
22469 713544 : return cost->shift_var;
22470 : }
22471 : }
22472 :
22473 : static int
22474 148822715 : ix86_insn_cost (rtx_insn *insn, bool speed)
22475 : {
22476 148822715 : int insn_cost = 0;
22477 : /* Add extra cost to avoid post_reload late_combine revert
22478 : the optimization did in pass_rpad. */
22479 148822715 : if (reload_completed
22480 4580078 : && ix86_rpad_gate ()
22481 247264 : && recog_memoized (insn) >= 0
22482 149069717 : && get_attr_avx_partial_xmm_update (insn)
22483 : == AVX_PARTIAL_XMM_UPDATE_TRUE)
22484 : insn_cost += COSTS_N_INSNS (3);
22485 :
22486 148822715 : return insn_cost + pattern_cost (PATTERN (insn), speed);
22487 : }
22488 :
22489 : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22490 :
22491 : static int
22492 757346 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22493 : {
22494 757346 : if (size < 128)
22495 752210 : return cost->cvtss2sd;
22496 5136 : else if (size < 256)
22497 : {
22498 2351 : if (TARGET_SSE_SPLIT_REGS)
22499 0 : return cost->cvtss2sd * size / 64;
22500 2351 : return cost->cvtss2sd;
22501 : }
22502 2785 : if (size < 512)
22503 1483 : return cost->vcvtps2pd256;
22504 : else
22505 1302 : return cost->vcvtps2pd512;
22506 : }
22507 :
22508 : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
22509 :
22510 : static bool
22511 271041 : unspec_pcmp_p (rtx x)
22512 : {
22513 271041 : return GET_CODE (x) == UNSPEC
22514 271041 : && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22515 : }
22516 :
22517 : /* Compute a (partial) cost for rtx X. Return true if the complete
22518 : cost has been computed, and false if subexpressions should be
22519 : scanned. In either case, *TOTAL contains the cost result. */
22520 :
22521 : static bool
22522 7707471020 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22523 : int *total, bool speed)
22524 : {
22525 7707471020 : rtx mask;
22526 7707471020 : enum rtx_code code = GET_CODE (x);
22527 7707471020 : enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22528 4126787060 : const struct processor_costs *cost
22529 7707471020 : = speed ? ix86_tune_cost : &ix86_size_cost;
22530 7707471020 : int src_cost;
22531 :
22532 : /* Handling different vternlog variants. */
22533 7707471020 : if ((GET_MODE_SIZE (mode) == 64
22534 7707471020 : ? TARGET_AVX512F
22535 6522607366 : : (TARGET_AVX512VL
22536 6461000004 : || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22537 177801295 : && GET_MODE_SIZE (mode) >= 16
22538 120814674 : && outer_code_i == SET
22539 7754140250 : && ternlog_operand (x, mode))
22540 : {
22541 33628 : rtx args[3];
22542 :
22543 33628 : args[0] = NULL_RTX;
22544 33628 : args[1] = NULL_RTX;
22545 33628 : args[2] = NULL_RTX;
22546 33628 : int idx = ix86_ternlog_idx (x, args);
22547 33628 : gcc_assert (idx >= 0);
22548 :
22549 33628 : *total = cost->sse_op;
22550 134512 : for (int i = 0; i != 3; i++)
22551 100884 : if (args[i])
22552 71146 : *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22553 33628 : return true;
22554 : }
22555 :
22556 :
22557 7707437392 : switch (code)
22558 : {
22559 47720047 : case SET:
22560 47720047 : if (register_operand (SET_DEST (x), VOIDmode)
22561 47720047 : && register_operand (SET_SRC (x), VOIDmode))
22562 : {
22563 29251605 : *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22564 29251605 : return true;
22565 : }
22566 :
22567 18468442 : if (register_operand (SET_SRC (x), VOIDmode))
22568 : /* Avoid potentially incorrect high cost from rtx_costs
22569 : for non-tieable SUBREGs. */
22570 : src_cost = 0;
22571 : else
22572 : {
22573 15648829 : src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22574 :
22575 15648829 : if (CONSTANT_P (SET_SRC (x)))
22576 : /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22577 : a small value, possibly zero for cheap constants. */
22578 6992484 : src_cost += COSTS_N_INSNS (1);
22579 : }
22580 :
22581 18468442 : *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22582 18468442 : return true;
22583 :
22584 2838511814 : case CONST_INT:
22585 2838511814 : case CONST:
22586 2838511814 : case LABEL_REF:
22587 2838511814 : case SYMBOL_REF:
22588 2838511814 : if (x86_64_immediate_operand (x, VOIDmode))
22589 2232646330 : *total = 0;
22590 605865484 : else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22591 : /* Consider the zext constants slightly more expensive, as they
22592 : can't appear in most instructions. */
22593 28043839 : *total = 1;
22594 : else
22595 : /* movabsq is slightly more expensive than a simple instruction. */
22596 577821645 : *total = COSTS_N_INSNS (1) + 1;
22597 : return true;
22598 :
22599 7534015 : case CONST_DOUBLE:
22600 7534015 : if (IS_STACK_MODE (mode))
22601 1300526 : switch (standard_80387_constant_p (x))
22602 : {
22603 : case -1:
22604 : case 0:
22605 : break;
22606 279744 : case 1: /* 0.0 */
22607 279744 : *total = 1;
22608 279744 : return true;
22609 485458 : default: /* Other constants */
22610 485458 : *total = 2;
22611 485458 : return true;
22612 : }
22613 : /* FALLTHRU */
22614 :
22615 14498707 : case CONST_VECTOR:
22616 14498707 : switch (standard_sse_constant_p (x, mode))
22617 : {
22618 : case 0:
22619 : break;
22620 4199712 : case 1: /* 0: xor eliminates false dependency */
22621 4199712 : *total = 0;
22622 4199712 : return true;
22623 192049 : default: /* -1: cmp contains false dependency */
22624 192049 : *total = 1;
22625 192049 : return true;
22626 : }
22627 : /* FALLTHRU */
22628 :
22629 11101960 : case CONST_WIDE_INT:
22630 : /* Fall back to (MEM (SYMBOL_REF)), since that's where
22631 : it'll probably end up. Add a penalty for size. */
22632 22203920 : *total = (COSTS_N_INSNS (1)
22633 21977663 : + (!TARGET_64BIT && flag_pic)
22634 22203920 : + (GET_MODE_SIZE (mode) <= 4
22635 19427353 : ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22636 11101960 : return true;
22637 :
22638 22552387 : case ZERO_EXTEND:
22639 : /* The zero extensions is often completely free on x86_64, so make
22640 : it as cheap as possible. */
22641 22552387 : if (TARGET_64BIT && mode == DImode
22642 4905648 : && GET_MODE (XEXP (x, 0)) == SImode)
22643 3024061 : *total = 1;
22644 19528326 : else if (TARGET_ZERO_EXTEND_WITH_AND)
22645 0 : *total = cost->add;
22646 : else
22647 19528326 : *total = cost->movzx;
22648 : return false;
22649 :
22650 2737588 : case SIGN_EXTEND:
22651 2737588 : *total = cost->movsx;
22652 2737588 : return false;
22653 :
22654 638249435 : case ASHIFT:
22655 638249435 : if (SCALAR_INT_MODE_P (mode)
22656 246669624 : && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22657 681384407 : && CONST_INT_P (XEXP (x, 1)))
22658 : {
22659 42956183 : HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22660 42956183 : if (value == 1)
22661 : {
22662 2478499 : *total = cost->add;
22663 2478499 : return false;
22664 : }
22665 40477684 : if ((value == 2 || value == 3)
22666 4548597 : && cost->lea <= cost->shift_const)
22667 : {
22668 2148308 : *total = cost->lea;
22669 2148308 : return false;
22670 : }
22671 : }
22672 : /* FALLTHRU */
22673 :
22674 775678479 : case ROTATE:
22675 775678479 : case ASHIFTRT:
22676 775678479 : case LSHIFTRT:
22677 775678479 : case ROTATERT:
22678 775678479 : bool skip_op0, skip_op1;
22679 775678479 : *total = ix86_shift_rotate_cost (cost, code, mode,
22680 775678479 : CONSTANT_P (XEXP (x, 1)),
22681 : CONST_INT_P (XEXP (x, 1))
22682 : ? INTVAL (XEXP (x, 1)) : -1,
22683 : GET_CODE (XEXP (x, 1)) == AND,
22684 775678479 : SUBREG_P (XEXP (x, 1))
22685 775678479 : && GET_CODE (XEXP (XEXP (x, 1),
22686 : 0)) == AND,
22687 : &skip_op0, &skip_op1);
22688 775678479 : if (skip_op0 || skip_op1)
22689 : {
22690 22905 : if (!skip_op0)
22691 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22692 22905 : if (!skip_op1)
22693 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22694 22905 : return true;
22695 : }
22696 : return false;
22697 :
22698 228862 : case FMA:
22699 228862 : {
22700 228862 : rtx sub;
22701 :
22702 228862 : gcc_assert (FLOAT_MODE_P (mode));
22703 228862 : gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22704 :
22705 457724 : *total = ix86_vec_cost (mode,
22706 228862 : GET_MODE_INNER (mode) == SFmode
22707 : ? cost->fmass : cost->fmasd);
22708 228862 : *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22709 :
22710 : /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22711 228862 : sub = XEXP (x, 0);
22712 228862 : if (GET_CODE (sub) == NEG)
22713 50851 : sub = XEXP (sub, 0);
22714 228862 : *total += rtx_cost (sub, mode, FMA, 0, speed);
22715 :
22716 228862 : sub = XEXP (x, 2);
22717 228862 : if (GET_CODE (sub) == NEG)
22718 40517 : sub = XEXP (sub, 0);
22719 228862 : *total += rtx_cost (sub, mode, FMA, 2, speed);
22720 228862 : return true;
22721 : }
22722 :
22723 1760206957 : case MULT:
22724 1760206957 : if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22725 : {
22726 551953968 : rtx op0 = XEXP (x, 0);
22727 551953968 : rtx op1 = XEXP (x, 1);
22728 551953968 : int nbits;
22729 551953968 : if (CONST_INT_P (XEXP (x, 1)))
22730 : {
22731 533826256 : unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22732 1083612276 : for (nbits = 0; value != 0; value &= value - 1)
22733 549786020 : nbits++;
22734 : }
22735 : else
22736 : /* This is arbitrary. */
22737 : nbits = 7;
22738 :
22739 : /* Compute costs correctly for widening multiplication. */
22740 551953968 : if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22741 557491983 : && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22742 5538015 : == GET_MODE_SIZE (mode))
22743 : {
22744 5524216 : int is_mulwiden = 0;
22745 5524216 : machine_mode inner_mode = GET_MODE (op0);
22746 :
22747 5524216 : if (GET_CODE (op0) == GET_CODE (op1))
22748 5423575 : is_mulwiden = 1, op1 = XEXP (op1, 0);
22749 100641 : else if (CONST_INT_P (op1))
22750 : {
22751 90643 : if (GET_CODE (op0) == SIGN_EXTEND)
22752 40493 : is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22753 40493 : == INTVAL (op1);
22754 : else
22755 50150 : is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22756 : }
22757 :
22758 5514218 : if (is_mulwiden)
22759 5514218 : op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22760 : }
22761 :
22762 551953968 : int mult_init;
22763 : // Double word multiplication requires 3 mults and 2 adds.
22764 1119562846 : if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22765 : {
22766 332248210 : mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22767 332248210 : + 2 * cost->add;
22768 332248210 : nbits *= 3;
22769 : }
22770 378932076 : else mult_init = cost->mult_init[MODE_INDEX (mode)];
22771 :
22772 1103907936 : *total = (mult_init
22773 551953968 : + nbits * cost->mult_bit
22774 551953968 : + rtx_cost (op0, mode, outer_code, opno, speed)
22775 551953968 : + rtx_cost (op1, mode, outer_code, opno, speed));
22776 :
22777 551953968 : return true;
22778 : }
22779 1208252989 : *total = ix86_multiplication_cost (cost, mode);
22780 1208252989 : return false;
22781 :
22782 72615174 : case DIV:
22783 72615174 : case UDIV:
22784 72615174 : case MOD:
22785 72615174 : case UMOD:
22786 72615174 : *total = ix86_division_cost (cost, mode);
22787 72615174 : return false;
22788 :
22789 690037337 : case PLUS:
22790 690037337 : if (GET_MODE_CLASS (mode) == MODE_INT
22791 946411052 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22792 : {
22793 142718762 : if (GET_CODE (XEXP (x, 0)) == PLUS
22794 3824437 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22795 844672 : && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22796 844647 : && CONSTANT_P (XEXP (x, 1)))
22797 : {
22798 844590 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22799 844590 : if (val == 2 || val == 4 || val == 8)
22800 : {
22801 844486 : *total = cost->lea;
22802 844486 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22803 : outer_code, opno, speed);
22804 844486 : *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22805 : outer_code, opno, speed);
22806 844486 : *total += rtx_cost (XEXP (x, 1), mode,
22807 : outer_code, opno, speed);
22808 844486 : return true;
22809 : }
22810 : }
22811 141874172 : else if (GET_CODE (XEXP (x, 0)) == MULT
22812 52441287 : && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22813 : {
22814 52381386 : HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22815 52381386 : if (val == 2 || val == 4 || val == 8)
22816 : {
22817 8032178 : *total = cost->lea;
22818 8032178 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22819 : outer_code, opno, speed);
22820 8032178 : *total += rtx_cost (XEXP (x, 1), mode,
22821 : outer_code, opno, speed);
22822 8032178 : return true;
22823 : }
22824 : }
22825 89492786 : else if (GET_CODE (XEXP (x, 0)) == PLUS)
22826 : {
22827 2979847 : rtx op = XEXP (XEXP (x, 0), 0);
22828 :
22829 : /* Add with carry, ignore the cost of adding a carry flag. */
22830 2979847 : if (ix86_carry_flag_operator (op, mode)
22831 2979847 : || ix86_carry_flag_unset_operator (op, mode))
22832 70586 : *total = cost->add;
22833 : else
22834 : {
22835 2909261 : *total = cost->lea;
22836 2909261 : *total += rtx_cost (op, mode,
22837 : outer_code, opno, speed);
22838 : }
22839 :
22840 2979847 : *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22841 : outer_code, opno, speed);
22842 2979847 : *total += rtx_cost (XEXP (x, 1), mode,
22843 : outer_code, opno, speed);
22844 2979847 : return true;
22845 : }
22846 : }
22847 : /* FALLTHRU */
22848 :
22849 1837853101 : case MINUS:
22850 : /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22851 1837853101 : if (GET_MODE_CLASS (mode) == MODE_INT
22852 519787902 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22853 234985286 : && GET_CODE (XEXP (x, 0)) == MINUS
22854 1837893181 : && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22855 15161 : || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22856 : {
22857 24919 : *total = cost->add;
22858 24919 : *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22859 : outer_code, opno, speed);
22860 24919 : *total += rtx_cost (XEXP (x, 1), mode,
22861 : outer_code, opno, speed);
22862 24919 : return true;
22863 : }
22864 :
22865 1837828182 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22866 2410034 : *total = cost->addss;
22867 1835418148 : else if (X87_FLOAT_MODE_P (mode))
22868 220181 : *total = cost->fadd;
22869 1835197967 : else if (FLOAT_MODE_P (mode))
22870 441234 : *total = ix86_vec_cost (mode, cost->addss);
22871 1834756733 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22872 1208067601 : *total = ix86_vec_cost (mode, cost->sse_op);
22873 1292749157 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22874 331249325 : *total = cost->add * 2;
22875 : else
22876 295439807 : *total = cost->add;
22877 : return false;
22878 :
22879 3936105 : case IOR:
22880 3936105 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22881 3690273 : || SSE_FLOAT_MODE_P (mode))
22882 : {
22883 : /* (ior (not ...) ...) can be a single insn in AVX512. */
22884 480 : if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22885 255426 : && (GET_MODE_SIZE (mode) == 64
22886 0 : || (TARGET_AVX512VL
22887 0 : && (GET_MODE_SIZE (mode) == 32
22888 0 : || GET_MODE_SIZE (mode) == 16))))
22889 : {
22890 0 : rtx right = GET_CODE (XEXP (x, 1)) != NOT
22891 0 : ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22892 :
22893 0 : *total = ix86_vec_cost (mode, cost->sse_op)
22894 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22895 : outer_code, opno, speed)
22896 0 : + rtx_cost (right, mode, outer_code, opno, speed);
22897 0 : return true;
22898 : }
22899 255426 : *total = ix86_vec_cost (mode, cost->sse_op);
22900 255426 : }
22901 3680679 : else if (TARGET_64BIT
22902 3388661 : && mode == TImode
22903 1687567 : && GET_CODE (XEXP (x, 0)) == ASHIFT
22904 252336 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22905 250340 : && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22906 250340 : && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22907 250340 : && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22908 250340 : && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22909 228364 : && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22910 : {
22911 : /* *concatditi3 is cheap. */
22912 228364 : rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22913 228364 : rtx op1 = XEXP (XEXP (x, 1), 0);
22914 1386 : *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22915 228364 : ? COSTS_N_INSNS (1) /* movq. */
22916 226978 : : set_src_cost (op0, DImode, speed);
22917 2348 : *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22918 228364 : ? COSTS_N_INSNS (1) /* movq. */
22919 226029 : : set_src_cost (op1, DImode, speed);
22920 228364 : return true;
22921 : }
22922 3452315 : else if (TARGET_64BIT
22923 3160297 : && mode == TImode
22924 1459203 : && GET_CODE (XEXP (x, 0)) == AND
22925 1399244 : && REG_P (XEXP (XEXP (x, 0), 0))
22926 1394040 : && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22927 1391362 : && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22928 1391362 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22929 909072 : && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22930 909072 : && GET_CODE (XEXP (x, 1)) == ASHIFT
22931 906922 : && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22932 906922 : && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22933 906922 : && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22934 4359237 : && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22935 : {
22936 : /* *insvti_highpart is cheap. */
22937 906922 : rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22938 906922 : *total = COSTS_N_INSNS (1) + 1;
22939 1393 : *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22940 906922 : ? COSTS_N_INSNS (1) /* movq. */
22941 906019 : : set_src_cost (op, DImode, speed);
22942 906922 : return true;
22943 : }
22944 5382804 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22945 745611 : *total = cost->add * 2;
22946 : else
22947 1799782 : *total = cost->add;
22948 : return false;
22949 :
22950 570157 : case XOR:
22951 570157 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22952 436954 : || SSE_FLOAT_MODE_P (mode))
22953 133203 : *total = ix86_vec_cost (mode, cost->sse_op);
22954 933524 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22955 16507 : *total = cost->add * 2;
22956 : else
22957 420447 : *total = cost->add;
22958 : return false;
22959 :
22960 7052716 : case AND:
22961 7052716 : if (address_no_seg_operand (x, mode))
22962 : {
22963 15686 : *total = cost->lea;
22964 15686 : return true;
22965 : }
22966 7037030 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22967 6634042 : || SSE_FLOAT_MODE_P (mode))
22968 : {
22969 : /* pandn is a single instruction. */
22970 436495 : if (GET_CODE (XEXP (x, 0)) == NOT)
22971 : {
22972 55705 : rtx right = XEXP (x, 1);
22973 :
22974 : /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22975 362 : if (GET_CODE (right) == NOT && TARGET_AVX512F
22976 55705 : && (GET_MODE_SIZE (mode) == 64
22977 0 : || (TARGET_AVX512VL
22978 0 : && (GET_MODE_SIZE (mode) == 32
22979 0 : || GET_MODE_SIZE (mode) == 16))))
22980 0 : right = XEXP (right, 0);
22981 :
22982 55705 : *total = ix86_vec_cost (mode, cost->sse_op)
22983 55705 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22984 : outer_code, opno, speed)
22985 55705 : + rtx_cost (right, mode, outer_code, opno, speed);
22986 55705 : return true;
22987 : }
22988 380790 : else if (GET_CODE (XEXP (x, 1)) == NOT)
22989 : {
22990 740 : *total = ix86_vec_cost (mode, cost->sse_op)
22991 740 : + rtx_cost (XEXP (x, 0), mode,
22992 : outer_code, opno, speed)
22993 740 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22994 : outer_code, opno, speed);
22995 740 : return true;
22996 : }
22997 380050 : *total = ix86_vec_cost (mode, cost->sse_op);
22998 380050 : }
22999 13902262 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23000 : {
23001 1131709 : if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
23002 : {
23003 1670 : *total = cost->add * 2
23004 835 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23005 : outer_code, opno, speed)
23006 835 : + rtx_cost (XEXP (x, 1), mode,
23007 : outer_code, opno, speed);
23008 835 : return true;
23009 : }
23010 1130874 : else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
23011 : {
23012 0 : *total = cost->add * 2
23013 0 : + rtx_cost (XEXP (x, 0), mode,
23014 : outer_code, opno, speed)
23015 0 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
23016 : outer_code, opno, speed);
23017 0 : return true;
23018 : }
23019 1130874 : *total = cost->add * 2;
23020 : }
23021 5468826 : else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
23022 : {
23023 7578 : *total = cost->add
23024 3789 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23025 : outer_code, opno, speed)
23026 3789 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23027 3789 : return true;
23028 : }
23029 5465037 : else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
23030 : {
23031 112 : *total = cost->add
23032 56 : + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23033 56 : + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
23034 : outer_code, opno, speed);
23035 56 : return true;
23036 : }
23037 : else
23038 5464981 : *total = cost->add;
23039 : return false;
23040 :
23041 516574 : case NOT:
23042 516574 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23043 : {
23044 : /* (not (xor ...)) can be a single insn in AVX512. */
23045 0 : if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
23046 11016 : && (GET_MODE_SIZE (mode) == 64
23047 0 : || (TARGET_AVX512VL
23048 0 : && (GET_MODE_SIZE (mode) == 32
23049 0 : || GET_MODE_SIZE (mode) == 16))))
23050 : {
23051 0 : *total = ix86_vec_cost (mode, cost->sse_op)
23052 0 : + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23053 : outer_code, opno, speed)
23054 0 : + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
23055 : outer_code, opno, speed);
23056 0 : return true;
23057 : }
23058 :
23059 : // vnot is pxor -1.
23060 11016 : *total = ix86_vec_cost (mode, cost->sse_op) + 1;
23061 : }
23062 1156999 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23063 45905 : *total = cost->add * 2;
23064 : else
23065 459653 : *total = cost->add;
23066 : return false;
23067 :
23068 18212345 : case NEG:
23069 18212345 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23070 51184 : *total = cost->sse_op;
23071 18161161 : else if (X87_FLOAT_MODE_P (mode))
23072 15090 : *total = cost->fchs;
23073 18146071 : else if (FLOAT_MODE_P (mode))
23074 27070 : *total = ix86_vec_cost (mode, cost->sse_op);
23075 18119001 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23076 13404576 : *total = ix86_vec_cost (mode, cost->sse_op);
23077 9579721 : else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23078 1765058 : *total = cost->add * 3;
23079 : else
23080 2949367 : *total = cost->add;
23081 : return false;
23082 :
23083 53976519 : case COMPARE:
23084 53976519 : rtx op0, op1;
23085 53976519 : op0 = XEXP (x, 0);
23086 53976519 : op1 = XEXP (x, 1);
23087 53976519 : if (GET_CODE (op0) == ZERO_EXTRACT
23088 167335 : && XEXP (op0, 1) == const1_rtx
23089 150103 : && CONST_INT_P (XEXP (op0, 2))
23090 150067 : && op1 == const0_rtx)
23091 : {
23092 : /* This kind of construct is implemented using test[bwl].
23093 : Treat it as if we had an AND. */
23094 150067 : mode = GET_MODE (XEXP (op0, 0));
23095 300134 : *total = (cost->add
23096 150067 : + rtx_cost (XEXP (op0, 0), mode, outer_code,
23097 : opno, speed)
23098 150067 : + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
23099 150067 : return true;
23100 : }
23101 :
23102 53826452 : if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
23103 : {
23104 : /* This is an overflow detection, count it as a normal compare. */
23105 143162 : *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
23106 143162 : return true;
23107 : }
23108 :
23109 53683290 : rtx geu;
23110 : /* Match x
23111 : (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
23112 : (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
23113 53683290 : if (mode == CCCmode
23114 293077 : && GET_CODE (op0) == NEG
23115 7879 : && GET_CODE (geu = XEXP (op0, 0)) == GEU
23116 7876 : && REG_P (XEXP (geu, 0))
23117 7876 : && (GET_MODE (XEXP (geu, 0)) == CCCmode
23118 759 : || GET_MODE (XEXP (geu, 0)) == CCmode)
23119 7876 : && REGNO (XEXP (geu, 0)) == FLAGS_REG
23120 7876 : && XEXP (geu, 1) == const0_rtx
23121 7876 : && GET_CODE (op1) == LTU
23122 7876 : && REG_P (XEXP (op1, 0))
23123 7876 : && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
23124 7876 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23125 53691166 : && XEXP (op1, 1) == const0_rtx)
23126 : {
23127 : /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
23128 7876 : *total = 0;
23129 7876 : return true;
23130 : }
23131 : /* Match x
23132 : (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
23133 : (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
23134 53675414 : if (mode == CCCmode
23135 285201 : && GET_CODE (op0) == NEG
23136 3 : && GET_CODE (XEXP (op0, 0)) == LTU
23137 3 : && REG_P (XEXP (XEXP (op0, 0), 0))
23138 3 : && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
23139 3 : && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
23140 3 : && XEXP (XEXP (op0, 0), 1) == const0_rtx
23141 3 : && GET_CODE (op1) == GEU
23142 3 : && REG_P (XEXP (op1, 0))
23143 3 : && GET_MODE (XEXP (op1, 0)) == CCCmode
23144 3 : && REGNO (XEXP (op1, 0)) == FLAGS_REG
23145 53675417 : && XEXP (op1, 1) == const0_rtx)
23146 : {
23147 : /* This is *x86_cmc. */
23148 3 : if (!speed)
23149 0 : *total = COSTS_N_BYTES (1);
23150 3 : else if (TARGET_SLOW_STC)
23151 0 : *total = COSTS_N_INSNS (2);
23152 : else
23153 3 : *total = COSTS_N_INSNS (1);
23154 3 : return true;
23155 : }
23156 :
23157 53675411 : if (SCALAR_INT_MODE_P (GET_MODE (op0))
23158 111809523 : && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
23159 : {
23160 757633 : if (op1 == const0_rtx)
23161 218208 : *total = cost->add
23162 109104 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
23163 : else
23164 1297058 : *total = 3*cost->add
23165 648529 : + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
23166 648529 : + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
23167 757633 : return true;
23168 : }
23169 :
23170 : /* The embedded comparison operand is completely free. */
23171 52917778 : if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
23172 377366 : *total = 0;
23173 :
23174 : return false;
23175 :
23176 1369818 : case FLOAT_EXTEND:
23177 : /* x87 represents all values extended to 80bit. */
23178 1369818 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23179 665041 : *total = 0;
23180 : else
23181 1409554 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23182 : return false;
23183 :
23184 83815 : case FLOAT_TRUNCATE:
23185 83815 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23186 58216 : *total = cost->fadd;
23187 : else
23188 51198 : *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
23189 : return false;
23190 683371 : case FLOAT:
23191 683371 : case UNSIGNED_FLOAT:
23192 683371 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23193 : /* TODO: We do not have cost tables for x87. */
23194 93499 : *total = cost->fadd;
23195 589872 : else if (VECTOR_MODE_P (mode))
23196 0 : *total = ix86_vec_cost (mode, cost->cvtpi2ps);
23197 : else
23198 589872 : *total = cost->cvtsi2ss;
23199 : return false;
23200 :
23201 285311 : case FIX:
23202 285311 : case UNSIGNED_FIX:
23203 285311 : if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23204 : /* TODO: We do not have cost tables for x87. */
23205 285311 : *total = cost->fadd;
23206 0 : else if (VECTOR_MODE_P (mode))
23207 0 : *total = ix86_vec_cost (mode, cost->cvtps2pi);
23208 : else
23209 0 : *total = cost->cvtss2si;
23210 : return false;
23211 :
23212 371416 : case ABS:
23213 : /* SSE requires memory load for the constant operand. It may make
23214 : sense to account for this. Of course the constant operand may or
23215 : may not be reused. */
23216 371416 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23217 264477 : *total = cost->sse_op;
23218 106939 : else if (X87_FLOAT_MODE_P (mode))
23219 31496 : *total = cost->fabs;
23220 75443 : else if (FLOAT_MODE_P (mode))
23221 25859 : *total = ix86_vec_cost (mode, cost->sse_op);
23222 49584 : else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23223 6330 : *total = cost->sse_op;
23224 : return false;
23225 :
23226 28721 : case SQRT:
23227 28721 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23228 18364 : *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
23229 10357 : else if (X87_FLOAT_MODE_P (mode))
23230 4315 : *total = cost->fsqrt;
23231 6042 : else if (FLOAT_MODE_P (mode))
23232 6042 : *total = ix86_vec_cost (mode,
23233 : mode == SFmode ? cost->sqrtss : cost->sqrtsd);
23234 : return false;
23235 :
23236 3943489 : case UNSPEC:
23237 3943489 : switch (XINT (x, 1))
23238 : {
23239 126039 : case UNSPEC_TP:
23240 126039 : *total = 0;
23241 126039 : break;
23242 :
23243 5210 : case UNSPEC_VTERNLOG:
23244 5210 : *total = cost->sse_op;
23245 5210 : if (!REG_P (XVECEXP (x, 0, 0)))
23246 720 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23247 5210 : if (!REG_P (XVECEXP (x, 0, 1)))
23248 694 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23249 5210 : if (!REG_P (XVECEXP (x, 0, 2)))
23250 733 : *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
23251 : return true;
23252 :
23253 95135 : case UNSPEC_PTEST:
23254 95135 : {
23255 95135 : *total = cost->sse_op;
23256 95135 : rtx test_op0 = XVECEXP (x, 0, 0);
23257 95135 : if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
23258 : return false;
23259 94487 : if (GET_CODE (test_op0) == AND)
23260 : {
23261 23 : rtx and_op0 = XEXP (test_op0, 0);
23262 23 : if (GET_CODE (and_op0) == NOT)
23263 0 : and_op0 = XEXP (and_op0, 0);
23264 23 : *total += rtx_cost (and_op0, GET_MODE (and_op0),
23265 : AND, 0, speed)
23266 23 : + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
23267 : AND, 1, speed);
23268 : }
23269 : else
23270 94464 : *total = rtx_cost (test_op0, GET_MODE (test_op0),
23271 : UNSPEC, 0, speed);
23272 : }
23273 : return true;
23274 :
23275 20568 : case UNSPEC_BLENDV:
23276 20568 : *total = cost->sse_op;
23277 20568 : if (!REG_P (XVECEXP (x, 0, 0)))
23278 8409 : *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23279 20568 : if (!REG_P (XVECEXP (x, 0, 1)))
23280 9988 : *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23281 20568 : if (!REG_P (XVECEXP (x, 0, 2)))
23282 : {
23283 12710 : rtx cond = XVECEXP (x, 0, 2);
23284 12710 : if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
23285 773 : && CONST_VECTOR_P (XEXP (cond, 1)))
23286 : {
23287 : /* avx2_blendvpd256_gt and friends. */
23288 153 : if (!REG_P (XEXP (cond, 0)))
23289 70 : *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
23290 : }
23291 : else
23292 12557 : *total += rtx_cost (cond, mode, code, 2, speed);
23293 : }
23294 : return true;
23295 :
23296 28353 : case UNSPEC_MOVMSK:
23297 28353 : *total = cost->sse_op;
23298 28353 : return true;
23299 :
23300 : default:
23301 : break;
23302 : }
23303 : return false;
23304 :
23305 2018590 : case VEC_CONCAT:
23306 : /* ??? Assume all of these vector manipulation patterns are
23307 : recognizable. In which case they all pretty much have the
23308 : same cost.
23309 : ??? We should still recurse when computing cost. */
23310 2018590 : *total = cost->sse_op;
23311 2018590 : return true;
23312 :
23313 2428541 : case VEC_SELECT:
23314 : /* Special case extracting lower part from the vector.
23315 : This by itself needs no code and most of SSE/AVX instructions have
23316 : packed and single forms where the single form may be represented
23317 : by such VEC_SELECT.
23318 :
23319 : Use cost 1 (despite the fact that functionally equivalent SUBREG has
23320 : cost 0). Making VEC_SELECT completely free, for example instructs CSE
23321 : to forward propagate VEC_SELECT into
23322 :
23323 : (set (reg eax) (reg src))
23324 :
23325 : which then prevents fwprop and combining. See i.e.
23326 : gcc.target/i386/pr91103-1.c.
23327 :
23328 : ??? rtvec_series_p test should be, for valid patterns, equivalent to
23329 : vec_series_lowpart_p but is not, since the latter calls
23330 : can_change_mode_class on ALL_REGS and this returns false since x87 does
23331 : not support subregs at all. */
23332 2428541 : if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
23333 761356 : *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
23334 761356 : outer_code, opno, speed) + 1;
23335 : else
23336 : /* ??? We should still recurse when computing cost. */
23337 1667185 : *total = cost->sse_op;
23338 : return true;
23339 :
23340 1222314 : case VEC_DUPLICATE:
23341 2444628 : *total = rtx_cost (XEXP (x, 0),
23342 1222314 : GET_MODE (XEXP (x, 0)),
23343 : VEC_DUPLICATE, 0, speed);
23344 : /* It's broadcast instruction, not embedded broadcasting. */
23345 1222314 : if (outer_code == SET)
23346 1174031 : *total += cost->sse_op;
23347 :
23348 : return true;
23349 :
23350 722832 : case VEC_MERGE:
23351 722832 : mask = XEXP (x, 2);
23352 : /* Scalar versions of SSE instructions may be represented as:
23353 :
23354 : (vec_merge (vec_duplicate (operation ....))
23355 : (register or memory)
23356 : (const_int 1))
23357 :
23358 : In this case vec_merge and vec_duplicate are free.
23359 : Just recurse into operation and second operand. */
23360 722832 : if (mask == const1_rtx
23361 212737 : && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
23362 : {
23363 75683 : *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23364 : outer_code, opno, speed)
23365 75683 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23366 75683 : return true;
23367 : }
23368 : /* This is masked instruction, assume the same cost,
23369 : as nonmasked variant. */
23370 647149 : else if (TARGET_AVX512F
23371 647149 : && (register_operand (mask, GET_MODE (mask))
23372 : /* Redundant clean up of high bits for kmask with VL=2/4,
23373 : i.e. (vec_merge op0, op1, (and op3 15)). */
23374 120092 : || (GET_CODE (mask) == AND
23375 372 : && register_operand (XEXP (mask, 0), GET_MODE (mask))
23376 372 : && CONST_INT_P (XEXP (mask, 1))
23377 372 : && ((INTVAL (XEXP (mask, 1)) == 3
23378 131 : && GET_MODE_NUNITS (mode) == 2)
23379 241 : || (INTVAL (XEXP (mask, 1)) == 15
23380 241 : && GET_MODE_NUNITS (mode) == 4)))))
23381 : {
23382 373858 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23383 373858 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23384 373858 : return true;
23385 : }
23386 : /* Combination of the two above:
23387 :
23388 : (vec_merge (vec_merge (vec_duplicate (operation ...))
23389 : (register or memory)
23390 : (reg:QI mask))
23391 : (register or memory)
23392 : (const_int 1))
23393 :
23394 : i.e. avx512fp16_vcvtss2sh_mask. */
23395 273291 : else if (TARGET_AVX512F
23396 119720 : && mask == const1_rtx
23397 46523 : && GET_CODE (XEXP (x, 0)) == VEC_MERGE
23398 27158 : && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
23399 275553 : && register_operand (XEXP (XEXP (x, 0), 2),
23400 2262 : GET_MODE (XEXP (XEXP (x, 0), 2))))
23401 : {
23402 2250 : *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23403 : mode, outer_code, opno, speed)
23404 2250 : + rtx_cost (XEXP (XEXP (x, 0), 1),
23405 : mode, outer_code, opno, speed)
23406 2250 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23407 2250 : return true;
23408 : }
23409 : /* vcmp. */
23410 271041 : else if (unspec_pcmp_p (mask)
23411 271041 : || (GET_CODE (mask) == NOT
23412 0 : && unspec_pcmp_p (XEXP (mask, 0))))
23413 : {
23414 1950 : rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
23415 1950 : rtx unsop0 = XVECEXP (uns, 0, 0);
23416 : /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
23417 : cost the same as register.
23418 : This is used by avx_cmp<mode>3_ltint_not. */
23419 1950 : if (SUBREG_P (unsop0))
23420 417 : unsop0 = XEXP (unsop0, 0);
23421 1950 : if (GET_CODE (unsop0) == NOT)
23422 18 : unsop0 = XEXP (unsop0, 0);
23423 1950 : *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23424 1950 : + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
23425 1950 : + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
23426 1950 : + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
23427 1950 : + cost->sse_op;
23428 1950 : return true;
23429 : }
23430 : else
23431 269091 : *total = cost->sse_op;
23432 269091 : return false;
23433 :
23434 106906249 : case MEM:
23435 : /* CONST_VECTOR_DUPLICATE_P in constant_pool is just a broadcast,
23436 : or one of the variants in ix86_vector_duplicate_simode_const. */
23437 :
23438 106906249 : if (GET_MODE_SIZE (mode) >= 16
23439 18132281 : && VECTOR_MODE_P (mode)
23440 12149520 : && SYMBOL_REF_P (XEXP (x, 0))
23441 2207900 : && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23442 108884910 : && ix86_broadcast_from_constant (mode, x))
23443 : {
23444 489636 : *total = COSTS_N_INSNS (2) + speed;
23445 489636 : return true;
23446 : }
23447 :
23448 : /* An insn that accesses memory is slightly more expensive
23449 : than one that does not. */
23450 106416613 : if (speed)
23451 : {
23452 95187471 : *total += 1;
23453 95187471 : rtx addr = XEXP (x, 0);
23454 : /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
23455 : so for MEM (reg) and MEM (reg + 4), the former costs 5,
23456 : the latter costs 9, it is not accurate for x86. Ideally
23457 : address_cost should be used, but it reduces the cost too much.
23458 : So the current solution is to make a constant disp as cheap as possible. */
23459 95187471 : if (GET_CODE (addr) == PLUS
23460 77648239 : && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23461 : /* Only handle (reg + disp) since other forms of addr are mostly LEA,
23462 : there's no additional cost for the plus of disp. */
23463 167253568 : && register_operand (XEXP (addr, 0), Pmode))
23464 : {
23465 55981716 : *total += 1;
23466 68838212 : *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23467 55981716 : return true;
23468 : }
23469 : }
23470 :
23471 : return false;
23472 :
23473 52521 : case ZERO_EXTRACT:
23474 52521 : if (XEXP (x, 1) == const1_rtx
23475 11449 : && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23476 0 : && GET_MODE (XEXP (x, 2)) == SImode
23477 0 : && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23478 : {
23479 : /* Ignore cost of zero extension and masking of last argument. */
23480 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23481 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23482 0 : *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23483 0 : return true;
23484 : }
23485 : return false;
23486 :
23487 29127102 : case IF_THEN_ELSE:
23488 29127102 : if (TARGET_XOP
23489 25126 : && VECTOR_MODE_P (mode)
23490 29132493 : && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23491 : {
23492 : /* vpcmov. */
23493 4823 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23494 4823 : if (!REG_P (XEXP (x, 0)))
23495 4663 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23496 4823 : if (!REG_P (XEXP (x, 1)))
23497 4630 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23498 4823 : if (!REG_P (XEXP (x, 2)))
23499 4632 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23500 4823 : return true;
23501 : }
23502 0 : else if (TARGET_CMOVE
23503 29122279 : && SCALAR_INT_MODE_P (mode)
23504 31512079 : && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23505 : {
23506 : /* cmov. */
23507 2194828 : *total = COSTS_N_INSNS (1);
23508 2194828 : if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23509 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23510 2194828 : if (!REG_P (XEXP (x, 1)))
23511 114188 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23512 2194828 : if (!REG_P (XEXP (x, 2)))
23513 705676 : *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23514 2194828 : return true;
23515 : }
23516 : return false;
23517 :
23518 18177926 : case EQ:
23519 18177926 : case GT:
23520 18177926 : case GTU:
23521 18177926 : case LT:
23522 18177926 : case LTU:
23523 18177926 : if (TARGET_SSE2
23524 18174728 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23525 18510841 : && GET_MODE_SIZE (mode) >= 8)
23526 : {
23527 : /* vpcmpeq */
23528 328350 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
23529 328350 : if (!REG_P (XEXP (x, 0)))
23530 62557 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23531 328350 : if (!REG_P (XEXP (x, 1)))
23532 125533 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23533 328350 : return true;
23534 : }
23535 17849576 : if (TARGET_XOP
23536 12261 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23537 17849684 : && GET_MODE_SIZE (mode) <= 16)
23538 : {
23539 : /* vpcomeq */
23540 108 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23541 108 : if (!REG_P (XEXP (x, 0)))
23542 0 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23543 108 : if (!REG_P (XEXP (x, 1)))
23544 0 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23545 108 : return true;
23546 : }
23547 : return false;
23548 :
23549 16001796 : case NE:
23550 16001796 : case GE:
23551 16001796 : case GEU:
23552 16001796 : if (TARGET_XOP
23553 21866 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23554 16008550 : && GET_MODE_SIZE (mode) <= 16)
23555 : {
23556 : /* vpcomneq */
23557 6754 : *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
23558 6754 : if (!REG_P (XEXP (x, 0)))
23559 1401 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23560 6754 : if (!REG_P (XEXP (x, 1)))
23561 5734 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23562 6754 : return true;
23563 : }
23564 15995042 : if (TARGET_SSE2
23565 15992935 : && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
23566 15996351 : && GET_MODE_SIZE (mode) >= 8)
23567 : {
23568 1333 : if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
23569 : /* vpcmpeq + vpternlog */
23570 40 : *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
23571 : else
23572 : /* vpcmpeq + pxor + vpcmpeq */
23573 1267 : *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
23574 1285 : if (!REG_P (XEXP (x, 0)))
23575 28 : *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23576 1285 : if (!REG_P (XEXP (x, 1)))
23577 28 : *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23578 1285 : return true;
23579 : }
23580 : return false;
23581 :
23582 : default:
23583 : return false;
23584 : }
23585 : }
23586 :
23587 : #if TARGET_MACHO
23588 :
/* Incremented once per call to machopic_output_stub; the new value is
   embedded in the "L<n>$lz" and "LPC$<n>" labels that function prints.  */
23589 : static int current_machopic_label_num;
23590 :
23591 : /* Given a symbol name and its associated stub, write out the
23592 : definition of the stub.
   FILE receives the assembly text, SYMB is the symbol name (its name
   encoding is stripped before use) and STUB is the label printed for the
   stub itself.  MACHOPIC_ATT_STUB and MACHOPIC_PURE select between three
   stub bodies and the sections they are placed in.  Except for the
   MACHOPIC_ATT_STUB flavour, a binder entry and a lazy pointer entry are
   written after the stub.  Must not be reached for TARGET_64BIT.  */
23593 :
23594 : void
23595 : machopic_output_stub (FILE *file, const char *symb, const char *stub)
23596 : {
23597 : unsigned int length;
23598 : char *binder_name, *symbol_name, lazy_ptr_name[32];
23599 : int label = ++current_machopic_label_num;
23600 :
23601 : /* For 64-bit we shouldn't get here. */
23602 : gcc_assert (!TARGET_64BIT);
23603 :
23604 : /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23605 : symb = targetm.strip_name_encoding (symb);
23606 :
/* Both generated names live in alloca'd buffers sized as the input
   length plus 32 bytes of slack for whatever the GEN_* macros add.  */
23607 : length = strlen (stub);
23608 : binder_name = XALLOCAVEC (char, length + 32);
23609 : GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23610 :
23611 : length = strlen (symb);
23612 : symbol_name = XALLOCAVEC (char, length + 32);
23613 : GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23614 :
/* "L" + a decimal int + "$lz" fits comfortably in the 32-byte buffer.  */
23615 : sprintf (lazy_ptr_name, "L%d$lz", label);
23616 :
/* Pick the stub section matching the stub flavour.  */
23617 : if (MACHOPIC_ATT_STUB)
23618 : switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
23619 : else if (MACHOPIC_PURE)
23620 : switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
23621 : else
23622 : switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23623 :
23624 : fprintf (file, "%s:\n", stub);
23625 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23626 :
23627 : if (MACHOPIC_ATT_STUB)
23628 : {
/* The stub body is just five hlt instructions.  */
23629 : fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
23630 : }
23631 : else if (MACHOPIC_PURE)
23632 : {
23633 : /* PIC stub. */
23634 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23635 : rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
23636 : output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
23637 : fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
23638 : label, lazy_ptr_name, label);
23639 : fprintf (file, "\tjmp\t*%%ecx\n");
23640 : }
23641 : else
/* Non-PIC stub: jump indirectly through the lazy pointer.  */
23642 : fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23643 :
23644 : /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
23645 : it needs no stub-binding-helper. */
23646 : if (MACHOPIC_ATT_STUB)
23647 : return;
23648 :
/* Binder entry: push the address of the lazy pointer and jump to
   dyld_stub_binding_helper.  */
23649 : fprintf (file, "%s:\n", binder_name);
23650 :
23651 : if (MACHOPIC_PURE)
23652 : {
23653 : fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
23654 : fprintf (file, "\tpushl\t%%ecx\n");
23655 : }
23656 : else
23657 : fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23658 :
23659 : fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
23660 :
23661 : /* N.B. Keep the correspondence of these
23662 : 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
23663 : old-pic/new-pic/non-pic stubs; altering this will break
23664 : compatibility with existing dylibs. */
23665 : if (MACHOPIC_PURE)
23666 : {
23667 : /* 25-byte PIC stub using "CALL get_pc_thunk". */
23668 : switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
23669 : }
23670 : else
23671 : /* 16-byte -mdynamic-no-pic stub. */
23672 : switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
23673 :
/* Lazy pointer entry; its initial contents are the binder's address.  */
23674 : fprintf (file, "%s:\n", lazy_ptr_name);
23675 : fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23676 : fprintf (file, ASM_LONG "%s\n", binder_name);
23677 : }
23678 : #endif /* TARGET_MACHO */
23679 :
23680 : /* Order the registers for register allocator.
   Fills reg_alloc_order in this sequence: general registers for which
   call_used_or_fixed_reg_p holds, the remaining general registers, the
   x87 stack registers (here only if !TARGET_SSE_MATH), SSE, REX SSE,
   extended REX SSE and mask registers, the x87 stack registers (here
   only if TARGET_SSE_MATH), and finally the MMX registers.  Any slots
   left over up to FIRST_PSEUDO_REGISTER are set to 0.  */
23681 :
23682 : void
23683 217148 : x86_order_regs_for_local_alloc (void)
23684 : {
23685 217148 : int pos = 0;
23686 217148 : int i;
23687 :
23688 : /* First allocate the local general purpose registers, i.e. those for which call_used_or_fixed_reg_p is true. */
23689 20194764 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23690 26926352 : if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
23691 5654824 : reg_alloc_order [pos++] = i;
23692 :
23693 : /* Global general purpose registers, i.e. the remaining general registers. */
23694 20194764 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23695 23191000 : if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
23696 1293912 : reg_alloc_order [pos++] = i;
23697 :
23698 : /* x87 registers come first in case we are doing FP math
23699 : using them. */
23700 217148 : if (!TARGET_SSE_MATH)
23701 57663 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23702 51256 : reg_alloc_order [pos++] = i;
23703 :
23704 : /* SSE registers, followed by the REX SSE registers. */
23705 1954332 : for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23706 1737184 : reg_alloc_order [pos++] = i;
23707 1954332 : for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23708 1737184 : reg_alloc_order [pos++] = i;
23709 :
23710 : /* Extended REX SSE registers. */
23711 3691516 : for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23712 3474368 : reg_alloc_order [pos++] = i;
23713 :
23714 : /* Mask register. */
23715 1954332 : for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23716 1737184 : reg_alloc_order [pos++] = i;
23717 :
23718 : /* x87 registers; with SSE math they are ordered after the SSE and mask registers. */
23719 217148 : if (TARGET_SSE_MATH)
23720 1896669 : for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23721 1685928 : reg_alloc_order [pos++] = i;
23722 :
/* MMX registers come last.  */
23723 1954332 : for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23724 1737184 : reg_alloc_order [pos++] = i;
23725 :
23726 : /* Initialize the rest of array as we do not allocate some registers
23727 : at all. */
23728 1085740 : while (pos < FIRST_PSEUDO_REGISTER)
23729 868592 : reg_alloc_order [pos++] = 0;
23730 217148 : }
23731 :
/* Return true if either TARGET_MS_BITFIELD_LAYOUT is on and RECORD_TYPE
   does not carry the "gcc_struct" attribute, or RECORD_TYPE carries the
   "ms_struct" attribute.  The two attributes thus override the
   target-wide setting in either direction.  */
23732 : static bool
23733 245481131 : ix86_ms_bitfield_layout_p (const_tree record_type)
23734 : {
23735 245481131 : return ((TARGET_MS_BITFIELD_LAYOUT
23736 215 : && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23737 245481131 : || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
23738 : }
23739 :
23740 : /* Returns an expression indicating where the this parameter is
23741 : located on entry to the FUNCTION.
   The result is either a hard register or a stack slot addressed off
   the stack pointer.  AGGR below is true when aggregate_value_p holds
   for FUNCTION's return type; in that case the location is shifted by
   one parameter slot (presumably to make room for a hidden return-slot
   pointer -- confirm against aggregate_value_p).  */
23742 :
23743 : static rtx
23744 1761 : x86_this_parameter (tree function)
23745 : {
23746 1761 : tree type = TREE_TYPE (function);
23747 1761 : bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23748 1761 : int nregs;
23749 :
23750 1761 : if (TARGET_64BIT)
23751 : {
23752 1759 : const int *parm_regs;
23753 :
/* Choose the integer parameter register table for the function's ABI;
   THIS is entry 0, or entry 1 when AGGR is set.  */
23754 1759 : if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
23755 : parm_regs = x86_64_preserve_none_int_parameter_registers;
23756 1759 : else if (ix86_function_type_abi (type) == MS_ABI)
23757 : parm_regs = x86_64_ms_abi_int_parameter_registers;
23758 : else
23759 1759 : parm_regs = x86_64_int_parameter_registers;
23760 1759 : return gen_rtx_REG (Pmode, parm_regs[aggr]);
23761 : }
23762 :
23763 2 : nregs = ix86_function_regparm (type, function);
23764 :
/* 32-bit with register parameters (and not stdarg): the register
   depends on the calling convention.  */
23765 2 : if (nregs > 0 && !stdarg_p (type))
23766 : {
23767 0 : int regno;
23768 0 : unsigned int ccvt = ix86_get_callcvt (type);
23769 :
23770 0 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23771 0 : regno = aggr ? DX_REG : CX_REG;
23772 0 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23773 : {
/* thiscall: THIS is in CX, unless AGGR, in which case it is read from
   the stack at offset 4.  */
23774 0 : regno = CX_REG;
23775 0 : if (aggr)
23776 0 : return gen_rtx_MEM (SImode,
23777 0 : plus_constant (Pmode, stack_pointer_rtx, 4));
23778 : }
23779 : else
23780 : {
/* Otherwise AX, or DX when AGGR; with AGGR and only one register
   parameter THIS is on the stack at offset 4 instead.  */
23781 0 : regno = AX_REG;
23782 0 : if (aggr)
23783 : {
23784 0 : regno = DX_REG;
23785 0 : if (nregs == 1)
23786 0 : return gen_rtx_MEM (SImode,
23787 0 : plus_constant (Pmode,
23788 : stack_pointer_rtx, 4));
23789 : }
23790 : }
23791 0 : return gen_rtx_REG (SImode, regno);
23792 : }
23793 :
/* No register parameters (or stdarg): THIS is on the stack at offset 4,
   or at offset 8 when AGGR is set.  */
23794 4 : return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
23795 4 : aggr ? 8 : 4));
23796 : }
23797 :
23798 : /* Determine whether x86_output_mi_thunk can succeed.
   Only VCALL_OFFSET and FUNCTION are examined; the first two (unnamed)
   parameters are ignored.  Returns false only for 32-bit functions
   whose ix86_function_regparm value is 3 or more and that additionally
   need a register for VCALL_OFFSET or for a GOT reference.  */
23799 :
23800 : static bool
23801 4907 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23802 : const_tree function)
23803 : {
23804 : /* 64-bit can handle anything. */
23805 4907 : if (TARGET_64BIT)
23806 : return true;
23807 :
23808 : /* For 32-bit, everything's fine if we have one free register. */
23809 76 : if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23810 : return true;
23811 :
23812 : /* Need a free register for vcall_offset. */
23813 0 : if (vcall_offset)
23814 : return false;
23815 :
23816 : /* Need a free register for GOT references. */
23817 0 : if (flag_pic && !targetm.binds_local_p (function))
23818 : return false;
23819 :
23820 : /* Otherwise ok. */
23821 : return true;
23822 : }
23823 :
23824 : /* Output the assembler code for a thunk function. THUNK_DECL is the
23825 : declaration for the thunk function itself, FUNCTION is the decl for
23826 : the target function. DELTA is an immediate constant offset to be
23827 : added to THIS. If VCALL_OFFSET is nonzero, the word at
23828 : *(*this + vcall_offset) should be added to THIS.
   (In the parameter list THUNK_DECL is spelled thunk_fndecl.)  FILE is
   the stream handed to final_start_function and final.  The thunk is
   built as RTL: adjust THIS, then tail-call or jump to FUNCTION; the
   insns are then written out directly at the end of this function.  */
23829 :
23830 : static void
23831 1761 : x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
23832 : HOST_WIDE_INT vcall_offset, tree function)
23833 : {
23834 1761 : const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23835 1761 : rtx this_param = x86_this_parameter (function);
23836 1761 : rtx this_reg, tmp, fnaddr;
23837 1761 : unsigned int tmp_regno;
23838 1761 : rtx_insn *insn;
/* Saved here because flag_force_indirect_call may be cleared below; it
   is restored on exit.  */
23839 1761 : int saved_flag_force_indirect_call = flag_force_indirect_call;
23840 :
/* Pick the scratch register: R10 for 64-bit; for 32-bit AX (fastcall),
   DX (thiscall) or CX (otherwise), i.e. a register x86_this_parameter
   does not return for that calling convention.  */
23841 1761 : if (TARGET_64BIT)
23842 : tmp_regno = R10_REG;
23843 : else
23844 : {
23845 2 : unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
23846 2 : if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23847 : tmp_regno = AX_REG;
23848 2 : else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23849 : tmp_regno = DX_REG;
23850 : else
23851 2 : tmp_regno = CX_REG;
23852 :
23853 2 : if (flag_pic)
23854 2 : flag_force_indirect_call = 0;
23855 : }
23856 :
23857 1761 : emit_note (NOTE_INSN_PROLOGUE_END);
23858 :
23859 : /* CET is enabled, insert ENDBR instruction. */
23860 1761 : if ((flag_cf_protection & CF_BRANCH))
23861 20 : emit_insn (gen_nop_endbr ());
23862 :
23863 : /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23864 : pull it in now and let DELTA benefit. */
23865 1761 : if (REG_P (this_param))
23866 : this_reg = this_param;
23867 2 : else if (vcall_offset)
23868 : {
23869 : /* Put the this parameter into %eax. */
23870 2 : this_reg = gen_rtx_REG (Pmode, AX_REG);
23871 1 : emit_move_insn (this_reg, this_param);
23872 : }
23873 : else
23874 : this_reg = NULL_RTX;
23875 :
23876 : /* Adjust the this parameter by a fixed constant. */
23877 1761 : if (delta)
23878 : {
23879 826 : rtx delta_rtx = GEN_INT (delta);
/* Add directly into THIS's stack slot when it was not pulled into a
   register above.  */
23880 826 : rtx delta_dst = this_reg ? this_reg : this_param;
23881 :
23882 826 : if (TARGET_64BIT)
23883 : {
/* A DELTA that is not a valid x86_64_general_operand is first loaded
   into the scratch register.  */
23884 825 : if (!x86_64_general_operand (delta_rtx, Pmode))
23885 : {
23886 0 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23887 0 : emit_move_insn (tmp, delta_rtx);
23888 0 : delta_rtx = tmp;
23889 : }
23890 : }
23891 :
23892 827 : ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
23893 : }
23894 :
23895 : /* Adjust the this parameter by a value stored in the vtable. */
23896 1761 : if (vcall_offset)
23897 : {
23898 986 : rtx vcall_addr, vcall_mem, this_mem;
23899 :
23900 987 : tmp = gen_rtx_REG (Pmode, tmp_regno);
23901 :
/* Load the pointer stored at *THIS into the scratch register,
   zero-extending it when Pmode is wider than ptr_mode.  */
23902 986 : this_mem = gen_rtx_MEM (ptr_mode, this_reg);
23903 987 : if (Pmode != ptr_mode)
23904 0 : this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
23905 986 : emit_move_insn (tmp, this_mem);
23906 :
23907 : /* Adjust the this parameter. */
23908 987 : vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
/* If scratch + VCALL_OFFSET is not a legitimate address, put the offset
   in R11 and use a register + register address instead.  */
23909 986 : if (TARGET_64BIT
23910 986 : && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
23911 : {
23912 0 : rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
23913 0 : emit_move_insn (tmp2, GEN_INT (vcall_offset));
23914 0 : vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
23915 : }
23916 :
23917 986 : vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
23918 987 : if (Pmode != ptr_mode)
23919 0 : emit_insn (gen_addsi_1_zext (this_reg,
23920 : gen_rtx_REG (ptr_mode,
23921 : REGNO (this_reg)),
23922 : vcall_mem));
23923 : else
23924 986 : ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
23925 : }
23926 :
23927 : /* If necessary, drop THIS back to its stack slot. */
23928 1761 : if (this_reg && this_reg != this_param)
23929 1 : emit_move_insn (this_param, this_reg);
23930 :
/* Work out how FUNCTION is addressed.  Without PIC, or when FUNCTION
   binds locally (or, for 64-bit, on TARGET_PECOFF), its symbol is used
   as is; otherwise the address is loaded through the GOT (or, for
   TARGET_MACHO, taken from machopic_indirect_call_target).  */
23931 1761 : fnaddr = XEXP (DECL_RTL (function), 0);
23932 1761 : if (TARGET_64BIT)
23933 : {
23934 25 : if (!flag_pic || targetm.binds_local_p (function)
23935 1784 : || TARGET_PECOFF)
23936 : ;
23937 : else
23938 : {
23939 0 : tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
23940 0 : tmp = gen_rtx_CONST (Pmode, tmp);
23941 0 : fnaddr = gen_const_mem (Pmode, tmp);
23942 : }
23943 : }
23944 : else
23945 : {
23946 2 : if (!flag_pic || targetm.binds_local_p (function))
23947 : ;
23948 : #if TARGET_MACHO
23949 : else if (TARGET_MACHO)
23950 : {
23951 : fnaddr = machopic_indirect_call_target (DECL_RTL (function));
23952 : fnaddr = XEXP (fnaddr, 0);
23953 : }
23954 : #endif /* TARGET_MACHO */
23955 : else
23956 : {
/* 32-bit PIC: set up the GOT pointer in CX and address FUNCTION's GOT
   entry relative to it.  */
23957 0 : tmp = gen_rtx_REG (Pmode, CX_REG);
23958 0 : output_set_got (tmp, NULL_RTX);
23959 :
23960 0 : fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
23961 0 : fnaddr = gen_rtx_CONST (Pmode, fnaddr);
23962 0 : fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
23963 0 : fnaddr = gen_const_mem (Pmode, fnaddr);
23964 : }
23965 : }
23966 :
23967 : /* Our sibling call patterns do not allow memories, because we have no
23968 : predicate that can distinguish between frame and non-frame memory.
23969 : For our purposes here, we can get away with (ab)using a jump pattern,
23970 : because we're going to do no optimization. */
23971 1761 : if (MEM_P (fnaddr))
23972 : {
23973 0 : if (sibcall_insn_operand (fnaddr, word_mode))
23974 : {
/* The memory form is accepted as a sibcall operand: emit a sibling call
   to FUNCTION's own symbol.  */
23975 0 : fnaddr = XEXP (DECL_RTL (function), 0);
23976 0 : tmp = gen_rtx_MEM (QImode, fnaddr);
23977 0 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23978 0 : tmp = emit_call_insn (tmp);
23979 0 : SIBLING_CALL_P (tmp) = 1;
23980 : }
23981 : else
23982 0 : emit_jump_insn (gen_indirect_jump (fnaddr));
23983 : }
23984 : else
23985 : {
23986 1761 : if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
23987 : {
23988 : // CM_LARGE_PIC always uses pseudo PIC register which is
23989 : // uninitialized. Since FUNCTION is local and calling it
23990 : // doesn't go through PLT, we use scratch register %r11 as
23991 : // PIC register and initialize it here.
23992 3 : pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
23993 3 : ix86_init_large_pic_reg (tmp_regno);
23994 3 : fnaddr = legitimize_pic_address (fnaddr,
23995 3 : gen_rtx_REG (Pmode, tmp_regno));
23996 : }
23997 :
/* If the address is not directly usable as a sibcall operand, move it
   into the scratch register first (zero-extended to word_mode when its
   mode differs).  */
23998 1761 : if (!sibcall_insn_operand (fnaddr, word_mode))
23999 : {
24000 9 : tmp = gen_rtx_REG (word_mode, tmp_regno);
24001 9 : if (GET_MODE (fnaddr) != word_mode)
24002 0 : fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
24003 9 : emit_move_insn (tmp, fnaddr);
24004 9 : fnaddr = tmp;
24005 : }
24006 :
24007 1761 : tmp = gen_rtx_MEM (QImode, fnaddr);
24008 1761 : tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
24009 1761 : tmp = emit_call_insn (tmp);
24010 1761 : SIBLING_CALL_P (tmp) = 1;
24011 : }
24012 1761 : emit_barrier ();
24013 :
24014 : /* Emit just enough of rest_of_compilation to get the insns emitted. */
24015 1761 : insn = get_insns ();
24016 1761 : shorten_branches (insn);
24017 1761 : assemble_start_function (thunk_fndecl, fnname);
24018 1761 : final_start_function (insn, file, 1);
24019 1761 : final (insn, file, 1);
24020 1761 : final_end_function ();
24021 1761 : assemble_end_function (thunk_fndecl, fnname);
24022 :
/* Undo the temporary override made for 32-bit PIC above.  */
24023 1761 : flag_force_indirect_call = saved_flag_force_indirect_call;
24024 1761 : }
24025 :
/* Write the start-of-file boilerplate to asm_out_file: the default file
   start, then "\t.code16gcc" for TARGET_16BIT, the Darwin file start
   when built with TARGET_MACHO, and, each guarded by its own condition,
   a .version directive, a .global __fltused directive and the
   .intel_syntax directive for the Intel assembler dialect.  */
24026 : static void
24027 273142 : x86_file_start (void)
24028 : {
24029 273142 : default_file_start ();
24030 273142 : if (TARGET_16BIT)
24031 6 : fputs ("\t.code16gcc\n", asm_out_file);
24032 : #if TARGET_MACHO
24033 : darwin_file_start ();
24034 : #endif
24035 273142 : if (X86_FILE_START_VERSION_DIRECTIVE)
24036 : fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24037 273142 : if (X86_FILE_START_FLTUSED)
24038 : fputs ("\t.global\t__fltused\n", asm_out_file);
24039 273142 : if (ix86_asm_dialect == ASM_INTEL)
24040 68 : fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24041 273142 : }
24042 :
/* Return the alignment to use for a field of type TYPE, given the
   alignment COMPUTED that was worked out for it.
   COMPUTED is returned unchanged for TARGET_64BIT and
   TARGET_ALIGN_DOUBLE; for TARGET_IAMCU the decision is delegated to
   iamcu_alignment.  Otherwise, once array types are stripped, a type
   whose mode is DFmode, DCmode, an integer mode or a complex integer
   mode is capped at 32.  The exception is an _Atomic type with
   COMPUTED > 32: it keeps COMPUTED and, under warn_psabi, triggers a
   note that is issued at most once.  */
24043 : int
24044 102384857 : x86_field_alignment (tree type, int computed)
24045 : {
24046 102384857 : machine_mode mode;
24047 :
24048 102384857 : if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24049 : return computed;
24050 9121225 : if (TARGET_IAMCU)
24051 0 : return iamcu_alignment (type, computed);
24052 9121225 : type = strip_array_types (type);
24053 9121225 : mode = TYPE_MODE (type);
24054 9121225 : if (mode == DFmode || mode == DCmode
24055 9015376 : || GET_MODE_CLASS (mode) == MODE_INT
24056 3016898 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24057 : {
24058 6104327 : if (TYPE_ATOMIC (type) && computed > 32)
24059 : {
/* Function-local static, so the note below is given only once.  */
24060 0 : static bool warned;
24061 :
24062 0 : if (!warned && warn_psabi)
24063 : {
24064 0 : const char *url
24065 : = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
24066 :
24067 0 : warned = true;
24068 0 : inform (input_location, "the alignment of %<_Atomic %T%> "
24069 : "fields changed in %{GCC 11.1%}",
24070 0 : TYPE_MAIN_VARIANT (type), url);
24071 : }
/* No return here: control falls out of the if and reaches the final
   "return computed".  */
24072 : }
24073 : else
24074 6104327 : return MIN (32, computed);
24075 : }
24076 : return computed;
24077 : }
24078 :
24079 : /* Print call to TARGET to FILE.
   LABEL is printed in front of the instruction.  When flag_nop_mcount
   is set or TARGET is the string "nop", a no-op byte sequence is
   printed instead of the call.  Otherwise the call goes through the
   PLT ("TARGET@PLT") for PIC code on non-PECOFF targets, which requires
   flag_plt, and is a plain direct call in all other cases.  */
24080 :
24081 : static void
24082 295 : x86_print_call_or_nop (FILE *file, const char *target,
24083 : const char *label)
24084 : {
24085 295 : if (flag_nop_mcount || !strcmp (target, "nop"))
24086 : {
24087 9 : if (TARGET_16BIT)
24088 : /* 3 byte no-op: lea 0(%si), %si */
24089 1 : fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
24090 : else
24091 : /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
24092 8 : fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
24093 : label);
24094 : }
24095 286 : else if (!TARGET_PECOFF && flag_pic)
24096 : {
/* This path always emits an @PLT call, so it must not be reached
   without flag_plt.  */
24097 8 : gcc_assert (flag_plt);
24098 :
24099 8 : fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
24100 : }
24101 : else
24102 278 : fprintf (file, "%s\tcall\t%s\n", label, target);
24103 295 : }
24104 :
/* If current_function_decl has a "fentry_name" attribute, store the
   string of the attribute's first argument in *NAME and return true.
   Otherwise return false and leave *NAME untouched.  */
24105 : static bool
24106 315 : current_fentry_name (const char **name)
24107 : {
24108 315 : tree attr = lookup_attribute ("fentry_name",
24109 315 : DECL_ATTRIBUTES (current_function_decl));
24110 315 : if (!attr)
24111 : return false;
24112 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24113 2 : return true;
24114 : }
24115 :
/* If current_function_decl has a "fentry_section" attribute, store the
   string of the attribute's first argument in *NAME and return true.
   Otherwise return false and leave *NAME untouched.  */
24116 : static bool
24117 16 : current_fentry_section (const char **name)
24118 : {
24119 16 : tree attr = lookup_attribute ("fentry_section",
24120 16 : DECL_ATTRIBUTES (current_function_decl));
24121 16 : if (!attr)
24122 : return false;
24123 2 : *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
24124 2 : return true;
24125 : }
24126 :
24127 : /* Return a caller-saved register which isn't live or a callee-saved
24128 : register which has been saved on stack in the prologue at entry for
24129 : profile.
   R11_OK says whether %r11 may be picked; it is only consulted when
   NO_PROFILE_COUNTERS is defined, otherwise %r11 is never picked.  If
   no register qualifies, a "sorry" diagnostic is issued and R10_REG is
   returned anyway.  */
24130 :
24131 : static int
24132 17 : x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
24133 : {
24134 : /* Use %r10 if the profiler is emitted before the prologue or it isn't
24135 : used by DRAP. */
24136 17 : if (ix86_profile_before_prologue ()
24137 4 : || !crtl->drap_reg
24138 17 : || REGNO (crtl->drap_reg) != R10_REG)
24139 : return R10_REG;
24140 :
24141 : /* The profiler is emitted after the prologue. If there is a
24142 : caller-saved register which isn't live or a callee-saved
24143 : register saved on stack in the prologue, use it. */
24144 :
24145 0 : bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
24146 :
/* Return the first general register other than %r10 (and, where
   excluded, %r11) that is accessible and either is reported by
   ix86_save_reg or is call-used, not fixed and not in REG_LIVE.  */
24147 0 : int i;
24148 0 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24149 0 : if (GENERAL_REGNO_P (i)
24150 0 : && i != R10_REG
24151 : #ifdef NO_PROFILE_COUNTERS
24152 0 : && (r11_ok || i != R11_REG)
24153 : #else
24154 : && i != R11_REG
24155 : #endif
24156 0 : && TEST_HARD_REG_BIT (accessible_reg_set, i)
24157 0 : && (ix86_save_reg (i, true, true)
24158 0 : || (call_used_regs[i]
24159 0 : && !fixed_regs[i]
24160 0 : && !REGNO_REG_SET_P (reg_live, i))))
24161 0 : return i;
24162 :
24163 0 : sorry ("no register available for profiling %<-mcmodel=large%s%>",
24164 0 : ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
24165 :
24166 0 : return R10_REG;
24167 : }
24168 :
24169 : /* Output assembler code to FILE to increment profiler label # LABELNO
24170 : for profiling a function entry.
   LABELNO is only used when NO_PROFILE_COUNTERS is not defined.  The
   code consists of an optional queued ENDBR / patchable area, the call
   to (or nop in place of) the profiling routine in the form required by
   the target and code model, and, when call sites are being recorded,
   an entry in a separate section pointing back at the call.  */
24171 : void
24172 315 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24173 : {
/* Something was queued to be emitted at function entry: print the
   ENDBR if that is what was queued, then any patchable area beyond
   crtl->patch_area_entry.  */
24174 315 : if (cfun->machine->insn_queued_at_entrance)
24175 : {
24176 7 : if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
24177 6 : fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
24178 7 : unsigned int patch_area_size
24179 7 : = crtl->patch_area_size - crtl->patch_area_entry;
24180 7 : if (patch_area_size)
24181 2 : ix86_output_patchable_area (patch_area_size,
24182 : crtl->patch_area_entry == 0);
24183 : }
24184 :
24185 315 : const char *mcount_name = MCOUNT_NAME;
24186 :
/* Call sites are recorded when flag_record_mcount is set or the
   function has a "fentry_section" attribute.  */
24187 315 : bool fentry_section_p
24188 315 : = (flag_record_mcount
24189 615 : || lookup_attribute ("fentry_section",
24190 300 : DECL_ATTRIBUTES (current_function_decl)));
24191 :
/* The "1:" label marks the call so that the "1b" reference emitted at
   the end of this function can point at it.  */
24192 : const char *label = fentry_section_p ? "1:" : "";
24193 :
/* Name of the routine to call, in decreasing priority: the function's
   "fentry_name" attribute, the fentry_name variable,
   MCOUNT_NAME_BEFORE_PROLOGUE when flag_fentry is set, else
   MCOUNT_NAME.  */
24194 315 : if (current_fentry_name (&mcount_name))
24195 : ;
24196 313 : else if (fentry_name)
24197 1 : mcount_name = fentry_name;
24198 312 : else if (flag_fentry)
24199 300 : mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
24200 :
24201 315 : if (TARGET_64BIT)
24202 : {
24203 : #ifndef NO_PROFILE_COUNTERS
24204 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24205 : fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
24206 : else
24207 : fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
24208 : #endif
24209 :
24210 314 : int scratch;
24211 314 : const char *reg;
/* Zero-filled, so the three characters stored below are always
   NUL-terminated.  */
24212 314 : char legacy_reg[4] = { 0 };
24213 :
24214 314 : if (!TARGET_PECOFF)
24215 : {
24216 314 : switch (ix86_cmodel)
24217 : {
24218 7 : case CM_LARGE:
/* Large model: load the routine's absolute address into a scratch
   register and call through it.  */
24219 7 : scratch = x86_64_select_profile_regnum (true);
24220 7 : reg = hi_reg_name[scratch];
24221 7 : if (LEGACY_INT_REGNO_P (scratch))
24222 : {
/* Build the 64-bit name of a legacy register by putting 'r' in front
   of the first two characters of its hi_reg_name entry.  */
24223 0 : legacy_reg[0] = 'r';
24224 0 : legacy_reg[1] = reg[0];
24225 0 : legacy_reg[2] = reg[1];
24226 0 : reg = legacy_reg;
24227 : }
24228 7 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24229 1 : fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
24230 : "\tcall\t%s\n", label, reg, mcount_name,
24231 : reg);
24232 : else
24233 6 : fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
24234 : label, mcount_name, reg, reg);
24235 : break;
24236 10 : case CM_LARGE_PIC:
24237 : #ifdef NO_PROFILE_COUNTERS
/* Large PIC model: compute the GOT address from the "1:" label using
   %r11 and the scratch register, add the routine's @PLTOFF offset and
   call through the scratch register.  The "1:" label is printed here
   unconditionally, and LABEL is not used on this path.  */
24238 10 : scratch = x86_64_select_profile_regnum (false);
24239 10 : reg = hi_reg_name[scratch];
24240 10 : if (LEGACY_INT_REGNO_P (scratch))
24241 : {
24242 0 : legacy_reg[0] = 'r';
24243 0 : legacy_reg[1] = reg[0];
24244 0 : legacy_reg[2] = reg[1];
24245 0 : reg = legacy_reg;
24246 : }
24247 10 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24248 : {
24249 1 : fprintf (file, "1:movabs\tr11, "
24250 : "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
24251 1 : fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
24252 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24253 1 : fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
24254 : mcount_name);
24255 1 : fprintf (file, "\tadd\t%s, r11\n", reg);
24256 1 : fprintf (file, "\tcall\t%s\n", reg);
24257 1 : break;
24258 : }
24259 9 : fprintf (file,
24260 : "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
24261 9 : fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
24262 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24263 9 : fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
24264 9 : fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
24265 9 : fprintf (file, "\tcall\t*%%%s\n", reg);
24266 : #else
24267 : sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
24268 : #endif
24269 9 : break;
24270 12 : case CM_SMALL_PIC:
24271 12 : case CM_MEDIUM_PIC:
/* Without a PLT, call indirectly through the routine's GOT entry;
   with a PLT, fall through to the generic call below.  */
24272 12 : if (!flag_plt)
24273 : {
24274 3 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24275 0 : fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
24276 : label, mcount_name);
24277 : else
24278 3 : fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
24279 : label, mcount_name);
24280 : break;
24281 : }
24282 : /* fall through */
24283 294 : default:
24284 294 : x86_print_call_or_nop (file, mcount_name, label);
24285 294 : break;
24286 : }
24287 : }
24288 : else
24289 : x86_print_call_or_nop (file, mcount_name, label);
24290 : }
24291 1 : else if (flag_pic)
24292 : {
/* 32-bit PIC: call via the PLT, or through the GOT entry addressed off
   %ebx when there is no PLT.  */
24293 : #ifndef NO_PROFILE_COUNTERS
24294 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24295 : fprintf (file,
24296 : "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
24297 : LPREFIX, labelno);
24298 : else
24299 : fprintf (file,
24300 : "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
24301 : LPREFIX, labelno);
24302 : #endif
24303 0 : if (flag_plt)
24304 0 : x86_print_call_or_nop (file, mcount_name, label);
24305 0 : else if (ASSEMBLER_DIALECT == ASM_INTEL)
24306 0 : fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
24307 : label, mcount_name);
24308 : else
24309 0 : fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
24310 : label, mcount_name);
24311 : }
24312 : else
24313 : {
/* 32-bit non-PIC: plain call (or nop).  */
24314 : #ifndef NO_PROFILE_COUNTERS
24315 : if (ASSEMBLER_DIALECT == ASM_INTEL)
24316 : fprintf (file,
24317 : "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
24318 : LPREFIX, labelno);
24319 : else
24320 : fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
24321 : LPREFIX, labelno);
24322 : #endif
24323 1 : x86_print_call_or_nop (file, mcount_name, label);
24324 : }
24325 :
24326 315 : if (fentry_section_p)
24327 : {
/* Record the address of the "1:" label in a section named, in
   decreasing priority, by the "fentry_section" attribute, the
   fentry_section variable, or "__mcount_loc".  */
24328 16 : const char *sname = "__mcount_loc";
24329 :
24330 16 : if (current_fentry_section (&sname))
24331 : ;
24332 14 : else if (fentry_section)
24333 1 : sname = fentry_section;
24334 :
24335 16 : fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
24336 16 : fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
24337 16 : fprintf (file, "\t.previous\n");
24338 : }
24339 315 : }
24340 :
24341 : /* We don't have exact information about the insn sizes, but we may assume
24342 : quite safely that we are informed about all 1 byte insns and memory
24343 : address sizes. This is enough to eliminate unnecessary padding in
24344 : 99% of cases.
   Return a lower bound, in bytes, on the encoded size of INSN; 0 is
   returned for anything that is not an active insn, for alignment
   markers and for inline asm.  */
24345 :
24346 : int
24347 384213981 : ix86_min_insn_size (rtx_insn *insn)
24348 : {
24349 384213981 : int l = 0, len;
24350 :
24351 384213981 : if (!INSN_P (insn) || !active_insn_p (insn))
24352 500384 : return 0;
24353 :
24354 : /* Discard alignments we've emitted and jump instructions. */
24355 383713597 : if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24356 383713597 : && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24357 : return 0;
24358 :
24359 : /* Important case - calls are always 5 bytes.
24360 : It is common to have many calls in a row. */
24361 383713591 : if (CALL_P (insn)
24362 9151193 : && symbolic_reference_mentioned_p (PATTERN (insn))
24363 392534796 : && !SIBLING_CALL_P (insn))
24364 : return 5;
24365 375129579 : len = get_attr_length (insn);
24366 375129579 : if (len <= 1)
24367 : return 1;
24368 :
24369 : /* For normal instructions we rely on get_attr_length being exact,
24370 : with a few exceptions. */
24371 366506349 : if (!JUMP_P (insn))
24372 : {
24373 361175379 : enum attr_type type = get_attr_type (insn);
24374 :
24375 361175379 : switch (type)
24376 : {
24377 95376 : case TYPE_MULTI:
/* Inline asm has no known minimum size.  */
24378 95376 : if (GET_CODE (PATTERN (insn)) == ASM_INPUT
24379 95376 : || asm_noperands (PATTERN (insn)) >= 0)
24380 527 : return 0;
24381 : break;
24382 : case TYPE_OTHER:
24383 : case TYPE_FCMP:
24384 : break;
24385 : default:
24386 : /* Otherwise trust get_attr_length. */
24387 : return len;
24388 : }
24389 :
/* For the types that broke out of the switch, estimate from the address
   length, counting at least 4 bytes when a symbolic reference is
   mentioned.  */
24390 474423 : l = get_attr_length_address (insn);
24391 474423 : if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24392 : l = 4;
24393 : }
/* One byte on top of the address bytes if there are any; otherwise
   (this includes every jump, for which L is still 0) assume 2 bytes.  */
24394 383933 : if (l)
24395 90490 : return 1+l;
24396 : else
24397 5714903 : return 2;
24398 : }
24399 :
24400 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24401 :
24402 : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24403 : window.
   Walk the insn stream with a sliding interval (START, INSN] and emit a
   max_skip_align insn before the fourth jump whenever four jumps could
   end up in the same 16 byte window.  Sizes come from
   ix86_min_insn_size.  */
24404 :
24405 : static void
24406 45424 : ix86_avoid_jump_mispredicts (void)
24407 : {
24408 45424 : rtx_insn *insn, *start = get_insns ();
24409 45424 : int nbytes = 0, njumps = 0;
/* Whether the insn most recently dropped from the front of the
   interval (the current START) was a jump or call.  */
24410 45424 : bool isjump = false;
24411 :
24412 : /* Look for all minimal intervals of instructions containing 4 jumps.
24413 : The intervals are bounded by START and INSN. NBYTES is the total
24414 : size of instructions in the interval including INSN and not including
24415 : START. When the NBYTES is smaller than 16 bytes, it is possible
24416 : that the end of START and INSN ends up in the same 16byte page.
24417 :
24418 : The smallest offset in the page INSN can start is the case where START
24419 : ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24420 : We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
24421 :
24422 : Don't consider asm goto as jump, while it can contain a jump, it doesn't
24423 : have to, control transfer to label(s) can be performed through other
24424 : means, and also we estimate minimum length of all asm stmts as 0. */
24425 700820 : for (insn = start; insn; insn = NEXT_INSN (insn))
24426 : {
24427 655396 : int min_size;
24428 :
24429 655396 : if (LABEL_P (insn))
24430 : {
24431 961 : align_flags alignment = label_to_alignment (insn);
24432 961 : int align = alignment.levels[0].log;
24433 961 : int max_skip = alignment.levels[0].maxskip;
24434 :
24435 961 : if (max_skip > 15)
24436 : max_skip = 15;
24437 : /* If align > 3, only up to 16 - max_skip - 1 bytes can be
24438 : already in the current 16 byte page, because otherwise
24439 : ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
24440 : bytes to reach 16 byte boundary. */
24441 961 : if (align <= 0
24442 328 : || (align <= 3 && max_skip != (1 << align) - 1))
24443 961 : max_skip = 0;
24444 961 : if (dump_file)
24445 0 : fprintf (dump_file, "Label %i with max_skip %i\n",
24446 0 : INSN_UID (insn), max_skip);
24447 961 : if (max_skip)
24448 : {
/* Shrink the interval from the front until it fits into the bytes that
   can precede this aligned label within a 16 byte page.  */
24449 6278 : while (nbytes + max_skip >= 16)
24450 : {
24451 5950 : start = NEXT_INSN (start);
24452 310 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24453 5967 : || CALL_P (start))
24454 350 : njumps--, isjump = true;
24455 : else
24456 : isjump = false;
24457 5950 : nbytes -= ix86_min_insn_size (start);
24458 : }
24459 : }
24460 961 : continue;
24461 961 : }
24462 :
24463 654435 : min_size = ix86_min_insn_size (insn);
24464 654435 : nbytes += min_size;
24465 654435 : if (dump_file)
24466 0 : fprintf (dump_file, "Insn %i estimated to %i bytes\n",
24467 0 : INSN_UID (insn), min_size);
/* Only jumps (other than asm goto) and calls count; nothing more to do
   for any other insn.  */
24468 46586 : if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
24469 654455 : || CALL_P (insn))
24470 47601 : njumps++;
24471 : else
24472 606834 : continue;
24473 :
/* Advance START until at most three jumps remain in the interval.  */
24474 55999 : while (njumps > 3)
24475 : {
24476 8398 : start = NEXT_INSN (start);
24477 549 : if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
24478 8398 : || CALL_P (start))
24479 1251 : njumps--, isjump = true;
24480 : else
24481 : isjump = false;
24482 8398 : nbytes -= ix86_min_insn_size (start);
24483 : }
24484 47601 : gcc_assert (njumps >= 0);
24485 47601 : if (dump_file)
24486 0 : fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24487 0 : INSN_UID (start), INSN_UID (insn), nbytes);
24488 :
/* START itself is a jump and three more follow within fewer than 16
   bytes: pad before INSN.  */
24489 47601 : if (njumps == 3 && isjump && nbytes < 16)
24490 : {
24491 40 : int padsize = 15 - nbytes + ix86_min_insn_size (insn);
24492 :
24493 40 : if (dump_file)
24494 0 : fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24495 0 : INSN_UID (insn), padsize);
/* NOTE(review): the first operand 4 is presumably the log2 of the 16
   byte alignment -- confirm against the max_skip_align pattern.  */
24496 40 : emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
24497 : }
24498 : }
24499 45424 : }
24500 : #endif
24501 :
24502 : /* AMD Athlon works faster
24503 : when RET is not destination of conditional jump or directly preceded
24504 : by other jump instruction. We avoid the penalty by inserting NOP just
24505 : before the RET instructions in such cases. */
24506 : static void
24507 45144 : ix86_pad_returns (void)
24508 : {
24509 45144 : edge e;
24510 45144 : edge_iterator ei;
24511 :
24512 90312 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24513 : {
24514 45168 : basic_block bb = e->src;
24515 45168 : rtx_insn *ret = BB_END (bb);
24516 45168 : rtx_insn *prev;
24517 45168 : bool replace = false;
24518 :
24519 45158 : if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
24520 90326 : || optimize_bb_for_size_p (bb))
24521 23 : continue;
24522 179724 : for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24523 134161 : if (active_insn_p (prev) || LABEL_P (prev))
24524 : break;
24525 45145 : if (prev && LABEL_P (prev))
24526 : {
24527 43 : edge e;
24528 43 : edge_iterator ei;
24529 :
24530 56 : FOR_EACH_EDGE (e, ei, bb->preds)
24531 146 : if (EDGE_FREQUENCY (e) && e->src->index >= 0
24532 97 : && !(e->flags & EDGE_FALLTHRU))
24533 : {
24534 : replace = true;
24535 : break;
24536 : }
24537 : }
24538 43 : if (!replace)
24539 : {
24540 45109 : prev = prev_active_insn (ret);
24541 45109 : if (prev
24542 45109 : && ((JUMP_P (prev) && any_condjump_p (prev))
24543 44673 : || CALL_P (prev)))
24544 : replace = true;
24545 : /* Empty functions get branch mispredict even when
24546 : the jump destination is not visible to us. */
24547 45109 : if (!prev && !optimize_function_for_size_p (cfun))
24548 : replace = true;
24549 : }
24550 44691 : if (replace)
24551 : {
24552 489 : emit_jump_insn_before (gen_simple_return_internal_long (), ret);
24553 489 : delete_insn (ret);
24554 : }
24555 : }
24556 45144 : }
24557 :
24558 : /* Count the minimum number of instructions in BB. Return 4 if the
24559 : number of instructions >= 4. */
24560 :
24561 : static int
24562 42 : ix86_count_insn_bb (basic_block bb)
24563 : {
24564 42 : rtx_insn *insn;
24565 42 : int insn_count = 0;
24566 :
24567 : /* Count number of instructions in this block. Return 4 if the number
24568 : of instructions >= 4. */
24569 297 : FOR_BB_INSNS (bb, insn)
24570 : {
24571 : /* Only happen in exit blocks. */
24572 291 : if (JUMP_P (insn)
24573 291 : && ANY_RETURN_P (PATTERN (insn)))
24574 : break;
24575 :
24576 267 : if (NONDEBUG_INSN_P (insn)
24577 102 : && GET_CODE (PATTERN (insn)) != USE
24578 351 : && GET_CODE (PATTERN (insn)) != CLOBBER)
24579 : {
24580 84 : insn_count++;
24581 84 : if (insn_count >= 4)
24582 : return insn_count;
24583 : }
24584 : }
24585 :
24586 : return insn_count;
24587 : }
24588 :
24589 :
24590 : /* Count the minimum number of instructions in code path in BB.
24591 : Return 4 if the number of instructions >= 4. */
24592 :
24593 : static int
24594 62 : ix86_count_insn (basic_block bb)
24595 : {
24596 62 : edge e;
24597 62 : edge_iterator ei;
24598 62 : int min_prev_count;
24599 :
24600 : /* Only bother counting instructions along paths with no
24601 : more than 2 basic blocks between entry and exit. Given
24602 : that BB has an edge to exit, determine if a predecessor
24603 : of BB has an edge from entry. If so, compute the number
24604 : of instructions in the predecessor block. If there
24605 : happen to be multiple such blocks, compute the minimum. */
24606 62 : min_prev_count = 4;
24607 145 : FOR_EACH_EDGE (e, ei, bb->preds)
24608 : {
24609 109 : edge prev_e;
24610 109 : edge_iterator prev_ei;
24611 :
24612 109 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24613 : {
24614 26 : min_prev_count = 0;
24615 26 : break;
24616 : }
24617 182 : FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
24618 : {
24619 109 : if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24620 : {
24621 10 : int count = ix86_count_insn_bb (e->src);
24622 10 : if (count < min_prev_count)
24623 83 : min_prev_count = count;
24624 : break;
24625 : }
24626 : }
24627 : }
24628 :
24629 62 : if (min_prev_count < 4)
24630 32 : min_prev_count += ix86_count_insn_bb (bb);
24631 :
24632 62 : return min_prev_count;
24633 : }
24634 :
24635 : /* Pad short function to 4 instructions. */
24636 :
24637 : static void
24638 63 : ix86_pad_short_function (void)
24639 : {
24640 63 : edge e;
24641 63 : edge_iterator ei;
24642 :
24643 128 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24644 : {
24645 65 : rtx_insn *ret = BB_END (e->src);
24646 65 : if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
24647 : {
24648 62 : int insn_count = ix86_count_insn (e->src);
24649 :
24650 : /* Pad short function. */
24651 62 : if (insn_count < 4)
24652 : {
24653 : rtx_insn *insn = ret;
24654 :
24655 : /* Find epilogue. */
24656 : while (insn
24657 60 : && (!NOTE_P (insn)
24658 26 : || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
24659 37 : insn = PREV_INSN (insn);
24660 :
24661 23 : if (!insn)
24662 0 : insn = ret;
24663 :
24664 : /* Two NOPs count as one instruction. */
24665 23 : insn_count = 2 * (4 - insn_count);
24666 23 : emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
24667 : }
24668 : }
24669 : }
24670 63 : }
24671 :
24672 : /* Fix up a Windows system unwinder issue. If an EH region falls through into
24673 : the epilogue, the Windows system unwinder will apply epilogue logic and
24674 : produce incorrect offsets. This can be avoided by adding a nop between
24675 : the last insn that can throw and the first insn of the epilogue. */
24676 :
24677 : static void
24678 0 : ix86_seh_fixup_eh_fallthru (void)
24679 : {
24680 0 : edge e;
24681 0 : edge_iterator ei;
24682 :
24683 0 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24684 : {
24685 0 : rtx_insn *insn, *next;
24686 :
24687 : /* Find the beginning of the epilogue. */
24688 0 : for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
24689 0 : if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
24690 : break;
24691 0 : if (insn == NULL)
24692 0 : continue;
24693 :
24694 : /* We only care about preceding insns that can throw. */
24695 0 : insn = prev_active_insn (insn);
24696 0 : if (insn == NULL || !can_throw_internal (insn))
24697 0 : continue;
24698 :
24699 : /* Do not separate calls from their debug information. */
24700 0 : for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
24701 0 : if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
24702 0 : insn = next;
24703 : else
24704 : break;
24705 :
24706 0 : emit_insn_after (gen_nops (const1_rtx), insn);
24707 : }
24708 0 : }
24709 : /* Split vector load from parm_decl to elemental loads to avoid STLF
24710 : stalls. */
24711 : static void
24712 978962 : ix86_split_stlf_stall_load ()
24713 : {
24714 978962 : rtx_insn* insn, *start = get_insns ();
24715 978962 : unsigned window = 0;
24716 :
24717 26916436 : for (insn = start; insn; insn = NEXT_INSN (insn))
24718 : {
24719 26915586 : if (!NONDEBUG_INSN_P (insn))
24720 15266812 : continue;
24721 11648774 : window++;
24722 : /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
24723 : other, just emulate for pipeline) before stalled load, stlf stall
24724 : case is as fast as no stall cases on CLX.
24725 : Since CFG is freed before machine_reorg, just do a rough
24726 : calculation of the window according to the layout. */
24727 11648774 : if (window > (unsigned) x86_stlf_window_ninsns)
24728 : return;
24729 :
24730 11630798 : if (any_uncondjump_p (insn)
24731 11595036 : || ANY_RETURN_P (PATTERN (insn))
24732 22849311 : || CALL_P (insn))
24733 : return;
24734 :
24735 10670662 : rtx set = single_set (insn);
24736 10670662 : if (!set)
24737 435241 : continue;
24738 10235421 : rtx src = SET_SRC (set);
24739 20470490 : if (!MEM_P (src)
24740 : /* Only handle V2DFmode load since it doesn't need any scratch
24741 : register. */
24742 1462559 : || GET_MODE (src) != E_V2DFmode
24743 5462 : || !MEM_EXPR (src)
24744 10239370 : || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
24745 10235069 : continue;
24746 :
24747 352 : rtx zero = CONST0_RTX (V2DFmode);
24748 352 : rtx dest = SET_DEST (set);
24749 352 : rtx m = adjust_address (src, DFmode, 0);
24750 352 : rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
24751 352 : emit_insn_before (loadlpd, insn);
24752 352 : m = adjust_address (src, DFmode, 8);
24753 352 : rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
24754 352 : if (dump_file && (dump_flags & TDF_DETAILS))
24755 : {
24756 0 : fputs ("Due to potential STLF stall, split instruction:\n",
24757 : dump_file);
24758 0 : print_rtl_single (dump_file, insn);
24759 0 : fputs ("To:\n", dump_file);
24760 0 : print_rtl_single (dump_file, loadlpd);
24761 0 : print_rtl_single (dump_file, loadhpd);
24762 : }
24763 352 : PATTERN (insn) = loadhpd;
24764 352 : INSN_CODE (insn) = -1;
24765 352 : gcc_assert (recog_memoized (insn) != -1);
24766 : }
24767 : }
24768 :
24769 : /* Implement machine specific optimizations. We implement padding of returns
24770 : for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
24771 : static void
24772 1481484 : ix86_reorg (void)
24773 : {
24774 : /* We are freeing block_for_insn in the toplev to keep compatibility
24775 : with old MDEP_REORGS that are not CFG based. Recompute it now. */
24776 1481484 : compute_bb_for_insn ();
24777 :
24778 1481484 : if (TARGET_SEH && current_function_has_exception_handlers ())
24779 : ix86_seh_fixup_eh_fallthru ();
24780 :
24781 1481484 : if (optimize && optimize_function_for_speed_p (cfun))
24782 : {
24783 981264 : if (TARGET_SSE2)
24784 978962 : ix86_split_stlf_stall_load ();
24785 981264 : if (TARGET_PAD_SHORT_FUNCTION)
24786 63 : ix86_pad_short_function ();
24787 981201 : else if (TARGET_PAD_RETURNS)
24788 45144 : ix86_pad_returns ();
24789 : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24790 981264 : if (TARGET_FOUR_JUMP_LIMIT)
24791 45424 : ix86_avoid_jump_mispredicts ();
24792 : #endif
24793 : }
24794 1481484 : }
24795 :
24796 : /* Return nonzero when QImode register that must be represented via REX prefix
24797 : is used. */
24798 : bool
24799 8996484 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24800 : {
24801 8996484 : int i;
24802 8996484 : extract_insn_cached (insn);
24803 34100383 : for (i = 0; i < recog_data.n_operands; i++)
24804 4618044 : if (GENERAL_REG_P (recog_data.operand[i])
24805 22293411 : && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24806 : return true;
24807 : return false;
24808 : }
24809 :
24810 : /* Return true when INSN mentions register that must be encoded using REX
24811 : prefix. */
24812 : bool
24813 196639509 : x86_extended_reg_mentioned_p (rtx insn)
24814 : {
24815 196639509 : subrtx_iterator::array_type array;
24816 1030255745 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24817 : {
24818 881829807 : const_rtx x = *iter;
24819 881829807 : if (REG_P (x)
24820 881829807 : && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
24821 253328702 : || REX2_INT_REGNO_P (REGNO (x))))
24822 48213571 : return true;
24823 : }
24824 148425938 : return false;
24825 196639509 : }
24826 :
24827 : /* Return true when INSN mentions register that must be encoded using REX2
24828 : prefix. */
24829 : bool
24830 2069860 : x86_extended_rex2reg_mentioned_p (rtx insn)
24831 : {
24832 2069860 : subrtx_iterator::array_type array;
24833 9635336 : FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24834 : {
24835 7566151 : const_rtx x = *iter;
24836 7566151 : if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
24837 675 : return true;
24838 : }
24839 2069185 : return false;
24840 2069860 : }
24841 :
24842 : /* Return true when rtx operands mentions register that must be encoded using
24843 : evex prefix. */
24844 : bool
24845 10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
24846 : {
24847 10 : int i;
24848 28 : for (i = 0; i < nops; i++)
24849 22 : if (EXT_REX_SSE_REG_P (operands[i])
24850 40 : || x86_extended_rex2reg_mentioned_p (operands[i]))
24851 4 : return true;
24852 : return false;
24853 : }
24854 :
24855 : /* If profitable, negate (without causing overflow) integer constant
24856 : of mode MODE at location LOC. Return true in this case. */
24857 : bool
24858 5905940 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
24859 : {
24860 5905940 : HOST_WIDE_INT val;
24861 :
24862 5905940 : if (!CONST_INT_P (*loc))
24863 : return false;
24864 :
24865 4980409 : switch (mode)
24866 : {
24867 2829283 : case E_DImode:
24868 : /* DImode x86_64 constants must fit in 32 bits. */
24869 2829283 : gcc_assert (x86_64_immediate_operand (*loc, mode));
24870 :
24871 : mode = SImode;
24872 : break;
24873 :
24874 : case E_SImode:
24875 : case E_HImode:
24876 : case E_QImode:
24877 : break;
24878 :
24879 0 : default:
24880 0 : gcc_unreachable ();
24881 : }
24882 :
24883 : /* Avoid overflows. */
24884 4980409 : if (mode_signbit_p (mode, *loc))
24885 : return false;
24886 :
24887 4979888 : val = INTVAL (*loc);
24888 :
24889 : /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
24890 : Exceptions: -128 encodes smaller than 128, so swap sign and op. */
24891 4979888 : if ((val < 0 && val != -128)
24892 3272732 : || val == 128)
24893 : {
24894 1718364 : *loc = GEN_INT (-val);
24895 1718364 : return true;
24896 : }
24897 :
24898 : return false;
24899 : }
24900 :
24901 : /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24902 : optabs would emit if we didn't have TFmode patterns. */
24903 :
24904 : void
24905 4512 : x86_emit_floatuns (rtx operands[2])
24906 : {
24907 4512 : rtx_code_label *neglab, *donelab;
24908 4512 : rtx i0, i1, f0, in, out;
24909 4512 : machine_mode mode, inmode;
24910 :
24911 4512 : inmode = GET_MODE (operands[1]);
24912 4512 : gcc_assert (inmode == SImode || inmode == DImode);
24913 :
24914 4512 : out = operands[0];
24915 4512 : in = force_reg (inmode, operands[1]);
24916 4512 : mode = GET_MODE (out);
24917 4512 : neglab = gen_label_rtx ();
24918 4512 : donelab = gen_label_rtx ();
24919 4512 : f0 = gen_reg_rtx (mode);
24920 :
24921 4512 : emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24922 :
24923 4512 : expand_float (out, in, 0);
24924 :
24925 4512 : emit_jump_insn (gen_jump (donelab));
24926 4512 : emit_barrier ();
24927 :
24928 4512 : emit_label (neglab);
24929 :
24930 4512 : i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24931 : 1, OPTAB_DIRECT);
24932 4512 : i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24933 : 1, OPTAB_DIRECT);
24934 4512 : i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24935 :
24936 4512 : expand_float (f0, i0, 0);
24937 :
24938 4512 : emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
24939 :
24940 4512 : emit_label (donelab);
24941 4512 : }
24942 :
24943 : /* Return the diagnostic message string if conversion from FROMTYPE to
24944 : TOTYPE is not allowed, NULL otherwise. */
24945 :
24946 : static const char *
24947 1081008935 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
24948 : {
24949 1081008935 : machine_mode from_mode = element_mode (fromtype);
24950 1081008935 : machine_mode to_mode = element_mode (totype);
24951 :
24952 1081008935 : if (!TARGET_SSE2 && from_mode != to_mode)
24953 : {
24954 : /* Do no allow conversions to/from BFmode/HFmode scalar types
24955 : when TARGET_SSE2 is not available. */
24956 468009 : if (from_mode == BFmode)
24957 : return N_("invalid conversion from type %<__bf16%> "
24958 : "without option %<-msse2%>");
24959 468008 : if (from_mode == HFmode)
24960 : return N_("invalid conversion from type %<_Float16%> "
24961 : "without option %<-msse2%>");
24962 468008 : if (to_mode == BFmode)
24963 : return N_("invalid conversion to type %<__bf16%> "
24964 : "without option %<-msse2%>");
24965 468008 : if (to_mode == HFmode)
24966 : return N_("invalid conversion to type %<_Float16%> "
24967 : "without option %<-msse2%>");
24968 : }
24969 :
24970 : /* Warn for silent implicit conversion between __bf16 and short,
24971 : since __bfloat16 is refined as real __bf16 instead of short
24972 : since GCC13. */
24973 1081008933 : if (element_mode (fromtype) != element_mode (totype)
24974 1081008933 : && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
24975 : {
24976 : /* Warn for silent implicit conversion where user may expect
24977 : a bitcast. */
24978 7766483 : if ((TYPE_MODE (fromtype) == BFmode
24979 279 : && TYPE_MODE (totype) == HImode)
24980 7766761 : || (TYPE_MODE (totype) == BFmode
24981 423 : && TYPE_MODE (fromtype) == HImode))
24982 1 : warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
24983 : "to real %<__bf16%> since GCC 13.1, be careful of "
24984 : "implicit conversion between %<__bf16%> and %<short%>; "
24985 : "an explicit bitcast may be needed here");
24986 : }
24987 :
24988 : /* Conversion allowed. */
24989 : return NULL;
24990 : }
24991 :
24992 : /* Return the diagnostic message string if the unary operation OP is
24993 : not permitted on TYPE, NULL otherwise. */
24994 :
24995 : static const char *
24996 90909272 : ix86_invalid_unary_op (int op, const_tree type)
24997 : {
24998 90909272 : machine_mode mmode = element_mode (type);
24999 : /* Reject all single-operand operations on BFmode/HFmode except for &
25000 : when TARGET_SSE2 is not available. */
25001 90909272 : if (!TARGET_SSE2 && op != ADDR_EXPR)
25002 : {
25003 111098 : if (mmode == BFmode)
25004 : return N_("operation not permitted on type %<__bf16%> "
25005 : "without option %<-msse2%>");
25006 111098 : if (mmode == HFmode)
25007 0 : return N_("operation not permitted on type %<_Float16%> "
25008 : "without option %<-msse2%>");
25009 : }
25010 :
25011 : /* Operation allowed. */
25012 : return NULL;
25013 : }
25014 :
25015 : /* Return the diagnostic message string if the binary operation OP is
25016 : not permitted on TYPE1 and TYPE2, NULL otherwise. */
25017 :
25018 : static const char *
25019 160518388 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
25020 : const_tree type2)
25021 : {
25022 160518388 : machine_mode type1_mode = element_mode (type1);
25023 160518388 : machine_mode type2_mode = element_mode (type2);
25024 : /* Reject all 2-operand operations on BFmode or HFmode
25025 : when TARGET_SSE2 is not available. */
25026 160518388 : if (!TARGET_SSE2)
25027 : {
25028 1008820 : if (type1_mode == BFmode || type2_mode == BFmode)
25029 : return N_("operation not permitted on type %<__bf16%> "
25030 : "without option %<-msse2%>");
25031 :
25032 1008820 : if (type1_mode == HFmode || type2_mode == HFmode)
25033 0 : return N_("operation not permitted on type %<_Float16%> "
25034 : "without option %<-msse2%>");
25035 : }
25036 :
25037 : /* Operation allowed. */
25038 : return NULL;
25039 : }
25040 :
25041 :
25042 : /* Target hook for scalar_mode_supported_p. */
25043 : static bool
25044 4583380 : ix86_scalar_mode_supported_p (scalar_mode mode)
25045 : {
25046 4583380 : if (DECIMAL_FLOAT_MODE_P (mode))
25047 631001 : return default_decimal_float_supported_p ();
25048 3952379 : else if (mode == TFmode)
25049 : return true;
25050 3629887 : else if (mode == HFmode || mode == BFmode)
25051 : return true;
25052 : else
25053 2986886 : return default_scalar_mode_supported_p (mode);
25054 : }
25055 :
25056 : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
25057 : if MODE is HFmode, and punt to the generic implementation otherwise. */
25058 :
25059 : static bool
25060 2214613 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
25061 : {
25062 : /* NB: Always return TRUE for HFmode so that the _Float16 type will
25063 : be defined by the C front-end for AVX512FP16 intrinsics. We will
25064 : issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
25065 : enabled. */
25066 1893588 : return ((mode == HFmode || mode == BFmode)
25067 3787176 : ? true
25068 1572563 : : default_libgcc_floating_mode_supported_p (mode));
25069 : }
25070 :
25071 : /* Implements target hook vector_mode_supported_p. */
25072 : static bool
25073 1345721517 : ix86_vector_mode_supported_p (machine_mode mode)
25074 : {
25075 : /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
25076 : either. */
25077 1482728891 : if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
25078 : return false;
25079 1345721119 : if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
25080 : return true;
25081 1131608173 : if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
25082 : return true;
25083 507241341 : if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
25084 : return true;
25085 366713585 : if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
25086 : return true;
25087 232509567 : if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
25088 232453211 : && VALID_MMX_REG_MODE (mode))
25089 : return true;
25090 33475189 : if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
25091 32839907 : && VALID_MMX_REG_MODE_3DNOW (mode))
25092 : return true;
25093 22351555 : if (mode == V2QImode)
25094 24808 : return true;
25095 : return false;
25096 : }
25097 :
25098 : /* Target hook for c_mode_for_suffix. */
25099 : static machine_mode
25100 196240 : ix86_c_mode_for_suffix (char suffix)
25101 : {
25102 196240 : if (suffix == 'q')
25103 : return TFmode;
25104 37 : if (suffix == 'w')
25105 : return XFmode;
25106 :
25107 0 : return VOIDmode;
25108 : }
25109 :
25110 : /* Helper function to map common constraints to non-EGPR ones.
25111 : All related constraints have h prefix, and h plus Upper letter
25112 : means the constraint is strictly EGPR enabled, while h plus
25113 : lower letter indicates the constraint is strictly gpr16 only.
25114 :
25115 : Specially for "g" constraint, split it to rmi as there is
25116 : no corresponding general constraint define for backend.
25117 :
25118 : Here is the full list to map constraints that may involve
25119 : gpr to h prefixed.
25120 :
25121 : "g" -> "jrjmi"
25122 : "r" -> "jr"
25123 : "m" -> "jm"
25124 : "<" -> "j<"
25125 : ">" -> "j>"
25126 : "o" -> "jo"
25127 : "V" -> "jV"
25128 : "p" -> "jp"
25129 : "Bm" -> "ja"
25130 : */
25131 :
25132 43 : static void map_egpr_constraints (vec<const char *> &constraints)
25133 : {
25134 53 : for (size_t i = 0; i < constraints.length(); i++)
25135 : {
25136 10 : const char *cur = constraints[i];
25137 :
25138 10 : if (startswith (cur, "=@cc"))
25139 0 : continue;
25140 :
25141 10 : int len = strlen (cur);
25142 10 : auto_vec<char> buf;
25143 :
25144 24 : for (int j = 0; j < len; j++)
25145 : {
25146 14 : switch (cur[j])
25147 : {
25148 2 : case 'g':
25149 2 : buf.safe_push ('j');
25150 2 : buf.safe_push ('r');
25151 2 : buf.safe_push ('j');
25152 2 : buf.safe_push ('m');
25153 2 : buf.safe_push ('i');
25154 2 : break;
25155 8 : case 'r':
25156 8 : case 'm':
25157 8 : case '<':
25158 8 : case '>':
25159 8 : case 'o':
25160 8 : case 'V':
25161 8 : case 'p':
25162 8 : buf.safe_push ('j');
25163 8 : buf.safe_push (cur[j]);
25164 8 : break;
25165 0 : case 'B':
25166 0 : if (cur[j + 1] == 'm')
25167 : {
25168 0 : buf.safe_push ('j');
25169 0 : buf.safe_push ('a');
25170 0 : j++;
25171 : }
25172 : else
25173 : {
25174 0 : buf.safe_push (cur[j]);
25175 0 : buf.safe_push (cur[j + 1]);
25176 0 : j++;
25177 : }
25178 : break;
25179 0 : case 'T':
25180 0 : case 'Y':
25181 0 : case 'W':
25182 0 : case 'j':
25183 0 : buf.safe_push (cur[j]);
25184 0 : buf.safe_push (cur[j + 1]);
25185 0 : j++;
25186 0 : break;
25187 0 : case '{':
25188 0 : do
25189 : {
25190 0 : buf.safe_push (cur[j]);
25191 0 : } while (cur[j++] != '}');
25192 : break;
25193 4 : default:
25194 4 : buf.safe_push (cur[j]);
25195 4 : break;
25196 : }
25197 : }
25198 10 : buf.safe_push ('\0');
25199 20 : constraints[i] = xstrdup (buf.address ());
25200 10 : }
25201 43 : }
25202 :
25203 : /* Worker function for TARGET_MD_ASM_ADJUST.
25204 :
25205 : We implement asm flag outputs, and maintain source compatibility
25206 : with the old cc0-based compiler. */
25207 :
25208 : static rtx_insn *
25209 108265 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
25210 : vec<machine_mode> & /*input_modes*/,
25211 : vec<const char *> &constraints, vec<rtx> &/*uses*/,
25212 : vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
25213 : location_t loc)
25214 : {
25215 108265 : bool saw_asm_flag = false;
25216 :
25217 108265 : start_sequence ();
25218 :
25219 108265 : if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
25220 43 : map_egpr_constraints (constraints);
25221 :
25222 292349 : for (unsigned i = 0, n = outputs.length (); i < n; ++i)
25223 : {
25224 76686 : const char *con = constraints[i];
25225 76686 : if (!startswith (con, "=@cc"))
25226 76598 : continue;
25227 88 : con += 4;
25228 88 : if (strchr (con, ',') != NULL)
25229 : {
25230 1 : error_at (loc, "alternatives not allowed in %<asm%> flag output");
25231 1 : continue;
25232 : }
25233 :
25234 87 : bool invert = false;
25235 87 : if (con[0] == 'n')
25236 19 : invert = true, con++;
25237 :
25238 87 : machine_mode mode = CCmode;
25239 87 : rtx_code code = UNKNOWN;
25240 :
25241 87 : switch (con[0])
25242 : {
25243 15 : case 'a':
25244 15 : if (con[1] == 0)
25245 : mode = CCAmode, code = EQ;
25246 4 : else if (con[1] == 'e' && con[2] == 0)
25247 : mode = CCCmode, code = NE;
25248 : break;
25249 11 : case 'b':
25250 11 : if (con[1] == 0)
25251 : mode = CCCmode, code = EQ;
25252 6 : else if (con[1] == 'e' && con[2] == 0)
25253 : mode = CCAmode, code = NE;
25254 : break;
25255 14 : case 'c':
25256 14 : if (con[1] == 0)
25257 : mode = CCCmode, code = EQ;
25258 : break;
25259 8 : case 'e':
25260 8 : if (con[1] == 0)
25261 : mode = CCZmode, code = EQ;
25262 : break;
25263 11 : case 'g':
25264 11 : if (con[1] == 0)
25265 : mode = CCGCmode, code = GT;
25266 5 : else if (con[1] == 'e' && con[2] == 0)
25267 : mode = CCGCmode, code = GE;
25268 : break;
25269 10 : case 'l':
25270 10 : if (con[1] == 0)
25271 : mode = CCGCmode, code = LT;
25272 5 : else if (con[1] == 'e' && con[2] == 0)
25273 : mode = CCGCmode, code = LE;
25274 : break;
25275 4 : case 'o':
25276 4 : if (con[1] == 0)
25277 : mode = CCOmode, code = EQ;
25278 : break;
25279 4 : case 'p':
25280 4 : if (con[1] == 0)
25281 : mode = CCPmode, code = EQ;
25282 : break;
25283 4 : case 's':
25284 4 : if (con[1] == 0)
25285 : mode = CCSmode, code = EQ;
25286 : break;
25287 6 : case 'z':
25288 6 : if (con[1] == 0)
25289 : mode = CCZmode, code = EQ;
25290 : break;
25291 : }
25292 1 : if (code == UNKNOWN)
25293 : {
25294 1 : error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
25295 1 : continue;
25296 : }
25297 86 : if (invert)
25298 19 : code = reverse_condition (code);
25299 :
25300 86 : rtx dest = outputs[i];
25301 86 : if (!saw_asm_flag)
25302 : {
25303 : /* This is the first asm flag output. Here we put the flags
25304 : register in as the real output and adjust the condition to
25305 : allow it. */
25306 75 : constraints[i] = "=Bf";
25307 75 : outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
25308 75 : saw_asm_flag = true;
25309 : }
25310 : else
25311 : {
25312 : /* We don't need the flags register as output twice. */
25313 11 : constraints[i] = "=X";
25314 11 : outputs[i] = gen_rtx_SCRATCH (SImode);
25315 : }
25316 :
25317 86 : rtx x = gen_rtx_REG (mode, FLAGS_REG);
25318 86 : x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
25319 :
25320 86 : machine_mode dest_mode = GET_MODE (dest);
25321 86 : if (!SCALAR_INT_MODE_P (dest_mode))
25322 : {
25323 3 : error_at (loc, "invalid type for %<asm%> flag output");
25324 3 : continue;
25325 : }
25326 :
25327 83 : if (dest_mode == QImode)
25328 73 : emit_insn (gen_rtx_SET (dest, x));
25329 : else
25330 : {
25331 10 : rtx reg = gen_reg_rtx (QImode);
25332 10 : emit_insn (gen_rtx_SET (reg, x));
25333 :
25334 10 : reg = convert_to_mode (dest_mode, reg, 1);
25335 10 : emit_move_insn (dest, reg);
25336 : }
25337 : }
25338 :
25339 108265 : rtx_insn *seq = end_sequence ();
25340 :
25341 108265 : if (saw_asm_flag)
25342 : return seq;
25343 : else
25344 : {
25345 : /* If we had no asm flag outputs, clobber the flags. */
25346 108190 : clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
25347 108190 : SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
25348 108190 : return NULL;
25349 : }
25350 : }
25351 :
25352 : /* Implements target vector targetm.asm.encode_section_info. */
25353 :
25354 : static void ATTRIBUTE_UNUSED
25355 9919141 : ix86_encode_section_info (tree decl, rtx rtl, int first)
25356 : {
25357 9919141 : default_encode_section_info (decl, rtl, first);
25358 :
25359 9919141 : if (ix86_in_large_data_p (decl))
25360 32 : SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
25361 9919141 : }
25362 :
25363 : /* Worker function for REVERSE_CONDITION. */
25364 :
25365 : enum rtx_code
25366 31648513 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
25367 : {
25368 31648513 : return (mode == CCFPmode
25369 31648513 : ? reverse_condition_maybe_unordered (code)
25370 27290037 : : reverse_condition (code));
25371 : }
25372 :
25373 : /* Output code to perform an x87 FP register move, from OPERANDS[1]
25374 : to OPERANDS[0]. */
25375 :
25376 : const char *
25377 648974 : output_387_reg_move (rtx_insn *insn, rtx *operands)
25378 : {
25379 648974 : if (REG_P (operands[0]))
25380 : {
25381 543915 : if (REG_P (operands[1])
25382 543915 : && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25383 : {
25384 295699 : if (REGNO (operands[0]) == FIRST_STACK_REG)
25385 275113 : return output_387_ffreep (operands, 0);
25386 : return "fstp\t%y0";
25387 : }
25388 248216 : if (STACK_TOP_P (operands[0]))
25389 248216 : return "fld%Z1\t%y1";
25390 : return "fst\t%y0";
25391 : }
25392 105059 : else if (MEM_P (operands[0]))
25393 : {
25394 105059 : gcc_assert (REG_P (operands[1]));
25395 105059 : if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25396 : return "fstp%Z0\t%y0";
25397 : else
25398 : {
25399 : /* There is no non-popping store to memory for XFmode.
25400 : So if we need one, follow the store with a load. */
25401 6219 : if (GET_MODE (operands[0]) == XFmode)
25402 : return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
25403 : else
25404 1888 : return "fst%Z0\t%y0";
25405 : }
25406 : }
25407 : else
25408 0 : gcc_unreachable();
25409 : }
25410 : #ifdef TARGET_SOLARIS
25411 : /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
25412 :
25413 : static void
25414 : i386_solaris_elf_named_section (const char *name, unsigned int flags,
25415 : tree decl)
25416 : {
25417 : /* With Binutils 2.15, the "@unwind" marker must be specified on
25418 : every occurrence of the ".eh_frame" section, not just the first
25419 : one. */
25420 : if (TARGET_64BIT
25421 : && strcmp (name, ".eh_frame") == 0)
25422 : {
25423 : fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25424 : flags & SECTION_WRITE ? "aw" : "a");
25425 : return;
25426 : }
25427 :
25428 : #if HAVE_SOLARIS_AS
25429 : if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
25430 : {
25431 : solaris_elf_asm_comdat_section (name, flags, decl);
25432 : return;
25433 : }
25434 :
25435 : /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
25436 : SPARC assembler. One cannot mix single-letter flags and #exclude, so
25437 : only emit the latter here. */
25438 : if (flags & SECTION_EXCLUDE)
25439 : {
25440 : fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
25441 : return;
25442 : }
25443 : #endif
25444 :
25445 : default_elf_asm_named_section (name, flags, decl);
25446 : }
25447 : #endif /* TARGET_SOLARIS */
25448 :
25449 : /* Return the mangling of TYPE if it is an extended fundamental type. */
25450 :
25451 : static const char *
25452 1044940986 : ix86_mangle_type (const_tree type)
25453 : {
25454 1044940986 : type = TYPE_MAIN_VARIANT (type);
25455 :
25456 1044940986 : if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25457 : && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25458 : return NULL;
25459 :
25460 566870465 : if (type == float128_type_node || type == float64x_type_node)
25461 : return NULL;
25462 :
25463 566182858 : switch (TYPE_MODE (type))
25464 : {
25465 : case E_BFmode:
25466 : return "DF16b";
25467 301154 : case E_HFmode:
25468 : /* _Float16 is "DF16_".
25469 : Align with clang's decision in https://reviews.llvm.org/D33719. */
25470 301154 : return "DF16_";
25471 639048 : case E_TFmode:
25472 : /* __float128 is "g". */
25473 639048 : return "g";
25474 7854653 : case E_XFmode:
25475 : /* "long double" or __float80 is "e". */
25476 7854653 : return "e";
25477 : default:
25478 : return NULL;
25479 : }
25480 : }
25481 :
25482 : /* Create C++ tinfo symbols for only conditionally available fundamental
25483 : types. */
25484 :
25485 : static void
25486 5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
25487 : {
25488 5 : extern tree ix86_float16_type_node;
25489 5 : extern tree ix86_bf16_type_node;
25490 :
25491 5 : if (!TARGET_SSE2)
25492 : {
25493 0 : if (!float16_type_node)
25494 0 : float16_type_node = ix86_float16_type_node;
25495 0 : if (!bfloat16_type_node)
25496 0 : bfloat16_type_node = ix86_bf16_type_node;
25497 0 : callback (float16_type_node);
25498 0 : callback (bfloat16_type_node);
25499 0 : float16_type_node = NULL_TREE;
25500 0 : bfloat16_type_node = NULL_TREE;
25501 : }
25502 5 : }
25503 :
25504 : static GTY(()) tree ix86_tls_stack_chk_guard_decl;
25505 :
25506 : static tree
25507 330 : ix86_stack_protect_guard (void)
25508 : {
25509 330 : if (TARGET_SSP_TLS_GUARD)
25510 : {
25511 254 : tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
25512 254 : int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
25513 254 : tree type = build_qualified_type (type_node, qual);
25514 254 : tree t;
25515 :
25516 254 : if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
25517 : {
25518 1 : t = ix86_tls_stack_chk_guard_decl;
25519 :
25520 1 : if (t == NULL)
25521 : {
25522 1 : rtx x;
25523 :
25524 1 : t = build_decl
25525 1 : (UNKNOWN_LOCATION, VAR_DECL,
25526 : get_identifier (ix86_stack_protector_guard_symbol_str),
25527 : type);
25528 1 : TREE_STATIC (t) = 1;
25529 1 : TREE_PUBLIC (t) = 1;
25530 1 : DECL_EXTERNAL (t) = 1;
25531 1 : TREE_USED (t) = 1;
25532 1 : TREE_THIS_VOLATILE (t) = 1;
25533 1 : DECL_ARTIFICIAL (t) = 1;
25534 1 : DECL_IGNORED_P (t) = 1;
25535 :
25536 : /* Do not share RTL as the declaration is visible outside of
25537 : current function. */
25538 1 : x = DECL_RTL (t);
25539 1 : RTX_FLAG (x, used) = 1;
25540 :
25541 1 : ix86_tls_stack_chk_guard_decl = t;
25542 : }
25543 : }
25544 : else
25545 : {
25546 253 : tree asptrtype = build_pointer_type (type);
25547 :
25548 253 : t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
25549 253 : t = build2 (MEM_REF, asptrtype, t,
25550 : build_int_cst (asptrtype, 0));
25551 253 : TREE_THIS_VOLATILE (t) = 1;
25552 : }
25553 :
25554 254 : return t;
25555 : }
25556 :
25557 76 : return default_stack_protect_guard ();
25558 : }
25559 :
25560 : /* Implement TARGET_STACK_PROTECT_GUARD_SYMBOL_P. */
25561 :
25562 : static bool
25563 210021 : ix86_stack_protect_guard_symbol_p (void)
25564 : {
25565 210021 : return TARGET_SSP_GLOBAL_GUARD;
25566 : }
25567 :
25568 : static bool
25569 903 : ix86_stack_protect_runtime_enabled_p (void)
25570 : {
25571 : /* Naked functions should not enable stack protector. */
25572 903 : return !ix86_function_naked (current_function_decl);
25573 : }
25574 :
25575 : /* For 32-bit code we can save PIC register setup by using
25576 : __stack_chk_fail_local hidden function instead of calling
25577 : __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25578 : register, so it is better to call __stack_chk_fail directly. */
25579 :
25580 : static tree ATTRIBUTE_UNUSED
25581 322 : ix86_stack_protect_fail (void)
25582 : {
25583 322 : return TARGET_64BIT
25584 322 : ? default_external_stack_protect_fail ()
25585 1 : : default_hidden_stack_protect_fail ();
25586 : }
25587 :
25588 : /* Select a format to encode pointers in exception handling data. CODE
25589 : is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25590 : true if the symbol may be affected by dynamic relocations.
25591 :
25592 : ??? All x86 object file formats are capable of representing this.
25593 : After all, the relocation needed is the same as for the call insn.
25594 : Whether or not a particular assembler allows us to enter such, I
25595 : guess we'll have to see. */
25596 :
25597 : int
25598 787705 : asm_preferred_eh_data_format (int code, int global)
25599 : {
25600 : /* PE-COFF is effectively always -fPIC because of the .reloc section. */
25601 787705 : if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
25602 : {
25603 38900 : int type = DW_EH_PE_sdata8;
25604 38900 : if (ptr_mode == SImode
25605 24928 : || ix86_cmodel == CM_SMALL_PIC
25606 38986 : || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25607 : type = DW_EH_PE_sdata4;
25608 54465 : return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25609 : }
25610 :
25611 748805 : if (ix86_cmodel == CM_SMALL
25612 18662 : || (ix86_cmodel == CM_MEDIUM && code))
25613 730156 : return DW_EH_PE_udata4;
25614 :
25615 : return DW_EH_PE_absptr;
25616 : }
25617 :
25618 : /* Worker for ix86_builtin_vectorization_cost and the fallback calls
25619 : from ix86_vector_costs::add_stmt_cost. */
25620 : static int
25621 15280036 : ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
25622 : machine_mode mode)
25623 : {
25624 15280036 : bool fp = FLOAT_MODE_P (mode);
25625 15280036 : int index;
25626 15280036 : switch (type_of_cost)
25627 : {
25628 1744155 : case scalar_stmt:
25629 1744155 : return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
25630 :
25631 1816823 : case scalar_load:
25632 : /* load/store costs are relative to register move which is 2. Recompute
25633 : it to COSTS_N_INSNS so everything have same base. */
25634 3633646 : return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
25635 1816823 : : ix86_cost->int_load [2]) / 2;
25636 :
25637 3955936 : case scalar_store:
25638 7911872 : return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
25639 3955936 : : ix86_cost->int_store [2]) / 2;
25640 :
25641 1198915 : case vector_stmt:
25642 2397830 : return ix86_vec_cost (mode,
25643 2397830 : fp ? ix86_cost->addss : ix86_cost->sse_op);
25644 :
25645 1932301 : case vector_load:
25646 1932301 : index = sse_store_index (mode);
25647 : /* See PR82713 - we may end up being called on non-vector type. */
25648 1932301 : if (index < 0)
25649 99592 : index = 2;
25650 1932301 : return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
25651 :
25652 976071 : case vector_store:
25653 976071 : index = sse_store_index (mode);
25654 : /* See PR82713 - we may end up being called on non-vector type. */
25655 976071 : if (index < 0)
25656 91647 : index = 2;
25657 976071 : return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
25658 :
25659 831614 : case vec_to_scalar:
25660 831614 : case scalar_to_vec:
25661 831614 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25662 :
25663 : /* We should have separate costs for unaligned loads and gather/scatter.
25664 : Do that incrementally. */
25665 513407 : case unaligned_load:
25666 513407 : index = sse_store_index (mode);
25667 : /* See PR82713 - we may end up being called on non-vector type. */
25668 513407 : if (index < 0)
25669 2708 : index = 2;
25670 513407 : return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
25671 :
25672 841768 : case unaligned_store:
25673 841768 : index = sse_store_index (mode);
25674 : /* See PR82713 - we may end up being called on non-vector type. */
25675 841768 : if (index < 0)
25676 17180 : index = 2;
25677 841768 : return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
25678 :
25679 0 : case vector_gather_load:
25680 0 : return ix86_vec_cost (mode,
25681 0 : COSTS_N_INSNS
25682 : (ix86_cost->gather_static
25683 : + ix86_cost->gather_per_elt
25684 0 : * GET_MODE_NUNITS (mode)) / 2);
25685 :
25686 0 : case vector_scatter_store:
25687 0 : return ix86_vec_cost (mode,
25688 0 : COSTS_N_INSNS
25689 : (ix86_cost->scatter_static
25690 : + ix86_cost->scatter_per_elt
25691 0 : * GET_MODE_NUNITS (mode)) / 2);
25692 :
25693 356788 : case cond_branch_taken:
25694 356788 : return ix86_cost->cond_taken_branch_cost;
25695 :
25696 8538 : case cond_branch_not_taken:
25697 8538 : return ix86_cost->cond_not_taken_branch_cost;
25698 :
25699 285446 : case vec_perm:
25700 285446 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25701 :
25702 89252 : case vec_promote_demote:
25703 89252 : if (fp)
25704 11654 : return vec_fp_conversion_cost (ix86_tune_cost, mode);
25705 77598 : return ix86_vec_cost (mode, ix86_cost->sse_op);
25706 :
25707 729022 : case vec_construct:
25708 729022 : {
25709 729022 : int n = GET_MODE_NUNITS (mode);
25710 : /* N - 1 element inserts into an SSE vector, the possible
25711 : GPR -> XMM move is accounted for in add_stmt_cost. */
25712 1458044 : if (GET_MODE_BITSIZE (mode) <= 128)
25713 722698 : return (n - 1) * ix86_cost->sse_op;
25714 : /* One vinserti128 for combining two SSE vectors for AVX256. */
25715 12648 : else if (GET_MODE_BITSIZE (mode) == 256)
25716 5052 : return ((n - 2) * ix86_cost->sse_op
25717 5052 : + ix86_vec_cost (mode, ix86_cost->sse_op));
25718 : /* One vinserti64x4 and two vinserti128 for combining SSE
25719 : and AVX256 vectors to AVX512. */
25720 2544 : else if (GET_MODE_BITSIZE (mode) == 512)
25721 : {
25722 1272 : machine_mode half_mode
25723 1272 : = mode_for_vector (GET_MODE_INNER (mode),
25724 2544 : GET_MODE_NUNITS (mode) / 2).require ();
25725 1272 : return ((n - 4) * ix86_cost->sse_op
25726 1272 : + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
25727 1272 : + ix86_vec_cost (mode, ix86_cost->sse_op));
25728 : }
25729 0 : gcc_unreachable ();
25730 : }
25731 :
25732 0 : default:
25733 0 : gcc_unreachable ();
25734 : }
25735 : }
25736 :
25737 : /* Implement targetm.vectorize.builtin_vectorization_cost. */
25738 : static int
25739 9184377 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25740 : tree vectype, int)
25741 : {
25742 9184377 : machine_mode mode = TImode;
25743 9184377 : if (vectype != NULL)
25744 5836730 : mode = TYPE_MODE (vectype);
25745 9184377 : return ix86_default_vector_cost (type_of_cost, mode);
25746 : }
25747 :
25748 :
25749 : /* This function returns the calling abi specific va_list type node.
25750 : It returns the FNDECL specific va_list type. */
25751 :
25752 : static tree
25753 47389 : ix86_fn_abi_va_list (tree fndecl)
25754 : {
25755 47389 : if (!TARGET_64BIT)
25756 726 : return va_list_type_node;
25757 46663 : gcc_assert (fndecl != NULL_TREE);
25758 :
25759 46663 : if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
25760 12868 : return ms_va_list_type_node;
25761 : else
25762 33795 : return sysv_va_list_type_node;
25763 : }
25764 :
25765 : /* Returns the canonical va_list type specified by TYPE. If there
25766 : is no valid TYPE provided, it return NULL_TREE. */
25767 :
25768 : static tree
25769 246475 : ix86_canonical_va_list_type (tree type)
25770 : {
25771 246475 : if (TARGET_64BIT)
25772 : {
25773 245973 : if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
25774 5944 : return ms_va_list_type_node;
25775 :
25776 240029 : if ((TREE_CODE (type) == ARRAY_TYPE
25777 49945 : && integer_zerop (array_type_nelts_minus_one (type)))
25778 240029 : || POINTER_TYPE_P (type))
25779 : {
25780 188197 : tree elem_type = TREE_TYPE (type);
25781 188197 : if (TREE_CODE (elem_type) == RECORD_TYPE
25782 339650 : && lookup_attribute ("sysv_abi va_list",
25783 151453 : TYPE_ATTRIBUTES (elem_type)))
25784 151453 : return sysv_va_list_type_node;
25785 : }
25786 :
25787 88576 : return NULL_TREE;
25788 : }
25789 :
25790 502 : return std_canonical_va_list_type (type);
25791 : }
25792 :
25793 : /* Iterate through the target-specific builtin types for va_list.
25794 : IDX denotes the iterator, *PTREE is set to the result type of
25795 : the va_list builtin, and *PNAME to its internal type.
25796 : Returns zero if there is no element for this index, otherwise
25797 : IDX should be increased upon the next call.
25798 : Note, do not iterate a base builtin's name like __builtin_va_list.
25799 : Used from c_common_nodes_and_builtins. */
25800 :
25801 : static int
25802 619196 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25803 : {
25804 619196 : if (TARGET_64BIT)
25805 : {
25806 613824 : switch (idx)
25807 : {
25808 : default:
25809 : break;
25810 :
25811 204608 : case 0:
25812 204608 : *ptree = ms_va_list_type_node;
25813 204608 : *pname = "__builtin_ms_va_list";
25814 204608 : return 1;
25815 :
25816 204608 : case 1:
25817 204608 : *ptree = sysv_va_list_type_node;
25818 204608 : *pname = "__builtin_sysv_va_list";
25819 204608 : return 1;
25820 : }
25821 : }
25822 :
25823 : return 0;
25824 : }
25825 :
/* Point the scheduler-related target hook macros below at their ix86
   implementations, replacing any earlier definitions.  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
25839 :
25840 :
25841 : /* Implementation of reassociation_width target hook used by
25842 : reassoc phase to identify parallelism level in reassociated
25843 : tree. Statements tree_code is passed in OPC. Arguments type
25844 : is passed in MODE. */
25845 :
25846 : static int
25847 28501 : ix86_reassociation_width (unsigned int op, machine_mode mode)
25848 : {
25849 28501 : int width = 1;
25850 : /* Vector part. */
25851 28501 : if (VECTOR_MODE_P (mode))
25852 : {
25853 8372 : int div = 1;
25854 8372 : if (INTEGRAL_MODE_P (mode))
25855 2705 : width = ix86_cost->reassoc_vec_int;
25856 5667 : else if (FLOAT_MODE_P (mode))
25857 5667 : width = ix86_cost->reassoc_vec_fp;
25858 :
25859 8372 : if (width == 1)
25860 : return 1;
25861 :
25862 : /* Znver1-4 Integer vector instructions execute in FP unit
25863 : and can execute 3 additions and one multiplication per cycle. */
25864 8366 : if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
25865 8366 : || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
25866 8366 : || ix86_tune == PROCESSOR_C86_4G_M4
25867 8366 : || ix86_tune == PROCESSOR_C86_4G_M6
25868 8366 : || ix86_tune == PROCESSOR_C86_4G_M7)
25869 0 : && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25870 : return 1;
25871 : /* Znver5 can do 2 integer multiplications per cycle with latency
25872 : of 3. */
25873 8366 : if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
25874 0 : && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25875 8366 : width = 6;
25876 :
25877 : /* Account for targets that splits wide vectors into multiple parts. */
25878 8366 : if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
25879 0 : div = GET_MODE_BITSIZE (mode) / 256;
25880 8366 : else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
25881 0 : div = GET_MODE_BITSIZE (mode) / 128;
25882 8366 : else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
25883 0 : div = GET_MODE_BITSIZE (mode) / 64;
25884 8366 : width = (width + div - 1) / div;
25885 8366 : }
25886 : /* Scalar part. */
25887 : else if (INTEGRAL_MODE_P (mode))
25888 14151 : width = ix86_cost->reassoc_int;
25889 : else if (FLOAT_MODE_P (mode))
25890 5978 : width = ix86_cost->reassoc_fp;
25891 :
25892 : /* Avoid using too many registers in 32bit mode. */
25893 28495 : if (!TARGET_64BIT && width > 2)
25894 28501 : width = 2;
25895 : return width;
25896 : }
25897 :
25898 : /* ??? No autovectorization into MMX or 3DNOW until we can reliably
25899 : place emms and femms instructions. */
25900 :
25901 : static machine_mode
25902 5152797 : ix86_preferred_simd_mode (scalar_mode mode)
25903 : {
25904 5152797 : if (!TARGET_SSE)
25905 859 : return word_mode;
25906 :
25907 5151938 : switch (mode)
25908 : {
25909 417353 : case E_QImode:
25910 417353 : if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25911 : return V64QImode;
25912 405925 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25913 : return V32QImode;
25914 : else
25915 385750 : return V16QImode;
25916 :
25917 195980 : case E_HImode:
25918 195980 : if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25919 : return V32HImode;
25920 185510 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25921 : return V16HImode;
25922 : else
25923 169365 : return V8HImode;
25924 :
25925 1517797 : case E_SImode:
25926 1517797 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25927 : return V16SImode;
25928 1450244 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25929 : return V8SImode;
25930 : else
25931 1297123 : return V4SImode;
25932 :
25933 1881210 : case E_DImode:
25934 1881210 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25935 : return V8DImode;
25936 1477122 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25937 : return V4DImode;
25938 : else
25939 1415192 : return V2DImode;
25940 :
25941 142648 : case E_HFmode:
25942 142648 : if (TARGET_AVX512FP16)
25943 : {
25944 141902 : if (TARGET_AVX512VL)
25945 : {
25946 69031 : if (TARGET_PREFER_AVX128)
25947 : return V8HFmode;
25948 68809 : else if (TARGET_PREFER_AVX256)
25949 : return V16HFmode;
25950 : }
25951 139559 : return V32HFmode;
25952 : }
25953 746 : return word_mode;
25954 :
25955 62894 : case E_BFmode:
25956 62894 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25957 : return V32BFmode;
25958 26462 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25959 : return V16BFmode;
25960 : else
25961 13459 : return V8BFmode;
25962 :
25963 610034 : case E_SFmode:
25964 610034 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25965 : return V16SFmode;
25966 409048 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25967 : return V8SFmode;
25968 : else
25969 342695 : return V4SFmode;
25970 :
25971 288423 : case E_DFmode:
25972 288423 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25973 : return V8DFmode;
25974 167933 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25975 : return V4DFmode;
25976 115465 : else if (TARGET_SSE2)
25977 : return V2DFmode;
25978 : /* FALLTHRU */
25979 :
25980 35655 : default:
25981 35655 : return word_mode;
25982 : }
25983 : }
25984 :
25985 : /* If AVX is enabled then try vectorizing with both 256bit and 128bit
25986 : vectors. If AVX512F is enabled then try vectorizing with 512bit,
25987 : 256bit and 128bit vectors. */
25988 :
25989 : static unsigned int
25990 2197412 : ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
25991 : {
25992 2197412 : if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25993 : {
25994 74820 : modes->safe_push (V64QImode);
25995 74820 : modes->safe_push (V32QImode);
25996 74820 : modes->safe_push (V16QImode);
25997 : }
25998 2122592 : else if (TARGET_AVX512F && all)
25999 : {
26000 558 : modes->safe_push (V32QImode);
26001 558 : modes->safe_push (V16QImode);
26002 558 : modes->safe_push (V64QImode);
26003 : }
26004 2122034 : else if (TARGET_AVX && !TARGET_PREFER_AVX128)
26005 : {
26006 28728 : modes->safe_push (V32QImode);
26007 28728 : modes->safe_push (V16QImode);
26008 : }
26009 2093306 : else if (TARGET_AVX && all)
26010 : {
26011 24 : modes->safe_push (V16QImode);
26012 24 : modes->safe_push (V32QImode);
26013 : }
26014 2093282 : else if (TARGET_SSE2)
26015 2091016 : modes->safe_push (V16QImode);
26016 :
26017 2197412 : if (TARGET_MMX_WITH_SSE)
26018 1800857 : modes->safe_push (V8QImode);
26019 :
26020 2197412 : if (TARGET_SSE2)
26021 2195146 : modes->safe_push (V4QImode);
26022 :
26023 2197412 : return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
26024 : }
26025 :
26026 : /* Implemenation of targetm.vectorize.get_mask_mode. */
26027 :
26028 : static opt_machine_mode
26029 3365759 : ix86_get_mask_mode (machine_mode data_mode)
26030 : {
26031 3365759 : unsigned vector_size = GET_MODE_SIZE (data_mode);
26032 3365759 : unsigned nunits = GET_MODE_NUNITS (data_mode);
26033 3365759 : unsigned elem_size = vector_size / nunits;
26034 :
26035 : /* Scalar mask case. */
26036 479380 : if ((TARGET_AVX512F && vector_size == 64)
26037 3250527 : || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
26038 : /* AVX512FP16 only supports vector comparison
26039 : to kmask for _Float16. */
26040 3075961 : || (TARGET_AVX512VL && TARGET_AVX512FP16
26041 18345 : && GET_MODE_INNER (data_mode) == E_HFmode)
26042 6446393 : || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
26043 : {
26044 292821 : if (elem_size == 4
26045 292821 : || elem_size == 8
26046 135117 : || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
26047 262370 : return smallest_int_mode_for_size (nunits).require ();
26048 : }
26049 :
26050 3103389 : scalar_int_mode elem_mode
26051 3103389 : = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
26052 :
26053 3103389 : gcc_assert (elem_size * nunits == vector_size);
26054 :
26055 3103389 : return mode_for_vector (elem_mode, nunits);
26056 : }
26057 :
26058 :
26059 :
26060 : /* Return class of registers which could be used for pseudo of MODE
26061 : and of class RCLASS for spilling instead of memory. Return NO_REGS
26062 : if it is not possible or non-profitable. */
26063 :
26064 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26065 :
26066 : static reg_class_t
26067 6246357973 : ix86_spill_class (reg_class_t rclass, machine_mode mode)
26068 : {
26069 6246357973 : if (0 && TARGET_GENERAL_REGS_SSE_SPILL
26070 : && TARGET_SSE2
26071 : && TARGET_INTER_UNIT_MOVES_TO_VEC
26072 : && TARGET_INTER_UNIT_MOVES_FROM_VEC
26073 : && (mode == SImode || (TARGET_64BIT && mode == DImode))
26074 : && INTEGER_CLASS_P (rclass))
26075 : return ALL_SSE_REGS;
26076 6246357973 : return NO_REGS;
26077 : }
26078 :
26079 : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
26080 : but returns a lower bound. */
26081 :
26082 : static unsigned int
26083 1856398 : ix86_max_noce_ifcvt_seq_cost (edge e)
26084 : {
26085 1856398 : bool predictable_p = predictable_edge_p (e);
26086 1856398 : if (predictable_p)
26087 : {
26088 143887 : if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
26089 8 : return param_max_rtl_if_conversion_predictable_cost;
26090 : }
26091 : else
26092 : {
26093 1712511 : if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
26094 73 : return param_max_rtl_if_conversion_unpredictable_cost;
26095 : }
26096 :
26097 : /* For modern machines with deeper pipeline, the penalty for branch
26098 : misprediction could be higher than before to reset the pipeline
26099 : slots. Add parameter br_mispredict_scale as a factor to describe
26100 : the impact of reseting the pipeline. */
26101 :
26102 1856317 : return BRANCH_COST (true, predictable_p)
26103 1856317 : * ix86_tune_cost->br_mispredict_scale;
26104 : }
26105 :
26106 : /* Return true if SEQ is a good candidate as a replacement for the
26107 : if-convertible sequence described in IF_INFO. */
26108 :
26109 : static bool
26110 201269 : ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
26111 : {
26112 201269 : if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
26113 : {
26114 : int cmov_cnt = 0;
26115 : /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
26116 : Maybe we should allow even more conditional moves as long as they
26117 : are used far enough not to stall the CPU, or also consider
26118 : IF_INFO->TEST_BB succ edge probabilities. */
26119 247 : for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
26120 : {
26121 205 : rtx set = single_set (insn);
26122 205 : if (!set)
26123 0 : continue;
26124 205 : if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
26125 163 : continue;
26126 42 : rtx src = SET_SRC (set);
26127 42 : machine_mode mode = GET_MODE (src);
26128 42 : if (GET_MODE_CLASS (mode) != MODE_INT
26129 0 : && GET_MODE_CLASS (mode) != MODE_FLOAT)
26130 0 : continue;
26131 42 : if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
26132 41 : || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
26133 1 : continue;
26134 : /* insn is CMOV or FCMOV. */
26135 41 : if (++cmov_cnt > 1)
26136 : return false;
26137 : }
26138 : }
26139 :
26140 : /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
26141 : for movdfcc/movsfcc, and could possibly fail cost comparison.
26142 : Increase branch cost will hurt performance for other modes, so
26143 : specially add some preference for floating point ifcvt. */
26144 201261 : if (!TARGET_SSE4_1 && if_info->x
26145 157779 : && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
26146 34073 : && if_info->speed_p)
26147 : {
26148 27049 : unsigned cost = seq_cost (seq, true);
26149 :
26150 27049 : if (cost <= if_info->original_cost)
26151 : return true;
26152 :
26153 25860 : return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
26154 : }
26155 :
26156 174212 : return default_noce_conversion_profitable_p (seq, if_info);
26157 : }
26158 :
26159 : /* x86-specific vector costs. */
26160 : class ix86_vector_costs : public vector_costs
26161 : {
26162 : public:
26163 : ix86_vector_costs (vec_info *, bool);
26164 :
26165 : unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
26166 : stmt_vec_info stmt_info, slp_tree node,
26167 : tree vectype, int misalign,
26168 : vect_cost_model_location where) override;
26169 : void finish_cost (const vector_costs *) override;
26170 : bool better_main_loop_than_p (const vector_costs *) const override;
26171 : bool better_epilogue_loop_than_p (const vector_costs *other,
26172 : loop_vec_info main_loop) const;
26173 :
26174 : private:
26175 :
26176 : /* Estimate register pressure of the vectorized code. */
26177 : void ix86_vect_estimate_reg_pressure ();
26178 : /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
26179 : estimation of register pressure.
26180 : ??? Currently it's only used by vec_construct/scalar_to_vec
26181 : where we know it's not loaded from memory. */
26182 : unsigned m_num_gpr_needed[3];
26183 : unsigned m_num_sse_needed[3];
26184 : /* Number of 256-bit vector permutation. */
26185 : unsigned m_num_avx256_vec_perm[3];
26186 : /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
26187 : unsigned m_num_reduc[X86_REDUC_LAST];
26188 : /* Don't do unroll if m_prefer_unroll is false, default is true. */
26189 : bool m_prefer_unroll;
26190 : };
26191 :
26192 2101264 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
26193 : : vector_costs (vinfo, costing_for_scalar),
26194 2101264 : m_num_gpr_needed (),
26195 2101264 : m_num_sse_needed (),
26196 2101264 : m_num_avx256_vec_perm (),
26197 2101264 : m_num_reduc (),
26198 2101264 : m_prefer_unroll (true)
26199 2101264 : {}
26200 :
26201 : /* Implement targetm.vectorize.create_costs. */
26202 :
26203 : static vector_costs *
26204 2101264 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
26205 : {
26206 2101264 : return new ix86_vector_costs (vinfo, costing_for_scalar);
26207 : }
26208 :
26209 : unsigned
26210 7499381 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
26211 : stmt_vec_info stmt_info, slp_tree node,
26212 : tree vectype, int,
26213 : vect_cost_model_location where)
26214 : {
26215 7499381 : unsigned retval = 0;
26216 7499381 : bool scalar_p
26217 : = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
26218 7499381 : int stmt_cost = - 1;
26219 :
26220 7499381 : bool fp = false;
26221 7499381 : machine_mode mode = scalar_p ? SImode : TImode;
26222 :
26223 7499381 : if (vectype != NULL)
26224 : {
26225 3374739 : fp = FLOAT_TYPE_P (vectype);
26226 3374739 : mode = TYPE_MODE (vectype);
26227 3374739 : if (scalar_p)
26228 284497 : mode = TYPE_MODE (TREE_TYPE (vectype));
26229 : }
26230 : /* When we are costing a scalar stmt use the scalar stmt to get at the
26231 : type of the operation. */
26232 4124642 : else if (scalar_p && stmt_info)
26233 4041207 : if (tree lhs = gimple_get_lhs (stmt_info->stmt))
26234 : {
26235 3865540 : fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
26236 3865540 : mode = TYPE_MODE (TREE_TYPE (lhs));
26237 : }
26238 :
26239 7499381 : if ((kind == vector_stmt || kind == scalar_stmt)
26240 1976343 : && stmt_info
26241 9466596 : && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
26242 : {
26243 1583998 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26244 : /*machine_mode inner_mode = mode;
26245 : if (VECTOR_MODE_P (mode))
26246 : inner_mode = GET_MODE_INNER (mode);*/
26247 :
26248 1583998 : switch (subcode)
26249 : {
26250 618535 : case PLUS_EXPR:
26251 618535 : case POINTER_PLUS_EXPR:
26252 618535 : case MINUS_EXPR:
26253 618535 : if (kind == scalar_stmt)
26254 : {
26255 391411 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26256 96973 : stmt_cost = ix86_cost->addss;
26257 294438 : else if (X87_FLOAT_MODE_P (mode))
26258 136 : stmt_cost = ix86_cost->fadd;
26259 : else
26260 294302 : stmt_cost = ix86_cost->add;
26261 : }
26262 : else
26263 227124 : stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
26264 : : ix86_cost->sse_op);
26265 : break;
26266 :
26267 253261 : case MULT_EXPR:
26268 : /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
26269 : take it as MULT_EXPR. */
26270 253261 : case MULT_HIGHPART_EXPR:
26271 253261 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26272 253261 : break;
26273 : /* There's no direct instruction for WIDEN_MULT_EXPR,
26274 : take emulation into account. */
26275 1076 : case WIDEN_MULT_EXPR:
26276 2152 : stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
26277 1076 : TYPE_UNSIGNED (vectype));
26278 1076 : break;
26279 :
26280 10672 : case NEGATE_EXPR:
26281 10672 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26282 3547 : stmt_cost = ix86_cost->sse_op;
26283 7125 : else if (X87_FLOAT_MODE_P (mode))
26284 0 : stmt_cost = ix86_cost->fchs;
26285 7125 : else if (VECTOR_MODE_P (mode))
26286 3626 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26287 : else
26288 3499 : stmt_cost = ix86_cost->add;
26289 : break;
26290 14027 : case TRUNC_DIV_EXPR:
26291 14027 : case CEIL_DIV_EXPR:
26292 14027 : case FLOOR_DIV_EXPR:
26293 14027 : case ROUND_DIV_EXPR:
26294 14027 : case TRUNC_MOD_EXPR:
26295 14027 : case CEIL_MOD_EXPR:
26296 14027 : case FLOOR_MOD_EXPR:
26297 14027 : case RDIV_EXPR:
26298 14027 : case ROUND_MOD_EXPR:
26299 14027 : case EXACT_DIV_EXPR:
26300 14027 : stmt_cost = ix86_division_cost (ix86_cost, mode);
26301 14027 : break;
26302 :
26303 72462 : case RSHIFT_EXPR:
26304 72462 : case LSHIFT_EXPR:
26305 72462 : case LROTATE_EXPR:
26306 72462 : case RROTATE_EXPR:
26307 72462 : {
26308 72462 : tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
26309 72462 : tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
26310 72462 : stmt_cost = ix86_shift_rotate_cost
26311 72462 : (ix86_cost,
26312 : (subcode == RSHIFT_EXPR
26313 37787 : && !TYPE_UNSIGNED (TREE_TYPE (op1)))
26314 : ? ASHIFTRT : LSHIFTRT, mode,
26315 72462 : TREE_CODE (op2) == INTEGER_CST,
26316 72462 : cst_and_fits_in_hwi (op2)
26317 41413 : ? int_cst_value (op2) : -1,
26318 : false, false, NULL, NULL);
26319 : }
26320 72462 : break;
26321 98592 : case NOP_EXPR:
26322 : /* Only sign-conversions are free. */
26323 98592 : if (tree_nop_conversion_p
26324 98592 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
26325 98592 : TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
26326 : stmt_cost = 0;
26327 98592 : else if (fp)
26328 10240 : stmt_cost = vec_fp_conversion_cost
26329 10240 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26330 : break;
26331 :
26332 23208 : case FLOAT_EXPR:
26333 23208 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26334 17412 : stmt_cost = ix86_cost->cvtsi2ss;
26335 5796 : else if (X87_FLOAT_MODE_P (mode))
26336 : /* TODO: We do not have cost tables for x87. */
26337 50 : stmt_cost = ix86_cost->fadd;
26338 : else
26339 5746 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26340 : break;
26341 :
26342 2203 : case FIX_TRUNC_EXPR:
26343 2203 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26344 0 : stmt_cost = ix86_cost->cvtss2si;
26345 2203 : else if (X87_FLOAT_MODE_P (mode))
26346 : /* TODO: We do not have cost tables for x87. */
26347 0 : stmt_cost = ix86_cost->fadd;
26348 : else
26349 2203 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26350 : break;
26351 :
26352 56015 : case COND_EXPR:
26353 56015 : {
26354 : /* SSE2 conditional move sequence is:
26355 : pcmpgtd %xmm5, %xmm0 (accounted separately)
26356 : pand %xmm0, %xmm2
26357 : pandn %xmm1, %xmm0
26358 : por %xmm2, %xmm0
26359 : while SSE4 uses cmp + blend
26360 : and AVX512 masked moves.
26361 :
26362 : The condition is accounted separately since we usually have
26363 : p = a < b
26364 : c = p ? x : y
26365 : and we will account first statement as setcc. Exception is when
26366 : p is loaded from memory as bool and then we will not account
26367 : the compare, but there is no way to check for this. */
26368 :
26369 56015 : int ninsns = TARGET_SSE4_1 ? 1 : 3;
26370 :
26371 : /* If one of parameters is 0 or -1 the sequence will be simplified:
26372 : (if_true & mask) | (if_false & ~mask) -> if_true & mask */
26373 23446 : if (ninsns > 1
26374 23446 : && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26375 23095 : || zerop (gimple_assign_rhs3 (stmt_info->stmt))
26376 13145 : || integer_minus_onep
26377 13145 : (gimple_assign_rhs2 (stmt_info->stmt))
26378 12716 : || integer_minus_onep
26379 12716 : (gimple_assign_rhs3 (stmt_info->stmt))))
26380 : ninsns = 1;
26381 :
26382 56015 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26383 5044 : stmt_cost = ninsns * ix86_cost->sse_op;
26384 50971 : else if (X87_FLOAT_MODE_P (mode))
26385 : /* x87 requires conditional branch. We don't have cost for
26386 : that. */
26387 : ;
26388 50962 : else if (VECTOR_MODE_P (mode))
26389 21217 : stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
26390 : else
26391 : /* compare (accounted separately) + cmov. */
26392 29745 : stmt_cost = ix86_cost->add;
26393 : }
26394 : break;
26395 :
26396 26740 : case MIN_EXPR:
26397 26740 : case MAX_EXPR:
26398 26740 : if (fp)
26399 : {
26400 1486 : if (X87_FLOAT_MODE_P (mode)
26401 520 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26402 : /* x87 requires conditional branch. We don't have cost for
26403 : that. */
26404 : ;
26405 : else
26406 : /* minss */
26407 1486 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26408 : }
26409 : else
26410 : {
26411 25254 : if (VECTOR_MODE_P (mode))
26412 : {
26413 5151 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26414 : /* vpmin was introduced in SSE3.
26415 : SSE2 needs pcmpgtd + pand + pandn + pxor.
26416 : If one of parameters is 0 or -1 the sequence is simplified
26417 : to pcmpgtd + pand. */
26418 5151 : if (!TARGET_SSSE3)
26419 : {
26420 3191 : if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
26421 4611 : || integer_minus_onep
26422 1420 : (gimple_assign_rhs2 (stmt_info->stmt)))
26423 1771 : stmt_cost *= 2;
26424 : else
26425 1420 : stmt_cost *= 4;
26426 : }
26427 : }
26428 : else
26429 : /* cmp + cmov. */
26430 20103 : stmt_cost = ix86_cost->add * 2;
26431 : }
26432 : break;
26433 :
26434 1307 : case ABS_EXPR:
26435 1307 : case ABSU_EXPR:
26436 1307 : if (fp)
26437 : {
26438 471 : if (X87_FLOAT_MODE_P (mode)
26439 171 : && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26440 : /* fabs. */
26441 0 : stmt_cost = ix86_cost->fabs;
26442 : else
26443 : /* andss of sign bit. */
26444 471 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26445 : }
26446 : else
26447 : {
26448 836 : if (VECTOR_MODE_P (mode))
26449 : {
26450 105 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26451 : /* pabs was introduced in SSSE3.
26452 : SSE2 uses psra + pxor + psub. */
26453 105 : if (!TARGET_SSSE3)
26454 75 : stmt_cost *= 3;
26455 : }
26456 : else
26457 : /* neg + cmov. */
26458 731 : stmt_cost = ix86_cost->add * 2;
26459 : }
26460 : break;
26461 :
26462 152216 : case BIT_IOR_EXPR:
26463 152216 : case BIT_XOR_EXPR:
26464 152216 : case BIT_AND_EXPR:
26465 152216 : case BIT_NOT_EXPR:
26466 152216 : gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
26467 : && !X87_FLOAT_MODE_P (mode));
26468 152216 : if (VECTOR_MODE_P (mode))
26469 52913 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26470 : else
26471 99303 : stmt_cost = ix86_cost->add;
26472 : break;
26473 :
26474 253684 : default:
26475 253684 : if (truth_value_p (subcode))
26476 : {
26477 99000 : if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
26478 : /* CMPccS? instructions are cheap, so use sse_op. While they
26479 : produce a mask which may need to be turned to 0/1 by and,
26480 : expect that this will be optimized away in a common case. */
26481 0 : stmt_cost = ix86_cost->sse_op;
26482 99000 : else if (X87_FLOAT_MODE_P (mode))
26483 : /* fcmp + setcc. */
26484 0 : stmt_cost = ix86_cost->fadd + ix86_cost->add;
26485 99000 : else if (VECTOR_MODE_P (mode))
26486 20598 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26487 : else
26488 : /* setcc. */
26489 78402 : stmt_cost = ix86_cost->add;
26490 : break;
26491 : }
26492 : break;
26493 : }
26494 : }
26495 :
26496 : /* Record number of load/store/gather/scatter in vectorized body. */
26497 7499381 : if (where == vect_body && !m_costing_for_scalar)
26498 : {
26499 1948001 : int scale = 1;
26500 1948001 : if (vectype
26501 3887219 : && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
26502 59608 : && TARGET_AVX512_SPLIT_REGS)
26503 3878330 : || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26504 118072 : && TARGET_AVX256_SPLIT_REGS)))
26505 : scale = 2;
26506 :
26507 1948001 : switch (kind)
26508 : {
26509 : /* Emulated gather/scatter or any scalarization. */
26510 112347 : case scalar_load:
26511 112347 : case scalar_stmt:
26512 112347 : case scalar_store:
26513 112347 : case vector_gather_load:
26514 112347 : case vector_scatter_store:
26515 112347 : m_prefer_unroll = false;
26516 112347 : break;
26517 :
26518 596137 : case vector_stmt:
26519 596137 : case vec_to_scalar:
26520 : /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
26521 : unroll in the vectorizer will enable partial sum. */
26522 596137 : if (stmt_info
26523 596111 : && vect_is_reduction (stmt_info)
26524 669363 : && stmt_info->stmt)
26525 : {
26526 : /* Handle __builtin_fma. */
26527 73226 : if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
26528 : {
26529 11 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26530 11 : break;
26531 : }
26532 :
26533 73215 : if (!is_gimple_assign (stmt_info->stmt))
26534 : break;
26535 :
26536 70381 : tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
26537 70381 : machine_mode inner_mode = GET_MODE_INNER (mode);
26538 70381 : tree rhs1, rhs2;
26539 70381 : bool native_vnni_p = true;
26540 70381 : gimple* def;
26541 70381 : machine_mode mode_rhs;
26542 70381 : switch (subcode)
26543 : {
26544 53773 : case PLUS_EXPR:
26545 53773 : case MINUS_EXPR:
26546 53773 : if (!fp || !flag_associative_math
26547 26148 : || flag_fp_contract_mode != FP_CONTRACT_FAST)
26548 : break;
26549 :
26550 : /* FMA condition for different modes. */
26551 26148 : if (((inner_mode == DFmode || inner_mode == SFmode)
26552 26118 : && !TARGET_FMA && !TARGET_AVX512VL)
26553 8489 : || (inner_mode == HFmode && !TARGET_AVX512FP16)
26554 8489 : || (inner_mode == BFmode && !TARGET_AVX10_2))
26555 : break;
26556 :
26557 : /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
26558 : to FMA/FNMA after vectorization. */
26559 8489 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26560 8489 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26561 8489 : if (subcode == PLUS_EXPR
26562 6668 : && TREE_CODE (rhs1) == SSA_NAME
26563 6668 : && (def = SSA_NAME_DEF_STMT (rhs1), true)
26564 6668 : && is_gimple_assign (def)
26565 11819 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26566 1951 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26567 6538 : else if (TREE_CODE (rhs2) == SSA_NAME
26568 6538 : && (def = SSA_NAME_DEF_STMT (rhs2), true)
26569 6538 : && is_gimple_assign (def)
26570 12989 : && gimple_assign_rhs_code (def) == MULT_EXPR)
26571 6445 : m_num_reduc[X86_REDUC_FMA] += count * scale;
26572 : break;
26573 :
26574 : /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
26575 : WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
26576 : SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */
26577 608 : case DOT_PROD_EXPR:
26578 608 : rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
26579 608 : mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
26580 608 : if (mode_rhs == QImode)
26581 : {
26582 335 : rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
26583 335 : signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
26584 335 : signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
26585 :
26586 : /* vpdpbusd. */
26587 335 : if (signop1_p != signop2_p)
26588 85 : native_vnni_p
26589 85 : = (GET_MODE_SIZE (mode) == 64
26590 85 : ? TARGET_AVX512VNNI
26591 28 : : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
26592 85 : || TARGET_AVXVNNI));
26593 : else
26594 : /* vpdpbssd. */
26595 250 : native_vnni_p
26596 266 : = (GET_MODE_SIZE (mode) == 64
26597 250 : ? TARGET_AVX10_2
26598 234 : : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
26599 : }
26600 608 : m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
26601 :
26602 : /* Dislike to do unroll and partial sum for
26603 : emulated DOT_PROD_EXPR. */
26604 608 : if (!native_vnni_p)
26605 151 : m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
26606 : break;
26607 :
26608 106 : case SAD_EXPR:
26609 106 : m_num_reduc[X86_REDUC_SAD] += count * scale;
26610 106 : break;
26611 :
26612 : default:
26613 : break;
26614 : }
26615 : }
26616 :
26617 : default:
26618 : break;
26619 : }
26620 : }
26621 :
26622 :
26623 7499381 : combined_fn cfn;
26624 7499381 : if ((kind == vector_stmt || kind == scalar_stmt)
26625 1976343 : && stmt_info
26626 1967215 : && stmt_info->stmt
26627 9466596 : && is_gimple_call (stmt_info->stmt))
26628 : {
26629 26415 : tree fndecl = gimple_call_fndecl (stmt_info->stmt);
26630 26415 : cgraph_node *node;
26631 26415 : if ((fndecl
26632 5305 : && (node = cgraph_node::get (fndecl))
26633 5280 : && node->simd_clones)
26634 30703 : || gimple_call_internal_p (stmt_info->stmt, IFN_MASK_CALL))
26635 2484 : stmt_cost = 10 * ix86_vec_cost (mode,
26636 1242 : mode == SFmode ? ix86_cost->fmass
26637 : : ix86_cost->fmasd);
26638 25173 : else if ((cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
26639 23818 : switch (cfn)
26640 : {
26641 107 : case CFN_FMA:
26642 107 : stmt_cost = ix86_vec_cost (mode,
26643 107 : mode == SFmode ? ix86_cost->fmass
26644 : : ix86_cost->fmasd);
26645 107 : break;
26646 62 : case CFN_MULH:
26647 62 : stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
26648 62 : break;
26649 : default:
26650 : break;
26651 : }
26652 : }
26653 :
26654 7499381 : if (kind == vec_promote_demote)
26655 : {
26656 61358 : int outer_size
26657 : = tree_to_uhwi
26658 61358 : (TYPE_SIZE
26659 61358 : (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
26660 61358 : int inner_size
26661 : = tree_to_uhwi
26662 61358 : (TYPE_SIZE
26663 61358 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
26664 61358 : bool inner_fp = FLOAT_TYPE_P
26665 : (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
26666 :
26667 5581 : if (fp && inner_fp)
26668 5076 : stmt_cost = vec_fp_conversion_cost
26669 5076 : (ix86_tune_cost, GET_MODE_BITSIZE (mode));
26670 56282 : else if (fp && !inner_fp)
26671 6116 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
26672 50166 : else if (!fp && inner_fp)
26673 505 : stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
26674 : else
26675 49661 : stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
26676 : /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
26677 : greater than inner size we will end up doing two conversions and
26678 : packing them. We always pack pairs; if the size difference is greater
26679 : it is split into multiple demote operations. */
26680 61358 : if (inner_size > outer_size)
26681 23316 : stmt_cost = stmt_cost * 2
26682 23316 : + ix86_vec_cost (mode, ix86_cost->sse_op);
26683 : }
26684 :
26685 : /* If we do elementwise loads into a vector then we are bound by
26686 : latency and execution resources for the many scalar loads
26687 : (AGU and load ports). Try to account for this by scaling the
26688 : construction cost by the number of elements involved. */
26689 7499381 : if ((kind == vec_construct || kind == vec_to_scalar)
26690 7499381 : && ((node
26691 430590 : && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
26692 443277 : || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
26693 37623 : && SLP_TREE_LANES (node) == 1))
26694 40332 : && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
26695 : (SLP_TREE_REPRESENTATIVE (node))))
26696 : != INTEGER_CST))
26697 62775 : || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
26698 : {
26699 31420 : stmt_cost = ix86_default_vector_cost (kind, mode);
26700 31420 : stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
26701 : }
26702 7467961 : else if ((kind == vec_construct || kind == scalar_to_vec)
26703 486537 : && node
26704 455040 : && SLP_TREE_DEF_TYPE (node) == vect_external_def)
26705 : {
26706 311626 : stmt_cost = ix86_default_vector_cost (kind, mode);
26707 311626 : unsigned i;
26708 311626 : tree op;
26709 1328833 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26710 705581 : if (TREE_CODE (op) == SSA_NAME)
26711 482353 : TREE_VISITED (op) = 0;
26712 1017207 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26713 : {
26714 705581 : if (TREE_CODE (op) != SSA_NAME
26715 482353 : || TREE_VISITED (op))
26716 257697 : continue;
26717 447884 : TREE_VISITED (op) = 1;
26718 447884 : gimple *def = SSA_NAME_DEF_STMT (op);
26719 447884 : tree tem;
26720 : /* Look through a conversion. */
26721 447884 : if (is_gimple_assign (def)
26722 256281 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
26723 28713 : && ((tem = gimple_assign_rhs1 (def)), true)
26724 476597 : && TREE_CODE (tem) == SSA_NAME)
26725 28502 : def = SSA_NAME_DEF_STMT (tem);
26726 : /* When the component is loaded from memory without sign-
26727 : or zero-extension we can move it to a vector register and/or
26728 : insert it via vpinsr with a memory operand. */
26729 447884 : if (gimple_assign_load_p (def)
26730 133827 : && tree_nop_conversion_p (TREE_TYPE (op),
26731 133827 : TREE_TYPE (gimple_assign_lhs (def)))
26732 709242 : && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
26733 5584 : || TARGET_SSE4_1))
26734 : ;
26735 : /* When the component is extracted from a vector it is already
26736 : in a vector register. */
26737 321768 : else if (is_gimple_assign (def)
26738 125229 : && gimple_assign_rhs_code (def) == BIT_FIELD_REF
26739 324646 : && VECTOR_TYPE_P (TREE_TYPE
26740 : (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
26741 : ;
26742 : else
26743 : {
26744 319321 : if (fp)
26745 : {
26746 : /* Scalar FP values residing in x87 registers need to be
26747 : spilled and reloaded. */
26748 13684 : auto mode2 = TYPE_MODE (TREE_TYPE (op));
26749 13684 : if (IS_STACK_MODE (mode2))
26750 : {
26751 967 : int cost
26752 : = (ix86_cost->hard_register.fp_store[mode2 == SFmode
26753 967 : ? 0 : 1]
26754 967 : + ix86_cost->sse_load[sse_store_index (mode2)]);
26755 967 : stmt_cost += COSTS_N_INSNS (cost) / 2;
26756 : }
26757 13684 : m_num_sse_needed[where]++;
26758 : }
26759 : else
26760 : {
26761 305637 : m_num_gpr_needed[where]++;
26762 :
26763 305637 : stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
26764 : }
26765 : }
26766 : }
26767 1017207 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26768 705581 : if (TREE_CODE (op) == SSA_NAME)
26769 482353 : TREE_VISITED (op) = 0;
26770 : }
26771 7499381 : if (stmt_cost == -1)
26772 5752613 : stmt_cost = ix86_default_vector_cost (kind, mode);
26773 :
26774 7499381 : if (kind == vec_perm && vectype
26775 198228 : && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
26776 : /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body. */
26777 7503496 : && count != 0)
26778 : {
26779 4115 : bool real_perm = true;
26780 4115 : unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
26781 :
26782 4115 : if (node
26783 4112 : && SLP_TREE_LOAD_PERMUTATION (node).exists ()
26784 : /* Loop vectorization will have 4 times vec_perm
26785 : with index as {0, 0, 0, 0}.
26786 : But it actually generates
26787 : vec_perm_expr <vect, vect, 0, 0, 0, 0>
26788 : vec_perm_expr <vect, vect, 1, 1, 1, 1>
26789 : vec_perm_expr <vect, vect, 2, 2, 2, 2>
26790 : Need to be handled separately. */
26791 7528 : && is_a <bb_vec_info> (m_vinfo))
26792 : {
26793 35 : unsigned half = nunits / 2;
26794 35 : unsigned i = 0;
26795 35 : bool allsame = true;
26796 35 : unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
26797 35 : bool cross_lane_p = false;
26798 184 : for (i = 0 ; i != SLP_TREE_LANES (node); i++)
26799 : {
26800 183 : unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
26801 : /* allsame is just a broadcast. */
26802 183 : if (tmp != first)
26803 84 : allsame = false;
26804 :
26805 : /* 4 times vec_perm with number of lanes multiple of nunits. */
26806 183 : tmp = tmp & (nunits - 1);
26807 183 : unsigned index = i & (nunits - 1);
26808 183 : if ((index < half && tmp >= half)
26809 183 : || (index >= half && tmp < half))
26810 59 : cross_lane_p = true;
26811 :
26812 183 : if (!allsame && cross_lane_p)
26813 : break;
26814 : }
26815 :
26816 35 : if (i == SLP_TREE_LANES (node))
26817 : real_perm = false;
26818 : }
26819 :
26820 : if (real_perm)
26821 : {
26822 4114 : m_num_avx256_vec_perm[where] += count;
26823 4114 : if (dump_file && (dump_flags & TDF_DETAILS))
26824 : {
26825 247 : fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
26826 247 : if (stmt_info)
26827 244 : print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
26828 247 : fprintf (dump_file, " \n");
26829 : }
26830 : }
26831 : }
26832 :
26833 : /* Penalize DFmode vector operations for Bonnell. */
26834 7499381 : if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
26835 7499464 : && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
26836 12 : stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
26837 :
26838 : /* Statements in an inner loop relative to the loop being
26839 : vectorized are weighted more heavily. The value here is
26840 : arbitrary and could potentially be improved with analysis. */
26841 7499381 : retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
26842 :
26843 : /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
26844 : for Silvermont as it has out of order integer pipeline and can execute
26845 : 2 scalar instruction per tick, but has in order SIMD pipeline. */
26846 7499381 : if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
26847 7499381 : || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
26848 2476 : && stmt_info && stmt_info->stmt)
26849 : {
26850 2132 : tree lhs_op = gimple_get_lhs (stmt_info->stmt);
26851 2132 : if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
26852 1597 : retval = (retval * 17) / 10;
26853 : }
26854 :
26855 7499381 : m_costs[where] += retval;
26856 :
26857 7499381 : return retval;
26858 : }
26859 :
26860 : void
26861 1813607 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26862 : {
26863 1813607 : unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26864 1813607 : unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26865 :
26866 : /* Any better way to have target available fp registers, currently use SSE_REGS. */
26867 1813607 : unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26868 7254428 : for (unsigned i = 0; i != 3; i++)
26869 : {
26870 5440821 : if (m_num_gpr_needed[i] > target_avail_regs)
26871 754 : m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26872 : /* Only measure sse registers pressure. */
26873 5440821 : if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26874 90 : m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26875 : }
26876 1813607 : }
26877 :
26878 : void
26879 1813607 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
26880 : {
26881 1813607 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
26882 488639 : if (loop_vinfo && !m_costing_for_scalar)
26883 : {
26884 : /* We are currently not asking the vectorizer to compare costs
26885 : between different vector mode sizes. When using predication
26886 : that will end up always choosing the prefered mode size even
26887 : if there's a smaller mode covering all lanes. Test for this
26888 : situation and artificially reject the larger mode attempt.
26889 : ??? We currently lack masked ops for sub-SSE sized modes,
26890 : so we could restrict this rejection to AVX and AVX512 modes
26891 : but error on the safe side for now. */
26892 125122 : if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
26893 26 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26894 16 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26895 125132 : && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
26896 20 : > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
26897 8 : m_costs[vect_body] = INT_MAX;
26898 :
26899 : /* We'd like to avoid using masking if there's an in-order reduction
26900 : to vectorize because that will also perform in-order adds of
26901 : masked elements (as neutral value, of course) here, but there
26902 : is currently no way to indicate to try un-masked with the same
26903 : mode. */
26904 :
26905 125122 : bool any_reduc_p = false;
26906 497150 : for (int i = 0; i != X86_REDUC_LAST; i++)
26907 373284 : if (m_num_reduc[i])
26908 : {
26909 : any_reduc_p = true;
26910 : break;
26911 : }
26912 :
26913 125122 : if (any_reduc_p
26914 : /* Not much gain for loop with gather and scatter. */
26915 1256 : && m_prefer_unroll
26916 1098 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
26917 : {
26918 1770 : unsigned unroll_factor
26919 885 : = OPTION_SET_P (ix86_vect_unroll_limit)
26920 885 : ? ix86_vect_unroll_limit
26921 885 : : ix86_cost->vect_unroll_limit;
26922 :
26923 885 : if (unroll_factor > 1)
26924 : {
26925 3540 : for (int i = 0 ; i != X86_REDUC_LAST; i++)
26926 : {
26927 2655 : if (m_num_reduc[i])
26928 : {
26929 885 : unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
26930 : m_num_reduc[i]);
26931 2655 : unroll_factor = MIN (unroll_factor, tmp);
26932 : }
26933 : }
26934 :
26935 1770 : m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
26936 : }
26937 : }
26938 :
26939 : }
26940 :
26941 1813607 : ix86_vect_estimate_reg_pressure ();
26942 :
26943 7254428 : for (int i = 0; i != 3; i++)
26944 5440821 : if (m_num_avx256_vec_perm[i]
26945 517 : && TARGET_AVX256_AVOID_VEC_PERM)
26946 7 : m_costs[i] = INT_MAX;
26947 :
26948 : /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
26949 : a AVX2 and a SSE epilogue for AVX512 vectorized loops. */
26950 1813607 : if (loop_vinfo
26951 488639 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26952 43740 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
26953 1814378 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26954 23 : m_suggested_epilogue_mode = V16QImode;
26955 : /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
26956 : enable a 64bit SSE epilogue. */
26957 1813607 : if (loop_vinfo
26958 488639 : && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26959 43740 : && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
26960 1816291 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
26961 104 : m_suggested_epilogue_mode = V8QImode;
26962 :
26963 : /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
26964 : a masked epilogue if that doesn't seem detrimental. */
26965 1813607 : if (loop_vinfo
26966 488639 : && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26967 466769 : && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
26968 : /* Avoid a masked epilog if cascaded epilogues eventually get us
26969 : to one with VF 1 as that means no scalar epilog at all. */
26970 75886 : && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
26971 75886 : / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
26972 34 : && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26973 75885 : && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
26974 1813779 : && !OPTION_SET_P (param_vect_partial_vector_usage))
26975 : {
26976 154 : bool avoid = false;
26977 154 : if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26978 122 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
26979 : {
26980 122 : unsigned int peel_niter
26981 : = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
26982 122 : if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
26983 0 : peel_niter += 1;
26984 : /* When we know the number of scalar iterations of the epilogue,
26985 : avoid masking when a single vector epilog iteration handles
26986 : it in full. */
26987 122 : if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
26988 122 : % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
26989 : avoid = true;
26990 : }
26991 152 : if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
26992 14 : for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
26993 : {
26994 4 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
26995 : ;
26996 4 : else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
26997 : ;
26998 : else
26999 : {
27000 2 : int loop_depth
27001 4 : = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
27002 2 : DDR_LOOP_NEST (ddr));
27003 4 : if (DDR_NUM_DIST_VECTS (ddr) == 1
27004 2 : && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
27005 : {
27006 : /* Avoid the case when there's an outer loop that might
27007 : traverse a multi-dimensional array with the inner
27008 : loop just executing the masked epilogue with a
27009 : read-write where the next outer iteration might
27010 : read from the masked part of the previous write,
27011 : 'n' filling half a vector.
27012 : for (j = 0; j < m; ++j)
27013 : for (i = 0; i < n; ++i)
27014 : a[j][i] = c * a[j][i]; */
27015 : avoid = true;
27016 : break;
27017 : }
27018 : }
27019 : }
27020 : /* Avoid using masking if there's an in-order reduction
27021 : to vectorize because that will also perform in-order adds of
27022 : masked elements (as neutral value, of course). */
27023 154 : if (!avoid)
27024 : {
27025 604 : for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
27026 158 : if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
27027 158 : && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
27028 : == FOLD_LEFT_REDUCTION))
27029 : {
27030 : avoid = true;
27031 : break;
27032 : }
27033 : }
27034 150 : if (!avoid)
27035 : {
27036 146 : m_suggested_epilogue_mode = loop_vinfo->vector_mode;
27037 146 : m_masked_epilogue = 1;
27038 : }
27039 : }
27040 :
27041 1813607 : vector_costs::finish_cost (scalar_costs);
27042 1813607 : }
27043 :
27044 : /* Return true if THIS should be preferred over OTHER as main vector loop. */
27045 :
27046 : bool
27047 30878 : ix86_vector_costs::better_main_loop_than_p (const vector_costs *other) const
27048 : {
27049 30878 : loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->vinfo ());
27050 30878 : loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->vinfo ());
27051 :
27052 : /* If the other loop is masked it does not need an epilog. Prefer that
27053 : if the current loop cannot be vectorized fully with a vector
27054 : epilogs with at most one scalar iteration left. */
27055 21050 : if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
27056 21050 : && LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
27057 4 : && known_gt (LOOP_VINFO_VECT_FACTOR (other_loop_vinfo),
27058 : LOOP_VINFO_INT_NITERS (this_loop_vinfo))
27059 30882 : && (popcount_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo) & ~1)
27060 4 : > (param_vect_epilogues_nomask != 0)))
27061 : return false;
27062 :
27063 30874 : return vector_costs::better_main_loop_than_p (other);
27064 : }
27065 :
27066 : /* Return true if THIS should be preferred over OTHER as epilog vector
27067 : loop when vectorizing MAIN_LOOP. */
27068 :
27069 : bool
27070 1515 : ix86_vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
27071 : loop_vec_info main_loop) const
27072 : {
27073 1515 : loop_vec_info this_loop_info = as_a <loop_vec_info> (this->vinfo ());
27074 : /* The x86 target allows for multiple vector epilogues, if THIS is
27075 : the suggested epilog mode of OTHER then keep the latter unless
27076 : THIS has a VF of one which means no further epilog needed. */
27077 1515 : int tem;
27078 1515 : if (known_gt (LOOP_VINFO_VECT_FACTOR (this_loop_info), 1U)
27079 1515 : && (GET_MODE_SIZE (other->suggested_epilogue_mode (tem))
27080 2980 : == GET_MODE_SIZE (this_loop_info->vector_mode)))
27081 : return false;
27082 1438 : return vector_costs::better_epilogue_loop_than_p (other, main_loop);
27083 : }
27084 :
27085 : /* Validate target specific memory model bits in VAL. */
27086 :
27087 : static unsigned HOST_WIDE_INT
27088 410611 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
27089 : {
27090 410611 : enum memmodel model = memmodel_from_int (val);
27091 410611 : bool strong;
27092 :
27093 410611 : if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
27094 : |MEMMODEL_MASK)
27095 410607 : || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
27096 : {
27097 4 : warning (OPT_Winvalid_memory_model,
27098 : "unknown architecture specific memory model");
27099 4 : return MEMMODEL_SEQ_CST;
27100 : }
27101 410607 : strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
27102 410607 : if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
27103 : {
27104 0 : warning (OPT_Winvalid_memory_model,
27105 : "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
27106 : "memory model");
27107 0 : return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
27108 : }
27109 410607 : if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
27110 : {
27111 0 : warning (OPT_Winvalid_memory_model,
27112 : "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
27113 : "memory model");
27114 0 : return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
27115 : }
27116 : return val;
27117 : }
27118 :
27119 : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
27120 : CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
27121 : CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
27122 : or number of vecsize_mangle variants that should be emitted. */
27123 :
27124 : static int
27125 7593 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
27126 : struct cgraph_simd_clone *clonei,
27127 : tree base_type, int num,
27128 : bool explicit_p)
27129 : {
27130 7593 : int ret = 1;
27131 :
27132 7593 : if (clonei->simdlen
27133 7593 : && (clonei->simdlen < 2
27134 1321 : || clonei->simdlen > 1024
27135 1321 : || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
27136 : {
27137 0 : if (explicit_p)
27138 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27139 : "unsupported simdlen %wd", clonei->simdlen.to_constant ());
27140 0 : return 0;
27141 : }
27142 :
27143 7593 : tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
27144 7593 : if (TREE_CODE (ret_type) != VOID_TYPE)
27145 6801 : switch (TYPE_MODE (ret_type))
27146 : {
27147 6801 : case E_QImode:
27148 6801 : case E_HImode:
27149 6801 : case E_SImode:
27150 6801 : case E_DImode:
27151 6801 : case E_SFmode:
27152 6801 : case E_DFmode:
27153 : /* case E_SCmode: */
27154 : /* case E_DCmode: */
27155 6801 : if (!AGGREGATE_TYPE_P (ret_type))
27156 : break;
27157 : /* FALLTHRU */
27158 2 : default:
27159 2 : if (explicit_p)
27160 2 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27161 : "unsupported return type %qT for simd", ret_type);
27162 2 : return 0;
27163 : }
27164 :
27165 7591 : tree t;
27166 7591 : int i;
27167 7591 : tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
27168 7591 : bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
27169 :
27170 7591 : for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
27171 20438 : t && t != void_list_node; t = TREE_CHAIN (t), i++)
27172 : {
27173 16678 : tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
27174 12852 : switch (TYPE_MODE (arg_type))
27175 : {
27176 12833 : case E_QImode:
27177 12833 : case E_HImode:
27178 12833 : case E_SImode:
27179 12833 : case E_DImode:
27180 12833 : case E_SFmode:
27181 12833 : case E_DFmode:
27182 : /* case E_SCmode: */
27183 : /* case E_DCmode: */
27184 12833 : if (!AGGREGATE_TYPE_P (arg_type))
27185 : break;
27186 : /* FALLTHRU */
27187 41 : default:
27188 41 : if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
27189 : break;
27190 5 : if (explicit_p)
27191 5 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27192 : "unsupported argument type %qT for simd", arg_type);
27193 : return 0;
27194 : }
27195 : }
27196 :
27197 7586 : if (!TREE_PUBLIC (node->decl) || !explicit_p)
27198 : {
27199 : /* If the function isn't exported, we can pick up just one ISA
27200 : for the clones. */
27201 114 : if (TARGET_AVX512F)
27202 0 : clonei->vecsize_mangle = 'e';
27203 114 : else if (TARGET_AVX2)
27204 1 : clonei->vecsize_mangle = 'd';
27205 113 : else if (TARGET_AVX)
27206 88 : clonei->vecsize_mangle = 'c';
27207 : else
27208 25 : clonei->vecsize_mangle = 'b';
27209 : ret = 1;
27210 : }
27211 : else
27212 : {
27213 7472 : clonei->vecsize_mangle = "bcde"[num];
27214 7472 : ret = 4;
27215 : }
27216 7586 : clonei->mask_mode = VOIDmode;
27217 7586 : switch (clonei->vecsize_mangle)
27218 : {
27219 1893 : case 'b':
27220 1893 : clonei->vecsize_int = 128;
27221 1893 : clonei->vecsize_float = 128;
27222 1893 : break;
27223 1956 : case 'c':
27224 1956 : clonei->vecsize_int = 128;
27225 1956 : clonei->vecsize_float = 256;
27226 1956 : break;
27227 1869 : case 'd':
27228 1869 : clonei->vecsize_int = 256;
27229 1869 : clonei->vecsize_float = 256;
27230 1869 : break;
27231 1868 : case 'e':
27232 1868 : clonei->vecsize_int = 512;
27233 1868 : clonei->vecsize_float = 512;
27234 1868 : if (TYPE_MODE (base_type) == QImode)
27235 19 : clonei->mask_mode = DImode;
27236 : else
27237 1849 : clonei->mask_mode = SImode;
27238 : break;
27239 : }
27240 7586 : if (clonei->simdlen == 0)
27241 : {
27242 6265 : if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
27243 3297 : clonei->simdlen = clonei->vecsize_int;
27244 : else
27245 2968 : clonei->simdlen = clonei->vecsize_float;
27246 6265 : clonei->simdlen = clonei->simdlen
27247 12530 : / GET_MODE_BITSIZE (TYPE_MODE (base_type));
27248 : }
27249 1321 : else if (clonei->simdlen > 16)
27250 : {
27251 : /* For compatibility with ICC, use the same upper bounds
27252 : for simdlen. In particular, for CTYPE below, use the return type,
27253 : unless the function returns void, in that case use the characteristic
27254 : type. If it is possible for given SIMDLEN to pass CTYPE value
27255 : in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
27256 : for 64-bit code), accept that SIMDLEN, otherwise warn and don't
27257 : emit corresponding clone. */
27258 12 : tree ctype = ret_type;
27259 12 : if (VOID_TYPE_P (ret_type))
27260 0 : ctype = base_type;
27261 24 : int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
27262 12 : if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
27263 8 : cnt /= clonei->vecsize_int;
27264 : else
27265 4 : cnt /= clonei->vecsize_float;
27266 12 : if (cnt > (TARGET_64BIT ? 16 : 8))
27267 : {
27268 0 : if (explicit_p)
27269 0 : warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
27270 : "unsupported simdlen %wd",
27271 : clonei->simdlen.to_constant ());
27272 0 : return 0;
27273 : }
27274 : }
27275 : return ret;
27276 : }
27277 :
27278 : /* If SIMD clone NODE can't be used in a vectorized loop
27279 : in current function, return -1, otherwise return a badness of using it
27280 : (0 if it is most desirable from vecsize_mangle point of view, 1
27281 : slightly less desirable, etc.). */
27282 :
27283 : static int
27284 1790 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
27285 : {
27286 1790 : switch (node->simdclone->vecsize_mangle)
27287 : {
27288 638 : case 'b':
27289 638 : if (!TARGET_SSE2)
27290 : return -1;
27291 638 : if (!TARGET_AVX)
27292 : return 0;
27293 537 : return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
27294 630 : case 'c':
27295 630 : if (!TARGET_AVX)
27296 : return -1;
27297 585 : return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
27298 334 : case 'd':
27299 334 : if (!TARGET_AVX2)
27300 : return -1;
27301 141 : return TARGET_AVX512F ? 1 : 0;
27302 188 : case 'e':
27303 188 : if (!TARGET_AVX512F)
27304 130 : return -1;
27305 : return 0;
27306 0 : default:
27307 0 : gcc_unreachable ();
27308 : }
27309 : }
27310 :
27311 : /* This function adjusts the unroll factor based on
27312 : the hardware capabilities. For ex, bdver3 has
27313 : a loop buffer which makes unrolling of smaller
27314 : loops less important. This function decides the
27315 : unroll factor using number of memory references
27316 : (value 32 is used) as a heuristic. */
27317 :
27318 : static unsigned
27319 808322 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
27320 : {
27321 808322 : basic_block *bbs;
27322 808322 : rtx_insn *insn;
27323 808322 : unsigned i;
27324 808322 : unsigned mem_count = 0;
27325 :
27326 : /* Unroll small size loop when unroll factor is not explicitly
27327 : specified. */
27328 808322 : if (ix86_unroll_only_small_loops && !loop->unroll)
27329 : {
27330 762614 : if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
27331 72386 : return MIN (nunroll, ix86_cost->small_unroll_factor);
27332 : else
27333 : return 1;
27334 : }
27335 :
27336 45708 : if (!TARGET_ADJUST_UNROLL)
27337 : return nunroll;
27338 :
27339 : /* Count the number of memory references within the loop body.
27340 : This value determines the unrolling factor for bdver3 and bdver4
27341 : architectures. */
27342 8 : subrtx_iterator::array_type array;
27343 8 : bbs = get_loop_body (loop);
27344 24 : for (i = 0; i < loop->num_nodes; i++)
27345 120 : FOR_BB_INSNS (bbs[i], insn)
27346 104 : if (NONDEBUG_INSN_P (insn))
27347 588 : FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
27348 516 : if (const_rtx x = *iter)
27349 516 : if (MEM_P (x))
27350 : {
27351 28 : machine_mode mode = GET_MODE (x);
27352 56 : unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27353 28 : if (n_words > 4)
27354 0 : mem_count += 2;
27355 : else
27356 28 : mem_count += 1;
27357 : }
27358 8 : free (bbs);
27359 :
27360 8 : if (mem_count && mem_count <=32)
27361 8 : return MIN (nunroll, 32 / mem_count);
27362 :
27363 : return nunroll;
27364 8 : }
27365 :
27366 :
27367 : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
27368 :
27369 : static bool
27370 418884 : ix86_float_exceptions_rounding_supported_p (void)
27371 : {
27372 : /* For x87 floating point with standard excess precision handling,
27373 : there is no adddf3 pattern (since x87 floating point only has
27374 : XFmode operations) so the default hook implementation gets this
27375 : wrong. */
27376 418884 : return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
27377 : }
27378 :
27379 : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.

   Build three expression trees and store them through HOLD, CLEAR and
   UPDATE.  If neither x87 nor SSE math is available, return without
   touching any of them.

   x87 part (TARGET_80387): *HOLD saves the environment with the FNSTENV
   builtin into a temporary array and then calls FNCLEX; *CLEAR calls
   FNCLEX; *UPDATE reads the status word with FNSTSW into EXCEPTIONS_VAR
   and reloads the saved environment with FLDENV.

   SSE part (TARGET_SSE && TARGET_SSE_MATH): *HOLD reads MXCSR with
   STMXCSR, derives a modified value (OR 0x1f80, AND 0xffffffc0) and loads
   it with LDMXCSR; *CLEAR reloads the modified value; *UPDATE merges a
   fresh STMXCSR result into EXCEPTIONS_VAR and restores the original
   MXCSR.  Each SSE tree is appended to the x87 one when that exists.

   Finally *UPDATE is extended with a call to the
   BUILT_IN_ATOMIC_FERAISEEXCEPT builtin on EXCEPTIONS_VAR.  */
27380 :
27381 : static void
27382 7054 : ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27383 : {
27384 7054 : if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
27385 : return;
27386 7054 : tree exceptions_var = create_tmp_var_raw (integer_type_node); /* Accumulates the exception bits passed to the final feraiseexcept call.  */
27387 7054 : if (TARGET_80387)
27388 : {
27389 7054 : tree fenv_index_type = build_index_type (size_int (6)); /* presumably 7 unsigned words, matching the FNSTENV image size -- TODO confirm.  */
27390 7054 : tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
27391 7054 : tree fenv_var = create_tmp_var_raw (fenv_type);
27392 7054 : TREE_ADDRESSABLE (fenv_var) = 1; /* Its address is passed to the builtins below.  */
27393 7054 : tree fenv_ptr = build_pointer_type (fenv_type);
27394 7054 : tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
27395 7054 : fenv_addr = fold_convert (ptr_type_node, fenv_addr);
27396 7054 : tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
27397 7054 : tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
27398 7054 : tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
27399 7054 : tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
27400 7054 : tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
27401 7054 : tree hold_fnclex = build_call_expr (fnclex, 0);
27402 7054 : fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, /* FENV_VAR now names the TARGET_EXPR wrapping the fnstenv call.  */
27403 : NULL_TREE, NULL_TREE);
27404 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, /* hold: fnstenv, then fnclex.  */
27405 : hold_fnclex);
27406 7054 : *clear = build_call_expr (fnclex, 0); /* clear: fnclex.  */
27407 7054 : tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
27408 7054 : tree fnstsw_call = build_call_expr (fnstsw, 0);
27409 7054 : tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
27410 : fnstsw_call, NULL_TREE, NULL_TREE);
27411 7054 : tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
27412 7054 : tree update_mod = build4 (TARGET_EXPR, integer_type_node,
27413 : exceptions_var, exceptions_x87,
27414 : NULL_TREE, NULL_TREE);
27415 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node, /* update: sw_var = fnstsw (); exceptions_var = sw_var; ...  */
27416 : sw_mod, update_mod);
27417 7054 : tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
27418 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); /* ... then fldenv of the saved environment.  */
27419 : }
27420 7054 : if (TARGET_SSE && TARGET_SSE_MATH)
27421 : {
27422 7054 : tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
27423 7054 : tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
27424 7054 : tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
27425 7054 : tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
27426 7054 : tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
27427 7054 : tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
27428 : mxcsr_orig_var, stmxcsr_hold_call,
27429 : NULL_TREE, NULL_TREE);
27430 7054 : tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node, /* 0x1f80 is bits 7-12; per the MXCSR layout these are the exception mask bits.  */
27431 : mxcsr_orig_var,
27432 : build_int_cst (unsigned_type_node, 0x1f80));
27433 7054 : hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val, /* 0xffffffc0 clears bits 0-5; per the MXCSR layout these are the exception flags.  */
27434 : build_int_cst (unsigned_type_node, 0xffffffc0));
27435 7054 : tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
27436 : mxcsr_mod_var, hold_mod_val,
27437 : NULL_TREE, NULL_TREE);
27438 7054 : tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27439 7054 : tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
27440 : hold_assign_orig, hold_assign_mod);
27441 7054 : hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
27442 : ldmxcsr_hold_call);
27443 7054 : if (*hold) /* NOTE(review): when TARGET_80387 is false this reads the caller-supplied *HOLD (likewise *CLEAR and *UPDATE below); presumably callers pass them NULL-initialised -- confirm.  */
27444 7054 : *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
27445 : else
27446 0 : *hold = hold_all;
27447 7054 : tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
27448 7054 : if (*clear)
27449 7054 : *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
27450 : ldmxcsr_clear_call);
27451 : else
27452 0 : *clear = ldmxcsr_clear_call;
27453 7054 : tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
27454 7054 : tree exceptions_sse = fold_convert (integer_type_node,
27455 : stxmcsr_update_call);
27456 7054 : if (*update)
27457 : {
27458 7054 : tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node, /* exceptions_var |= current MXCSR value.  */
27459 : exceptions_var, exceptions_sse);
27460 7054 : tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
27461 : exceptions_var, exceptions_mod);
27462 7054 : *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
27463 : exceptions_assign);
27464 : }
27465 : else
27466 0 : *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var, /* No x87 part: initialise exceptions_var from MXCSR alone.  */
27467 : exceptions_sse, NULL_TREE, NULL_TREE);
27468 7054 : tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var); /* Restore the MXCSR value saved by the hold sequence.  */
27469 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update,
27470 : ldmxcsr_update_call);
27471 : }
27472 7054 : tree atomic_feraiseexcept
27473 7054 : = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
27474 7054 : tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
27475 : 1, exceptions_var);
27476 7054 : *update = build2 (COMPOUND_EXPR, void_type_node, *update, /* Last step of update: raise the collected exceptions.  */
27477 : atomic_feraiseexcept_call);
27478 : }
27479 :
27480 : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
27481 : /* For i386, common symbol is local only for non-PIE binaries. For
27482 : x86-64, common symbol is local only for non-PIE binaries or linker
27483 : supports copy reloc in PIE binaries. */
27484 :
27485 : static bool
27486 773860184 : ix86_binds_local_p (const_tree exp)
27487 : {
27488 773860184 : bool direct_extern_access
27489 773860184 : = (ix86_direct_extern_access
27490 1544201800 : && !(VAR_OR_FUNCTION_DECL_P (exp)
27491 770341616 : && lookup_attribute ("nodirect_extern_access",
27492 770341616 : DECL_ATTRIBUTES (exp))));
27493 773860184 : if (!direct_extern_access)
27494 1225 : ix86_has_no_direct_extern_access = true;
27495 773860184 : return default_binds_local_p_3 (exp, flag_shlib != 0, true,
27496 : direct_extern_access,
27497 : (direct_extern_access
27498 773858959 : && (!flag_pic
27499 132684209 : || (TARGET_64BIT
27500 773860184 : && HAVE_LD_PIE_COPYRELOC != 0))));
27501 : }
27502 :
27503 : /* If flag_pic or ix86_direct_extern_access is false, then neither
27504 : local nor global relocs should be placed in readonly memory. */
27505 :
27506 : static int
27507 5141460 : ix86_reloc_rw_mask (void)
27508 : {
27509 5141460 : return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
27510 : }
27511 : #endif
27512 :
27513 : /* Return true iff ADDR can be used as a symbolic base address. */
27514 :
27515 : static bool
27516 3004 : symbolic_base_address_p (rtx addr)
27517 : {
27518 0 : if (SYMBOL_REF_P (addr))
27519 : return true;
27520 :
27521 2980 : if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
27522 0 : return true;
27523 :
27524 : return false;
27525 : }
27526 :
27527 : /* Return true iff ADDR can be used as a base address. */
27528 :
27529 : static bool
27530 4577 : base_address_p (rtx addr)
27531 : {
27532 0 : if (REG_P (addr))
27533 : return true;
27534 :
27535 2830 : if (symbolic_base_address_p (addr))
27536 0 : return true;
27537 :
27538 : return false;
27539 : }
27540 :
27541 : /* If MEM is in the form of [(base+symbase)+offset], extract the three
27542 : parts of address and set to BASE, SYMBASE and OFFSET, otherwise
27543 : return false. */
27544 :
27545 : static bool
27546 2953 : extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
27547 : {
27548 2953 : rtx addr;
27549 :
27550 2953 : gcc_assert (MEM_P (mem));
27551 :
27552 2953 : addr = XEXP (mem, 0);
27553 :
27554 2953 : if (GET_CODE (addr) == CONST)
27555 10 : addr = XEXP (addr, 0);
27556 :
27557 2953 : if (base_address_p (addr))
27558 : {
27559 1329 : *base = addr;
27560 1329 : *symbase = const0_rtx;
27561 1329 : *offset = const0_rtx;
27562 1329 : return true;
27563 : }
27564 :
27565 1624 : if (GET_CODE (addr) == PLUS
27566 1624 : && base_address_p (XEXP (addr, 0)))
27567 : {
27568 442 : rtx addend = XEXP (addr, 1);
27569 :
27570 442 : if (GET_CODE (addend) == CONST)
27571 0 : addend = XEXP (addend, 0);
27572 :
27573 442 : if (CONST_INT_P (addend))
27574 : {
27575 268 : *base = XEXP (addr, 0);
27576 268 : *symbase = const0_rtx;
27577 268 : *offset = addend;
27578 268 : return true;
27579 : }
27580 :
27581 : /* Also accept REG + symbolic ref, with or without a CONST_INT
27582 : offset. */
27583 174 : if (REG_P (XEXP (addr, 0)))
27584 : {
27585 174 : if (symbolic_base_address_p (addend))
27586 : {
27587 0 : *base = XEXP (addr, 0);
27588 0 : *symbase = addend;
27589 0 : *offset = const0_rtx;
27590 0 : return true;
27591 : }
27592 :
27593 174 : if (GET_CODE (addend) == PLUS
27594 0 : && symbolic_base_address_p (XEXP (addend, 0))
27595 174 : && CONST_INT_P (XEXP (addend, 1)))
27596 : {
27597 0 : *base = XEXP (addr, 0);
27598 0 : *symbase = XEXP (addend, 0);
27599 0 : *offset = XEXP (addend, 1);
27600 0 : return true;
27601 : }
27602 : }
27603 : }
27604 :
27605 : return false;
27606 : }
27607 :
27608 : /* Given OPERANDS of consecutive load/store, check if we can merge
27609 : them into move multiple. LOAD is true if they are load instructions.
27610 : MODE is the mode of memory operands. */
27611 :
27612 : bool
27613 1618 : ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
27614 : machine_mode mode)
27615 : {
27616 1618 : HOST_WIDE_INT offval_1, offval_2, msize;
27617 1618 : rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
27618 : symbase_1, symbase_2, offset_1, offset_2;
27619 :
27620 1618 : if (load)
27621 : {
27622 1322 : mem_1 = operands[1];
27623 1322 : mem_2 = operands[3];
27624 1322 : reg_1 = operands[0];
27625 1322 : reg_2 = operands[2];
27626 : }
27627 : else
27628 : {
27629 296 : mem_1 = operands[0];
27630 296 : mem_2 = operands[2];
27631 296 : reg_1 = operands[1];
27632 296 : reg_2 = operands[3];
27633 : }
27634 :
27635 1618 : gcc_assert (REG_P (reg_1) && REG_P (reg_2));
27636 :
27637 1618 : if (REGNO (reg_1) != REGNO (reg_2))
27638 : return false;
27639 :
27640 : /* Check if the addresses are in the form of [base+offset]. */
27641 1616 : if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
27642 : return false;
27643 1337 : if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
27644 : return false;
27645 :
27646 : /* Check if the bases are the same. */
27647 260 : if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
27648 115 : return false;
27649 :
27650 145 : offval_1 = INTVAL (offset_1);
27651 145 : offval_2 = INTVAL (offset_2);
27652 145 : msize = GET_MODE_SIZE (mode);
27653 : /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
27654 145 : if (offval_1 + msize != offval_2)
27655 : return false;
27656 :
27657 : return true;
27658 : }
27659 :
27660 : /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27661 :
27662 : static bool
27663 363920 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
27664 : optimization_type opt_type)
27665 : {
27666 363920 : switch (op)
27667 : {
27668 231 : case asin_optab:
27669 231 : case acos_optab:
27670 231 : case log1p_optab:
27671 231 : case exp_optab:
27672 231 : case exp10_optab:
27673 231 : case exp2_optab:
27674 231 : case expm1_optab:
27675 231 : case ldexp_optab:
27676 231 : case scalb_optab:
27677 231 : case round_optab:
27678 231 : case lround_optab:
27679 231 : return opt_type == OPTIMIZE_FOR_SPEED;
27680 :
27681 286 : case rint_optab:
27682 286 : if (SSE_FLOAT_MODE_P (mode1)
27683 139 : && TARGET_SSE_MATH
27684 127 : && !flag_trapping_math
27685 21 : && !TARGET_SSE4_1
27686 : && mode1 != HFmode)
27687 21 : return opt_type == OPTIMIZE_FOR_SPEED;
27688 : return true;
27689 :
27690 1972 : case floor_optab:
27691 1972 : case ceil_optab:
27692 1972 : case btrunc_optab:
27693 1972 : if ((SSE_FLOAT_MODE_P (mode1)
27694 1582 : && TARGET_SSE_MATH
27695 1515 : && TARGET_SSE4_1)
27696 1905 : || mode1 == HFmode)
27697 : return true;
27698 1836 : return opt_type == OPTIMIZE_FOR_SPEED;
27699 :
27700 66 : case rsqrt_optab:
27701 66 : return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
27702 :
27703 : default:
27704 : return true;
27705 : }
27706 : }
27707 :
27708 : /* Address space support.
27709 :
27710 : This is not "far pointers" in the 16-bit sense, but an easy way
27711 : to use %fs and %gs segment prefixes. Therefore:
27712 :
27713 : (a) All address spaces have the same modes,
27714 : (b) All address spaces have the same address forms,
27715 : (c) While %fs and %gs are technically subsets of the generic
27716 : address space, they are probably not subsets of each other.
27717 : (d) Since we have no access to the segment base register values
27718 : without resorting to a system call, we cannot convert a
27719 : non-default address space to a default address space.
27720 : Therefore we do not claim %fs or %gs are subsets of generic.
27721 :
27722 : Therefore we can (mostly) use the default hooks. */
27723 :
27724 : /* All use of segmentation is assumed to make address 0 valid. */
27725 :
27726 : static bool
27727 67987938 : ix86_addr_space_zero_address_valid (addr_space_t as)
27728 : {
27729 67987938 : return as != ADDR_SPACE_GENERIC;
27730 : }
27731 :
27732 : static void
27733 778167 : ix86_init_libfuncs (void)
27734 : {
27735 778167 : if (TARGET_64BIT)
27736 : {
27737 763217 : set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
27738 763217 : set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
27739 : }
27740 : else
27741 : {
27742 14950 : set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
27743 14950 : set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
27744 : }
27745 :
27746 : #if TARGET_MACHO
27747 : darwin_rename_builtins ();
27748 : #endif
27749 778167 : }
27750 :
27751 : /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
27752 : FPU, assume that the fpcw is set to extended precision; when using
27753 : only SSE, rounding is correct; when using both SSE and the FPU,
27754 : the rounding precision is indeterminate, since either may be chosen
27755 : apparently at random. */
27756 :
27757 : static enum flt_eval_method
27758 89719083 : ix86_get_excess_precision (enum excess_precision_type type)
27759 : {
27760 89719083 : switch (type)
27761 : {
27762 85724855 : case EXCESS_PRECISION_TYPE_FAST:
27763 : /* The fastest type to promote to will always be the native type,
27764 : whether that occurs with implicit excess precision or
27765 : otherwise. */
27766 85724855 : return TARGET_AVX512FP16
27767 85724855 : ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
27768 85724855 : : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27769 3994147 : case EXCESS_PRECISION_TYPE_STANDARD:
27770 3994147 : case EXCESS_PRECISION_TYPE_IMPLICIT:
27771 : /* Otherwise, the excess precision we want when we are
27772 : in a standards compliant mode, and the implicit precision we
27773 : provide would be identical were it not for the unpredictable
27774 : cases. */
27775 3994147 : if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
27776 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
27777 3988341 : else if (!TARGET_80387)
27778 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27779 3982219 : else if (!TARGET_MIX_SSE_I387)
27780 : {
27781 3982047 : if (!(TARGET_SSE && TARGET_SSE_MATH))
27782 : return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
27783 2993429 : else if (TARGET_SSE2)
27784 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
27785 : }
27786 :
27787 : /* If we are in standards compliant mode, but we know we will
27788 : calculate in unpredictable precision, return
27789 : FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
27790 : excess precision if the target can't guarantee it will honor
27791 : it. */
27792 320 : return (type == EXCESS_PRECISION_TYPE_STANDARD
27793 320 : ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
27794 : : FLT_EVAL_METHOD_UNPREDICTABLE);
27795 81 : case EXCESS_PRECISION_TYPE_FLOAT16:
27796 81 : if (TARGET_80387
27797 75 : && !(TARGET_SSE_MATH && TARGET_SSE))
27798 4 : error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
27799 : return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
27800 0 : default:
27801 0 : gcc_unreachable ();
27802 : }
27803 :
27804 : return FLT_EVAL_METHOD_UNPREDICTABLE;
27805 : }
27806 :
27807 : /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
27808 : bool
27809 356241 : ix86_bitint_type_info (int n, struct bitint_info *info)
27810 : {
27811 356241 : if (n <= 8)
27812 9031 : info->limb_mode = QImode;
27813 347210 : else if (n <= 16)
27814 1893 : info->limb_mode = HImode;
27815 345317 : else if (n <= 32 || (!TARGET_64BIT && n > 64))
27816 45572 : info->limb_mode = SImode;
27817 : else
27818 299745 : info->limb_mode = DImode;
27819 356241 : info->abi_limb_mode = info->limb_mode;
27820 356241 : info->big_endian = false;
27821 356241 : info->extended = bitint_ext_undef;
27822 356241 : return true;
27823 : }
27824 :
27825 : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
27826 : or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
27827 : based on long double bits, go with the default one for the others. */
27828 :
27829 : static machine_mode
27830 3656455 : ix86_c_mode_for_floating_type (enum tree_index ti)
27831 : {
27832 3656455 : if (ti == TI_LONG_DOUBLE_TYPE)
27833 610377 : return (TARGET_LONG_DOUBLE_64 ? DFmode
27834 610345 : : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
27835 3046078 : return default_mode_for_floating_type (ti);
27836 : }
27837 :
27838 : /* Returns modified FUNCTION_TYPE for cdtor callabi. */
27839 : tree
27840 13904 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
27841 : {
27842 13904 : if (TARGET_64BIT
27843 71 : || TARGET_RTD
27844 13975 : || ix86_function_type_abi (fntype) != MS_ABI)
27845 13904 : return fntype;
27846 : /* For 32-bit MS ABI add thiscall attribute. */
27847 0 : tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
27848 0 : TYPE_ATTRIBUTES (fntype));
27849 0 : return build_type_attribute_variant (fntype, attribs);
27850 : }
27851 :
27852 : /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
27853 : decrements by exactly 2 no matter what the position was, there is no pushb.
27854 :
27855 : But as CIE data alignment factor on this arch is -4 for 32bit targets
27856 : and -8 for 64bit targets, we need to make sure all stack pointer adjustments
27857 : are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
27858 :
27859 : poly_int64
27860 273906841 : ix86_push_rounding (poly_int64 bytes)
27861 : {
27862 353608129 : return ROUND_UP (bytes, UNITS_PER_WORD);
27863 : }
27864 :
27865 : /* Use 8 bits of metadata starting at bit 48 for LAM_U48,
27866 : 6 bits of metadata starting at bit 57 for LAM_U57.
   IX86_HWASAN_SHIFT is the bit position of the tag (48, 57, or 0 when
   ix86_lam_type is neither lam_u48 nor lam_u57); IX86_HWASAN_TAG_SIZE is
   the tag width in bits (8, 6, or 0 respectively).  */
27867 : #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
27868 : ? 48 \
27869 : : (ix86_lam_type == lam_u57 ? 57 : 0))
27870 : #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
27871 : ? 8 \
27872 : : (ix86_lam_type == lam_u57 ? 6 : 0))
27873 :
27874 : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
27875 : bool
27876 6241371 : ix86_memtag_can_tag_addresses ()
27877 : {
27878 6241371 : return ix86_lam_type != lam_none && TARGET_LP64;
27879 : }
27880 :
27881 : /* Implement TARGET_MEMTAG_TAG_BITSIZE. */
27882 : unsigned char
27883 435 : ix86_memtag_tag_bitsize ()
27884 : {
27885 435 : return IX86_HWASAN_TAG_SIZE;
27886 : }
27887 :
27888 : /* Implement TARGET_MEMTAG_SET_TAG. */
27889 : rtx
27890 103 : ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
27891 : {
27892 : /* default_memtag_insert_random_tag may
27893 : generate tag with value more than 6 bits. */
27894 103 : if (ix86_lam_type == lam_u57)
27895 : {
27896 103 : unsigned HOST_WIDE_INT and_imm
27897 : = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27898 :
27899 103 : emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
27900 : }
27901 103 : tag = expand_simple_binop (Pmode, ASHIFT, tag,
27902 103 : GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
27903 : /* unsignedp = */1, OPTAB_WIDEN);
27904 103 : rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
27905 : /* unsignedp = */1, OPTAB_DIRECT);
27906 103 : return ret;
27907 : }
27908 :
27909 : /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
27910 : rtx
27911 174 : ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
27912 : {
27913 174 : rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
27914 174 : GEN_INT (IX86_HWASAN_SHIFT), target,
27915 : /* unsignedp = */0,
27916 : OPTAB_DIRECT);
27917 174 : rtx ret = gen_reg_rtx (QImode);
27918 : /* Mask off bit63 when LAM_U57. */
27919 174 : if (ix86_lam_type == lam_u57)
27920 : {
27921 174 : unsigned HOST_WIDE_INT and_imm
27922 : = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27923 174 : emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
27924 174 : gen_int_mode (and_imm, QImode)));
27925 : }
27926 : else
27927 0 : emit_move_insn (ret, gen_lowpart (QImode, tag));
27928 174 : return ret;
27929 : }
27930 :
27931 : /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
27932 : rtx
27933 111 : ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
27934 : {
27935 : /* Leave bit63 alone. */
27936 111 : rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
27937 111 : + (HOST_WIDE_INT_1U << 63) - 1),
27938 111 : Pmode);
27939 111 : rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
27940 : tag_mask, target, true,
27941 : OPTAB_DIRECT);
27942 111 : gcc_assert (untagged_base);
27943 111 : return untagged_base;
27944 : }
27945 :
27946 : /* Implement TARGET_MEMTAG_ADD_TAG. */
27947 : rtx
27948 87 : ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
27949 : {
27950 87 : rtx base_tag = gen_reg_rtx (QImode);
27951 87 : rtx base_addr = gen_reg_rtx (Pmode);
27952 87 : rtx tagged_addr = gen_reg_rtx (Pmode);
27953 87 : rtx new_tag = gen_reg_rtx (QImode);
27954 174 : unsigned HOST_WIDE_INT and_imm
27955 87 : = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
27956 :
27957 : /* When there's "overflow" in tag adding,
27958 : need to mask the most significant bit off. */
27959 87 : emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
27960 87 : emit_move_insn (base_addr,
27961 : ix86_memtag_untagged_pointer (base, NULL_RTX));
27962 87 : emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
27963 87 : emit_move_insn (new_tag, base_tag);
27964 87 : emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
27965 87 : emit_move_insn (tagged_addr,
27966 : ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
27967 87 : return plus_constant (Pmode, tagged_addr, offset);
27968 : }
27969 :
27970 : /* Implement TARGET_HAVE_CCMP. */
27971 : static bool
27972 8079737 : ix86_have_ccmp ()
27973 : {
27974 8079737 : return (bool) TARGET_APX_CCMP;
27975 : }
27976 :
27977 : /* Implement TARGET_MODE_CAN_TRANSFER_BITS. */
27978 : static bool
27979 4591201 : ix86_mode_can_transfer_bits (machine_mode mode)
27980 : {
27981 4591201 : if (GET_MODE_CLASS (mode) == MODE_FLOAT
27982 4544198 : || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
27983 112540 : switch (GET_MODE_INNER (mode))
27984 : {
27985 54066 : case E_SFmode:
27986 54066 : case E_DFmode:
27987 : /* These suffer from normalization upon load when not using SSE. */
27988 54066 : return !(ix86_fpmath & FPMATH_387);
27989 : default:
27990 : return true;
27991 : }
27992 :
27993 : return true;
27994 : }
27995 :
27996 : /* Implement TARGET_REDZONE_CLOBBER. */
27997 : static rtx
27998 2 : ix86_redzone_clobber ()
27999 : {
28000 2 : cfun->machine->asm_redzone_clobber_seen = true;
28001 2 : if (ix86_using_red_zone ())
28002 : {
28003 2 : rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
28004 2 : rtx mem = gen_rtx_MEM (BLKmode, base);
28005 2 : set_mem_size (mem, RED_ZONE_SIZE);
28006 2 : return mem;
28007 : }
28008 : return NULL_RTX;
28009 : }
28010 :
28011 : /* Target-specific selftests. */
28012 :
28013 : #if CHECKING_P
28014 :
28015 : namespace selftest {
28016 :
28017 : /* Verify that hard regs are dumped as expected (in compact mode). */
28018 :
28019 : static void
28020 4 : ix86_test_dumping_hard_regs ()
28021 : {
28022 4 : ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
28023 4 : ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
28024 4 : }
28025 :
28026 : /* Test dumping an insn with repeated references to the same SCRATCH,
28027 : to verify the rtx_reuse code. */
28028 :
28029 : static void
28030 4 : ix86_test_dumping_memory_blockage ()
28031 : {
28032 4 : set_new_first_and_last_insn (NULL, NULL);
28033 :
28034 4 : rtx pat = gen_memory_blockage ();
28035 4 : rtx_reuse_manager r;
28036 4 : r.preprocess (pat);
28037 :
28038 : /* Verify that the repeated references to the SCRATCH show use
28039 : reuse IDS. The first should be prefixed with a reuse ID,
28040 : and the second should be dumped as a "reuse_rtx" of that ID.
28041 : The expected string assumes Pmode == DImode. */
28042 4 : if (Pmode == DImode)
28043 4 : ASSERT_RTL_DUMP_EQ_WITH_REUSE
28044 : ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
28045 : " (unspec:BLK [\n"
28046 : " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
28047 : " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
28048 4 : }
28049 :
28050 : /* Verify loading an RTL dump; specifically a dump of copying
28051 : a param on x86_64 from a hard reg into the frame.
28052 : This test is target-specific since the dump contains target-specific
28053 : hard reg names. */
28054 :
28055 : static void
28056 4 : ix86_test_loading_dump_fragment_1 ()
28057 : {
28058 4 : rtl_dump_test t (SELFTEST_LOCATION,
28059 4 : locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
28060 :
28061 4 : rtx_insn *insn = get_insn_by_uid (1);
28062 :
28063 : /* The block structure and indentation here is purely for
28064 : readability; it mirrors the structure of the rtx. */
28065 4 : tree mem_expr;
28066 4 : {
28067 4 : rtx pat = PATTERN (insn);
28068 4 : ASSERT_EQ (SET, GET_CODE (pat));
28069 4 : {
28070 4 : rtx dest = SET_DEST (pat);
28071 4 : ASSERT_EQ (MEM, GET_CODE (dest));
28072 : /* Verify the "/c" was parsed. */
28073 4 : ASSERT_TRUE (RTX_FLAG (dest, call));
28074 4 : ASSERT_EQ (SImode, GET_MODE (dest));
28075 4 : {
28076 4 : rtx addr = XEXP (dest, 0);
28077 4 : ASSERT_EQ (PLUS, GET_CODE (addr));
28078 4 : ASSERT_EQ (DImode, GET_MODE (addr));
28079 4 : {
28080 4 : rtx lhs = XEXP (addr, 0);
28081 : /* Verify that the "frame" REG was consolidated. */
28082 4 : ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
28083 : }
28084 4 : {
28085 4 : rtx rhs = XEXP (addr, 1);
28086 4 : ASSERT_EQ (CONST_INT, GET_CODE (rhs));
28087 4 : ASSERT_EQ (-4, INTVAL (rhs));
28088 : }
28089 : }
28090 : /* Verify the "[1 i+0 S4 A32]" was parsed. */
28091 4 : ASSERT_EQ (1, MEM_ALIAS_SET (dest));
28092 : /* "i" should have been handled by synthesizing a global int
28093 : variable named "i". */
28094 4 : mem_expr = MEM_EXPR (dest);
28095 4 : ASSERT_NE (mem_expr, NULL);
28096 4 : ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
28097 4 : ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
28098 4 : ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
28099 4 : ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
28100 : /* "+0". */
28101 4 : ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
28102 4 : ASSERT_EQ (0, MEM_OFFSET (dest));
28103 : /* "S4". */
28104 4 : ASSERT_EQ (4, MEM_SIZE (dest));
28105 : /* "A32. */
28106 4 : ASSERT_EQ (32, MEM_ALIGN (dest));
28107 : }
28108 4 : {
28109 4 : rtx src = SET_SRC (pat);
28110 4 : ASSERT_EQ (REG, GET_CODE (src));
28111 4 : ASSERT_EQ (SImode, GET_MODE (src));
28112 4 : ASSERT_EQ (5, REGNO (src));
28113 4 : tree reg_expr = REG_EXPR (src);
28114 : /* "i" here should point to the same var as for the MEM_EXPR. */
28115 4 : ASSERT_EQ (reg_expr, mem_expr);
28116 : }
28117 : }
28118 4 : }
28119 :
28120 : /* Verify that the RTL loader copes with a call_insn dump.
28121 : This test is target-specific since the dump contains a target-specific
28122 : hard reg name. */
28123 :
28124 : static void
28125 4 : ix86_test_loading_call_insn ()
28126 : {
28127 : /* The test dump includes register "xmm0", where requires TARGET_SSE
28128 : to exist. */
28129 4 : if (!TARGET_SSE)
28130 0 : return;
28131 :
28132 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
28133 :
28134 4 : rtx_insn *insn = get_insns ();
28135 4 : ASSERT_EQ (CALL_INSN, GET_CODE (insn));
28136 :
28137 : /* "/j". */
28138 4 : ASSERT_TRUE (RTX_FLAG (insn, jump));
28139 :
28140 4 : rtx pat = PATTERN (insn);
28141 4 : ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
28142 :
28143 : /* Verify REG_NOTES. */
28144 4 : {
28145 : /* "(expr_list:REG_CALL_DECL". */
28146 4 : ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
28147 4 : rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
28148 4 : ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
28149 :
28150 : /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
28151 4 : rtx_expr_list *note1 = note0->next ();
28152 4 : ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
28153 :
28154 4 : ASSERT_EQ (NULL, note1->next ());
28155 : }
28156 :
28157 : /* Verify CALL_INSN_FUNCTION_USAGE. */
28158 4 : {
28159 : /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
28160 4 : rtx_expr_list *usage
28161 4 : = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
28162 4 : ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
28163 4 : ASSERT_EQ (DFmode, GET_MODE (usage));
28164 4 : ASSERT_EQ (USE, GET_CODE (usage->element ()));
28165 4 : ASSERT_EQ (NULL, usage->next ());
28166 : }
28167 4 : }
28168 :
28169 : /* Verify that the RTL loader copes a dump from print_rtx_function.
28170 : This test is target-specific since the dump contains target-specific
28171 : hard reg names. */
28172 :
28173 : static void
28174 4 : ix86_test_loading_full_dump ()
28175 : {
28176 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
28177 :
28178 4 : ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
28179 :
28180 4 : rtx_insn *insn_1 = get_insn_by_uid (1);
28181 4 : ASSERT_EQ (NOTE, GET_CODE (insn_1));
28182 :
28183 4 : rtx_insn *insn_7 = get_insn_by_uid (7);
28184 4 : ASSERT_EQ (INSN, GET_CODE (insn_7));
28185 4 : ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
28186 :
28187 4 : rtx_insn *insn_15 = get_insn_by_uid (15);
28188 4 : ASSERT_EQ (INSN, GET_CODE (insn_15));
28189 4 : ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
28190 :
28191 : /* Verify crtl->return_rtx. */
28192 4 : ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
28193 4 : ASSERT_EQ (0, REGNO (crtl->return_rtx));
28194 4 : ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
28195 4 : }
28196 :
28197 : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
28198 : In particular, verify that it correctly loads the 2nd operand.
28199 : This test is target-specific since these are machine-specific
28200 : operands (and enums). */
28201 :
28202 : static void
28203 4 : ix86_test_loading_unspec ()
28204 : {
28205 4 : rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
28206 :
28207 4 : ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
28208 :
28209 4 : ASSERT_TRUE (cfun);
28210 :
28211 : /* Test of an UNSPEC. */
28212 4 : rtx_insn *insn = get_insns ();
28213 4 : ASSERT_EQ (INSN, GET_CODE (insn));
28214 4 : rtx set = single_set (insn);
28215 4 : ASSERT_NE (NULL, set);
28216 4 : rtx dst = SET_DEST (set);
28217 4 : ASSERT_EQ (MEM, GET_CODE (dst));
28218 4 : rtx src = SET_SRC (set);
28219 4 : ASSERT_EQ (UNSPEC, GET_CODE (src));
28220 4 : ASSERT_EQ (BLKmode, GET_MODE (src));
28221 4 : ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
28222 :
28223 4 : rtx v0 = XVECEXP (src, 0, 0);
28224 :
28225 : /* Verify that the two uses of the first SCRATCH have pointer
28226 : equality. */
28227 4 : rtx scratch_a = XEXP (dst, 0);
28228 4 : ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
28229 :
28230 4 : rtx scratch_b = XEXP (v0, 0);
28231 4 : ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
28232 :
28233 4 : ASSERT_EQ (scratch_a, scratch_b);
28234 :
28235 : /* Verify that the two mems are thus treated as equal. */
28236 4 : ASSERT_TRUE (rtx_equal_p (dst, v0));
28237 :
28238 : /* Verify that the insn is recognized. */
28239 4 : ASSERT_NE(-1, recog_memoized (insn));
28240 :
28241 : /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
28242 4 : insn = NEXT_INSN (insn);
28243 4 : ASSERT_EQ (INSN, GET_CODE (insn));
28244 :
28245 4 : set = single_set (insn);
28246 4 : ASSERT_NE (NULL, set);
28247 :
28248 4 : src = SET_SRC (set);
28249 4 : ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
28250 4 : ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
28251 4 : }
28252 :
28253 : /* Run all target-specific selftests. */
28254 :
28255 : static void
28256 4 : ix86_run_selftests (void)
28257 : {
28258 4 : ix86_test_dumping_hard_regs ();
28259 4 : ix86_test_dumping_memory_blockage ();
28260 :
28261 : /* Various tests of loading RTL dumps, here because they contain
28262 : ix86-isms (e.g. names of hard regs). */
28263 4 : ix86_test_loading_dump_fragment_1 ();
28264 4 : ix86_test_loading_call_insn ();
28265 4 : ix86_test_loading_full_dump ();
28266 4 : ix86_test_loading_unspec ();
28267 4 : }
28268 :
28269 : } // namespace selftest
28270 :
28271 : #endif /* CHECKING_P */
28272 :
28273 : static const scoped_attribute_specs *const ix86_attribute_table[] =
28274 : {
28275 : &ix86_gnu_attribute_table
28276 : };
28277 :
28278 : /* Initialize the GCC target structure. */
28279 : #undef TARGET_RETURN_IN_MEMORY
28280 : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28281 :
28282 : #undef TARGET_LEGITIMIZE_ADDRESS
28283 : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
28284 :
28285 : #undef TARGET_ATTRIBUTE_TABLE
28286 : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28287 : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
28288 : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
28289 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28290 : # undef TARGET_MERGE_DECL_ATTRIBUTES
28291 : # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28292 : #endif
28293 :
28294 : #undef TARGET_INVALID_CONVERSION
28295 : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
28296 :
28297 : #undef TARGET_INVALID_UNARY_OP
28298 : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
28299 :
28300 : #undef TARGET_INVALID_BINARY_OP
28301 : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
28302 :
28303 : #undef TARGET_COMP_TYPE_ATTRIBUTES
28304 : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28305 :
28306 : #undef TARGET_INIT_BUILTINS
28307 : #define TARGET_INIT_BUILTINS ix86_init_builtins
28308 : #undef TARGET_BUILTIN_DECL
28309 : #define TARGET_BUILTIN_DECL ix86_builtin_decl
28310 : #undef TARGET_EXPAND_BUILTIN
28311 : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28312 :
28313 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28314 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
28315 : ix86_builtin_vectorized_function
28316 :
28317 : #undef TARGET_VECTORIZE_BUILTIN_GATHER
28318 : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
28319 :
28320 : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
28321 : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
28322 :
28323 : #undef TARGET_BUILTIN_RECIPROCAL
28324 : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
28325 :
28326 : #undef TARGET_ASM_FUNCTION_EPILOGUE
28327 : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
28328 :
28329 : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
28330 : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
28331 : ix86_print_patchable_function_entry
28332 :
28333 : #undef TARGET_ENCODE_SECTION_INFO
28334 : #ifndef SUBTARGET_ENCODE_SECTION_INFO
28335 : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28336 : #else
28337 : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28338 : #endif
28339 :
28340 : #undef TARGET_ASM_OPEN_PAREN
28341 : #define TARGET_ASM_OPEN_PAREN ""
28342 : #undef TARGET_ASM_CLOSE_PAREN
28343 : #define TARGET_ASM_CLOSE_PAREN ""
28344 :
28345 : #undef TARGET_ASM_BYTE_OP
28346 : #define TARGET_ASM_BYTE_OP ASM_BYTE
28347 :
28348 : #undef TARGET_ASM_ALIGNED_HI_OP
28349 : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28350 : #undef TARGET_ASM_ALIGNED_SI_OP
28351 : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28352 : #ifdef ASM_QUAD
28353 : #undef TARGET_ASM_ALIGNED_DI_OP
28354 : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28355 : #endif
28356 :
28357 : #undef TARGET_PROFILE_BEFORE_PROLOGUE
28358 : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
28359 :
28360 : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
28361 : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
28362 :
28363 : #undef TARGET_ASM_UNALIGNED_HI_OP
28364 : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28365 : #undef TARGET_ASM_UNALIGNED_SI_OP
28366 : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28367 : #undef TARGET_ASM_UNALIGNED_DI_OP
28368 : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28369 :
28370 : #undef TARGET_PRINT_OPERAND
28371 : #define TARGET_PRINT_OPERAND ix86_print_operand
28372 : #undef TARGET_PRINT_OPERAND_ADDRESS
28373 : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
28374 : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
28375 : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
28376 : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
28377 : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
28378 :
28379 : #undef TARGET_SCHED_INIT_GLOBAL
28380 : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
28381 : #undef TARGET_SCHED_ADJUST_COST
28382 : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28383 : #undef TARGET_SCHED_ISSUE_RATE
28384 : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28385 : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28386 : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28387 : ia32_multipass_dfa_lookahead
28388 : #undef TARGET_SCHED_MACRO_FUSION_P
28389 : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
28390 : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
28391 : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
28392 :
28393 : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28394 : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28395 :
28396 : #undef TARGET_MEMMODEL_CHECK
28397 : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
28398 :
28399 : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
28400 : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
28401 :
28402 : #ifdef HAVE_AS_TLS
28403 : #undef TARGET_HAVE_TLS
28404 : #define TARGET_HAVE_TLS true
28405 : #endif
28406 : #undef TARGET_CANNOT_FORCE_CONST_MEM
28407 : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28408 : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28409 : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28410 :
28411 : #undef TARGET_DELEGITIMIZE_ADDRESS
28412 : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28413 :
28414 : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
28415 : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
28416 :
28417 : #undef TARGET_MS_BITFIELD_LAYOUT_P
28418 : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28419 :
28420 : #if TARGET_MACHO
28421 : #undef TARGET_BINDS_LOCAL_P
28422 : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28423 : #else
28424 : #undef TARGET_BINDS_LOCAL_P
28425 : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
28426 : #endif
28427 : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28428 : #undef TARGET_BINDS_LOCAL_P
28429 : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28430 : #endif
28431 :
28432 : #undef TARGET_ASM_OUTPUT_MI_THUNK
28433 : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28434 : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28435 : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28436 :
28437 : #undef TARGET_ASM_FILE_START
28438 : #define TARGET_ASM_FILE_START x86_file_start
28439 :
28440 : #undef TARGET_OPTION_OVERRIDE
28441 : #define TARGET_OPTION_OVERRIDE ix86_option_override
28442 :
28443 : #undef TARGET_REGISTER_MOVE_COST
28444 : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
28445 : #undef TARGET_MEMORY_MOVE_COST
28446 : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
28447 : #undef TARGET_RTX_COSTS
28448 : #define TARGET_RTX_COSTS ix86_rtx_costs
28449 : #undef TARGET_INSN_COST
28450 : #define TARGET_INSN_COST ix86_insn_cost
28451 : #undef TARGET_ADDRESS_COST
28452 : #define TARGET_ADDRESS_COST ix86_address_cost
28453 :
28454 : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
28455 : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
28456 : ix86_use_by_pieces_infrastructure_p
28457 :
28458 : #undef TARGET_OVERLAP_OP_BY_PIECES_P
28459 : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
28460 :
28461 : #undef TARGET_FLAGS_REGNUM
28462 : #define TARGET_FLAGS_REGNUM FLAGS_REG
28463 : #undef TARGET_FIXED_CONDITION_CODE_REGS
28464 : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28465 : #undef TARGET_CC_MODES_COMPATIBLE
28466 : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28467 :
28468 : #undef TARGET_MACHINE_DEPENDENT_REORG
28469 : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28470 :
28471 : #undef TARGET_BUILD_BUILTIN_VA_LIST
28472 : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28473 :
28474 : #undef TARGET_FOLD_BUILTIN
28475 : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
28476 :
28477 : #undef TARGET_GIMPLE_FOLD_BUILTIN
28478 : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
28479 :
28480 : #undef TARGET_COMPARE_VERSION_PRIORITY
28481 : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
28482 :
28483 : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
28484 : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
28485 : ix86_generate_version_dispatcher_body
28486 :
28487 : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
28488 : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
28489 : ix86_get_function_versions_dispatcher
28490 :
28491 : #undef TARGET_ENUM_VA_LIST_P
28492 : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
28493 :
28494 : #undef TARGET_FN_ABI_VA_LIST
28495 : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28496 :
28497 : #undef TARGET_CANONICAL_VA_LIST_TYPE
28498 : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28499 :
28500 : #undef TARGET_EXPAND_BUILTIN_VA_START
28501 : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28502 :
28503 : #undef TARGET_MD_ASM_ADJUST
28504 : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
28505 :
28506 : #undef TARGET_C_EXCESS_PRECISION
28507 : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
28508 : #undef TARGET_C_BITINT_TYPE_INFO
28509 : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
28510 : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
28511 : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
28512 : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
28513 : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
28514 : #undef TARGET_PROMOTE_PROTOTYPES
28515 : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28516 : #undef TARGET_PUSH_ARGUMENT
28517 : #define TARGET_PUSH_ARGUMENT ix86_push_argument
28518 : #undef TARGET_SETUP_INCOMING_VARARGS
28519 : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28520 : #undef TARGET_MUST_PASS_IN_STACK
28521 : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28522 : #undef TARGET_OVERALIGNED_STACK_SLOT_REQUIRED
28523 : #define TARGET_OVERALIGNED_STACK_SLOT_REQUIRED ix86_overaligned_stack_slot_required
28524 : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
28525 : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
28526 : #undef TARGET_FUNCTION_ARG_ADVANCE
28527 : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
28528 : #undef TARGET_FUNCTION_ARG
28529 : #define TARGET_FUNCTION_ARG ix86_function_arg
28530 : #undef TARGET_INIT_PIC_REG
28531 : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
28532 : #undef TARGET_USE_PSEUDO_PIC_REG
28533 : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
28534 : #undef TARGET_FUNCTION_ARG_BOUNDARY
28535 : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
28536 : #undef TARGET_PASS_BY_REFERENCE
28537 : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28538 : #undef TARGET_INTERNAL_ARG_POINTER
28539 : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28540 : #undef TARGET_UPDATE_STACK_BOUNDARY
28541 : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28542 : #undef TARGET_GET_DRAP_RTX
28543 : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28544 : #undef TARGET_STRICT_ARGUMENT_NAMING
28545 : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28546 : #undef TARGET_STATIC_CHAIN
28547 : #define TARGET_STATIC_CHAIN ix86_static_chain
28548 : #undef TARGET_TRAMPOLINE_INIT
28549 : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
28550 : #undef TARGET_RETURN_POPS_ARGS
28551 : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
28552 :
28553 : #undef TARGET_WARN_FUNC_RETURN
28554 : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
28555 :
28556 : #undef TARGET_LEGITIMATE_COMBINED_INSN
28557 : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
28558 :
28559 : #undef TARGET_ASAN_SHADOW_OFFSET
28560 : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
28561 :
28562 : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
28563 : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28564 :
28565 : #undef TARGET_SCALAR_MODE_SUPPORTED_P
28566 : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28567 :
28568 : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
28569 : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
28570 : ix86_libgcc_floating_mode_supported_p
28571 :
28572 : #undef TARGET_VECTOR_MODE_SUPPORTED_P
28573 : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28574 :
28575 : #undef TARGET_C_MODE_FOR_SUFFIX
28576 : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
28577 :
28578 : #ifdef HAVE_AS_TLS
28579 : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
28580 : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
28581 : #endif
28582 :
28583 : #ifdef SUBTARGET_INSERT_ATTRIBUTES
28584 : #undef TARGET_INSERT_ATTRIBUTES
28585 : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
28586 : #endif
28587 :
28588 : #undef TARGET_MANGLE_TYPE
28589 : #define TARGET_MANGLE_TYPE ix86_mangle_type
28590 :
28591 : #undef TARGET_EMIT_SUPPORT_TINFOS
28592 : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
28593 :
28594 : #undef TARGET_STACK_PROTECT_GUARD
28595 : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
28596 :
28597 : #undef TARGET_STACK_PROTECT_GUARD_SYMBOL_P
28598 : #define TARGET_STACK_PROTECT_GUARD_SYMBOL_P \
28599 : ix86_stack_protect_guard_symbol_p
28600 :
28601 : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
28602 : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
28603 : ix86_stack_protect_runtime_enabled_p
28604 :
28605 : #if !TARGET_MACHO
28606 : #undef TARGET_STACK_PROTECT_FAIL
28607 : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
28608 : #endif
28609 :
28610 : #undef TARGET_FUNCTION_VALUE
28611 : #define TARGET_FUNCTION_VALUE ix86_function_value
28612 :
28613 : #undef TARGET_FUNCTION_VALUE_REGNO_P
28614 : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
28615 :
28616 : #undef TARGET_ZERO_CALL_USED_REGS
28617 : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
28618 :
28619 : #undef TARGET_PROMOTE_FUNCTION_MODE
28620 : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
28621 :
28622 : #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
28623 : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
28624 :
28625 : #undef TARGET_MEMBER_TYPE_FORCES_BLK
28626 : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
28627 :
28628 : #undef TARGET_INSTANTIATE_DECLS
28629 : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
28630 :
28631 : #undef TARGET_SECONDARY_RELOAD
28632 : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
28633 : #undef TARGET_SECONDARY_MEMORY_NEEDED
28634 : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
28635 : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
28636 : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
28637 :
28638 : #undef TARGET_CLASS_MAX_NREGS
28639 : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
28640 :
28641 : #undef TARGET_PREFERRED_RELOAD_CLASS
28642 : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
28643 : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
28644 : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
28645 : /* When this hook returns true for MODE, the compiler allows
28646 : registers explicitly used in the rtl to be used as spill registers
28647 : but prevents the compiler from extending the lifetime of these
28648 : registers. */
28649 : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
28650 : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
28651 : #undef TARGET_CLASS_LIKELY_SPILLED_P
28652 : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
28653 : #undef TARGET_CALLEE_SAVE_COST
28654 : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
28655 :
28656 : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
28657 : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
28658 : ix86_builtin_vectorization_cost
28659 : #undef TARGET_VECTORIZE_VEC_PERM_CONST
28660 : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
28661 : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
28662 : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
28663 : ix86_preferred_simd_mode
28664 : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
28665 : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
28666 : ix86_split_reduction
28667 : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
28668 : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
28669 : ix86_autovectorize_vector_modes
28670 : #undef TARGET_VECTORIZE_GET_MASK_MODE
28671 : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
28672 : #undef TARGET_VECTORIZE_CREATE_COSTS
28673 : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
28674 :
28675 : #undef TARGET_SET_CURRENT_FUNCTION
28676 : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
28677 :
28678 : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
28679 : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
28680 :
28681 : #undef TARGET_OPTION_SAVE
28682 : #define TARGET_OPTION_SAVE ix86_function_specific_save
28683 :
28684 : #undef TARGET_OPTION_RESTORE
28685 : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
28686 :
28687 : #undef TARGET_OPTION_POST_STREAM_IN
28688 : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
28689 :
28690 : #undef TARGET_OPTION_PRINT
28691 : #define TARGET_OPTION_PRINT ix86_function_specific_print
28692 :
28693 : #undef TARGET_CAN_INLINE_P
28694 : #define TARGET_CAN_INLINE_P ix86_can_inline_p
28695 :
28696 : #undef TARGET_LEGITIMATE_ADDRESS_P
28697 : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
28698 :
28699 : #undef TARGET_REGISTER_PRIORITY
28700 : #define TARGET_REGISTER_PRIORITY ix86_register_priority
28701 :
28702 : #undef TARGET_REGISTER_USAGE_LEVELING_P
28703 : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
28704 :
28705 : #undef TARGET_LEGITIMATE_CONSTANT_P
28706 : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
28707 :
28708 : #undef TARGET_COMPUTE_FRAME_LAYOUT
28709 : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
28710 :
28711 : #undef TARGET_FRAME_POINTER_REQUIRED
28712 : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
28713 :
28714 : #undef TARGET_CAN_ELIMINATE
28715 : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
28716 :
28717 : #undef TARGET_EXTRA_LIVE_ON_ENTRY
28718 : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
28719 :
28720 : #undef TARGET_ASM_CODE_END
28721 : #define TARGET_ASM_CODE_END ix86_code_end
28722 :
28723 : #undef TARGET_CONDITIONAL_REGISTER_USAGE
28724 : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
28725 :
28726 : #undef TARGET_CANONICALIZE_COMPARISON
28727 : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
28728 :
28729 : #undef TARGET_LOOP_UNROLL_ADJUST
28730 : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
28731 :
28732 : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
28733 : #undef TARGET_SPILL_CLASS
28734 : #define TARGET_SPILL_CLASS ix86_spill_class
28735 :
28736 : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
28737 : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
28738 : ix86_simd_clone_compute_vecsize_and_simdlen
28739 :
28740 : #undef TARGET_SIMD_CLONE_ADJUST
28741 : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
28742 :
28743 : #undef TARGET_SIMD_CLONE_USABLE
28744 : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
28745 :
28746 : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
28747 : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
28748 :
28749 : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
28750 : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
28751 : ix86_float_exceptions_rounding_supported_p
28752 :
28753 : #undef TARGET_MODE_EMIT
28754 : #define TARGET_MODE_EMIT ix86_emit_mode_set
28755 :
28756 : #undef TARGET_MODE_NEEDED
28757 : #define TARGET_MODE_NEEDED ix86_mode_needed
28758 :
28759 : #undef TARGET_MODE_AFTER
28760 : #define TARGET_MODE_AFTER ix86_mode_after
28761 :
28762 : #undef TARGET_MODE_ENTRY
28763 : #define TARGET_MODE_ENTRY ix86_mode_entry
28764 :
28765 : #undef TARGET_MODE_EXIT
28766 : #define TARGET_MODE_EXIT ix86_mode_exit
28767 :
/* Target hook overrides.  Each #undef/#define pair below discards any
   earlier definition of a TARGET_* macro and replaces it with the i386
   value or implementation named on the #define line.  */

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME ix86_get_multilib_abi_name

#undef TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok

/* TARGET_ASM_RELOC_RW_MASK is overridden only when neither TARGET_MACHO
   nor TARGET_DLLIMPORT_DECL_ATTRIBUTES is in effect.  */
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif
28848 :
/* Route the TARGET_MEMTAG_* hooks to the matching ix86_memtag_*
   implementations.  */

#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses

#undef TARGET_MEMTAG_ADD_TAG
#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag

#undef TARGET_MEMTAG_SET_TAG
#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag

#undef TARGET_MEMTAG_EXTRACT_TAG
#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag

#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer

#undef TARGET_MEMTAG_TAG_BITSIZE
#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
28866 :
/* Route the three CCMP hooks to the ix86_*ccmp* implementations.  */

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next

#undef TARGET_HAVE_CCMP
#define TARGET_HAVE_CCMP ix86_have_ccmp

/* Remaining hook overrides that have no dedicated group.  */

#undef TARGET_MODE_CAN_TRANSFER_BITS
#define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits

#undef TARGET_REDZONE_CLOBBER
#define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
28881 :
28882 : static bool
28883 95287 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
28884 : {
28885 : #ifdef OPTION_GLIBC
28886 95287 : if (OPTION_GLIBC)
28887 95287 : return (built_in_function)fcode == BUILT_IN_MEMPCPY;
28888 : else
28889 : return false;
28890 : #else
28891 : return false;
28892 : #endif
28893 : }
28894 :
28895 : #undef TARGET_LIBC_HAS_FAST_FUNCTION
28896 : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
28897 :
28898 : static unsigned
28899 78134 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
28900 : bool boundary_p)
28901 : {
28902 : #ifdef OPTION_GLIBC
28903 78134 : bool glibc_p = OPTION_GLIBC;
28904 : #else
28905 : bool glibc_p = false;
28906 : #endif
28907 78134 : if (glibc_p)
28908 : {
28909 : /* If __FAST_MATH__ is defined, glibc provides libmvec. */
28910 78134 : unsigned int libmvec_ret = 0;
28911 78134 : if (!flag_trapping_math
28912 8300 : && flag_unsafe_math_optimizations
28913 3378 : && flag_finite_math_only
28914 3352 : && !flag_signed_zeros
28915 3352 : && !flag_errno_math)
28916 3352 : switch (cfn)
28917 : {
28918 1396 : CASE_CFN_COS:
28919 1396 : CASE_CFN_COS_FN:
28920 1396 : CASE_CFN_SIN:
28921 1396 : CASE_CFN_SIN_FN:
28922 1396 : if (!boundary_p)
28923 : {
28924 : /* With non-default rounding modes, libmvec provides
28925 : complete garbage in results. E.g.
28926 : _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
28927 : returns 0.00333309174f rather than 1.40129846e-45f. */
28928 587 : if (flag_rounding_math)
28929 : return ~0U;
28930 : /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
28931 : claims libmvec maximum error is 4ulps.
28932 : My own random testing indicates 2ulps for SFmode and
28933 : 0.5ulps for DFmode, but let's go with the 4ulps. */
28934 : libmvec_ret = 4;
28935 : }
28936 : break;
28937 : default:
28938 : break;
28939 : }
28940 78134 : unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
28941 : boundary_p);
28942 78134 : return MAX (ret, libmvec_ret);
28943 : }
28944 0 : return default_libm_function_max_error (cfn, mode, boundary_p);
28945 : }
28946 :
28947 : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
28948 : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
28949 :
#if TARGET_MACHO
/* Implement TARGET_CANNOT_COPY_INSN_P (compiled only for TARGET_MACHO).

   Return true when INSN is a single set whose source is an UNSPEC with
   code UNSPEC_SET_GOT.  Always return false under TARGET_64BIT.  */
static bool
ix86_cannot_copy_insn_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return false;

  rtx pat = single_set (insn);
  if (!pat)
    return false;

  rtx source = SET_SRC (pat);
  return (GET_CODE (source) == UNSPEC
	  && XINT (source, 1) == UNSPEC_SET_GOT);
}

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p

#endif /* TARGET_MACHO */
28972 :
/* The target selftest hook is overridden only when CHECKING_P is
   nonzero.  */
#if CHECKING_P
# undef TARGET_RUN_TARGET_SELFTESTS
# define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* CHECKING_P */

#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
28980 :
28981 : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28982 : sbitmap
28983 737990 : ix86_get_separate_components (void)
28984 : {
28985 737990 : HOST_WIDE_INT offset, to_allocate;
28986 737990 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28987 737990 : bitmap_clear (components);
28988 737990 : struct machine_function *m = cfun->machine;
28989 :
28990 737990 : offset = m->frame.stack_pointer_offset;
28991 737990 : to_allocate = offset - m->frame.sse_reg_save_offset;
28992 :
28993 : /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
28994 : Experiments show that APX PPX can speed up the prologue. If the function
28995 : does not exit early during actual execution, then using APX PPX is faster.
28996 : If the function always exits early during actual execution, then shrink
28997 : wrap separate reduces the number of MOV (PUSH/POP) instructions actually
28998 : executed, thus speeding up execution.
28999 : foo:
29000 : movl $1, %eax
29001 : testq %rdi, %rdi
29002 : jne.L60
29003 : ret ---> early return.
29004 : .L60:
29005 : subq $88, %rsp ---> belong to prologue.
29006 : xorl %eax, %eax
29007 : movq %rbx, 40 (%rsp) ---> belong to prologue.
29008 : movq 8 (%rdi), %rbx
29009 : movq %rbp, 48 (%rsp) ---> belong to prologue.
29010 : movq %rdi, %rbp
29011 : testq %rbx, %rbx
29012 : jne.L61
29013 : movq 40 (%rsp), %rbx
29014 : movq 48 (%rsp), %rbp
29015 : addq $88, %rsp
29016 : ret
29017 : .L61:
29018 : movq %r12, 56 (%rsp) ---> belong to prologue.
29019 : movq %r13, 64 (%rsp) ---> belong to prologue.
29020 : movq %r14, 72 (%rsp) ---> belong to prologue.
29021 : ... ...
29022 :
29023 : Disable shrink wrap separate when PPX is enabled. */
29024 737990 : if ((TARGET_APX_PPX && !crtl->calls_eh_return)
29025 737523 : || cfun->machine->func_type != TYPE_NORMAL
29026 : || TARGET_SEH
29027 737425 : || crtl->stack_realign_needed
29028 727821 : || m->call_ms2sysv)
29029 : return components;
29030 :
29031 : /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
29032 : Disable shrink wrap separate when MOV is prohibited. */
29033 725899 : if (save_regs_using_push_pop (to_allocate))
29034 : return components;
29035 :
29036 32748276 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29037 32396144 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29038 : {
29039 : /* Skip registers with large offsets, where a pseudo may be needed. */
29040 608499 : if (IN_RANGE (offset, -0x8000, 0x7fff))
29041 607433 : bitmap_set_bit (components, regno);
29042 654457 : offset += UNITS_PER_WORD;
29043 : }
29044 :
29045 : /* Don't mess with the following registers. */
29046 352132 : if (frame_pointer_needed)
29047 6349 : bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29048 :
29049 352132 : if (crtl->drap_reg)
29050 129 : bitmap_clear_bit (components, REGNO (crtl->drap_reg));
29051 :
29052 352132 : if (pic_offset_table_rtx)
29053 29890 : bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
29054 :
29055 : return components;
29056 : }
29057 :
29058 : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29059 : sbitmap
29060 9380733 : ix86_components_for_bb (basic_block bb)
29061 : {
29062 9380733 : bitmap in = DF_LIVE_IN (bb);
29063 9380733 : bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29064 9380733 : bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29065 :
29066 9380733 : sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
29067 9380733 : bitmap_clear (components);
29068 :
29069 9380733 : function_abi_aggregator callee_abis;
29070 9380733 : rtx_insn *insn;
29071 110329227 : FOR_BB_INSNS (bb, insn)
29072 100948494 : if (CALL_P (insn))
29073 3080334 : callee_abis.note_callee_abi (insn_callee_abi (insn));
29074 9380733 : HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
29075 :
29076 : /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
29077 872408169 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29078 863027436 : if (!fixed_regs[regno]
29079 863027436 : && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
29080 438604005 : || bitmap_bit_p (in, regno)
29081 413106099 : || bitmap_bit_p (gen, regno)
29082 400561765 : || bitmap_bit_p (kill, regno)))
29083 38307633 : bitmap_set_bit (components, regno);
29084 :
29085 9380733 : return components;
29086 : }
29087 :
29088 : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29089 : void
29090 478144 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
29091 : {
29092 : /* Nothing to do for x86. */
29093 478144 : }
29094 :
29095 : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29096 : void
29097 166473 : ix86_emit_prologue_components (sbitmap components)
29098 : {
29099 166473 : HOST_WIDE_INT cfa_offset;
29100 166473 : struct machine_function *m = cfun->machine;
29101 :
29102 166473 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
29103 166473 : - m->frame.stack_pointer_offset;
29104 15481989 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29105 15315516 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29106 : {
29107 760453 : if (bitmap_bit_p (components, regno))
29108 194522 : ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
29109 810322 : cfa_offset -= UNITS_PER_WORD;
29110 : }
29111 166473 : }
29112 :
29113 : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29114 : void
29115 148615 : ix86_emit_epilogue_components (sbitmap components)
29116 : {
29117 148615 : HOST_WIDE_INT cfa_offset;
29118 148615 : struct machine_function *m = cfun->machine;
29119 148615 : cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
29120 148615 : - m->frame.stack_pointer_offset;
29121 :
29122 13821195 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29123 13672580 : if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
29124 : {
29125 681344 : if (bitmap_bit_p (components, regno))
29126 : {
29127 257206 : rtx reg = gen_rtx_REG (word_mode, regno);
29128 257206 : rtx mem;
29129 257206 : rtx_insn *insn;
29130 :
29131 257206 : mem = choose_baseaddr (cfa_offset, NULL);
29132 257206 : mem = gen_frame_mem (word_mode, mem);
29133 257206 : insn = emit_move_insn (reg, mem);
29134 :
29135 257206 : RTX_FRAME_RELATED_P (insn) = 1;
29136 257206 : add_reg_note (insn, REG_CFA_RESTORE, reg);
29137 : }
29138 737784 : cfa_offset -= UNITS_PER_WORD;
29139 : }
29140 148615 : }
29141 :
29142 : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29143 : void
29144 44725 : ix86_set_handled_components (sbitmap components)
29145 : {
29146 4159425 : for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
29147 4114700 : if (bitmap_bit_p (components, regno))
29148 : {
29149 105903 : cfun->machine->reg_is_wrapped_separately[regno] = true;
29150 105903 : cfun->machine->use_fast_prologue_epilogue = true;
29151 105903 : cfun->machine->frame.save_regs_using_mov = true;
29152 : }
29153 44725 : }
29154 :
29155 : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
29156 : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
29157 : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
29158 : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
29159 : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
29160 : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
29161 : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
29162 : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
29163 : ix86_emit_prologue_components
29164 : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
29165 : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
29166 : ix86_emit_epilogue_components
29167 : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
29168 : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
29169 :
29170 : struct gcc_target targetm = TARGET_INITIALIZER;
29171 :
29172 : #include "gt-i386.h"
|