LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.2 % 13032 11627
Test Date: 2026-05-30 15:37:04 Functions: 97.0 % 473 459
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Subroutines used for code generation on IA-32.
       2              :    Copyright (C) 1988-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #define INCLUDE_STRING
      21              : #define IN_TARGET_CODE 1
      22              : 
      23              : #include "config.h"
      24              : #include "system.h"
      25              : #include "coretypes.h"
      26              : #include "backend.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "memmodel.h"
      30              : #include "gimple.h"
      31              : #include "cfghooks.h"
      32              : #include "cfgloop.h"
      33              : #include "df.h"
      34              : #include "tm_p.h"
      35              : #include "stringpool.h"
      36              : #include "expmed.h"
      37              : #include "optabs.h"
      38              : #include "regs.h"
      39              : #include "emit-rtl.h"
      40              : #include "recog.h"
      41              : #include "cgraph.h"
      42              : #include "diagnostic.h"
      43              : #include "cfgbuild.h"
      44              : #include "alias.h"
      45              : #include "fold-const.h"
      46              : #include "attribs.h"
      47              : #include "calls.h"
      48              : #include "stor-layout.h"
      49              : #include "varasm.h"
      50              : #include "output.h"
      51              : #include "insn-attr.h"
      52              : #include "flags.h"
      53              : #include "except.h"
      54              : #include "explow.h"
      55              : #include "expr.h"
      56              : #include "cfgrtl.h"
      57              : #include "common/common-target.h"
      58              : #include "langhooks.h"
      59              : #include "reload.h"
      60              : #include "gimplify.h"
      61              : #include "dwarf2.h"
      62              : #include "tm-constrs.h"
      63              : #include "cselib.h"
      64              : #include "sched-int.h"
      65              : #include "opts.h"
      66              : #include "tree-pass.h"
      67              : #include "context.h"
      68              : #include "pass_manager.h"
      69              : #include "target-globals.h"
      70              : #include "gimple-iterator.h"
      71              : #include "gimple-fold.h"
      72              : #include "tree-vectorizer.h"
      73              : #include "shrink-wrap.h"
      74              : #include "builtins.h"
      75              : #include "rtl-iter.h"
      76              : #include "tree-iterator.h"
      77              : #include "dbgcnt.h"
      78              : #include "case-cfn-macros.h"
      79              : #include "dojump.h"
      80              : #include "fold-const-call.h"
      81              : #include "tree-vrp.h"
      82              : #include "tree-ssanames.h"
      83              : #include "selftest.h"
      84              : #include "selftest-rtl.h"
      85              : #include "print-rtl.h"
      86              : #include "intl.h"
      87              : #include "ifcvt.h"
      88              : #include "symbol-summary.h"
      89              : #include "sreal.h"
      90              : #include "ipa-cp.h"
      91              : #include "ipa-prop.h"
      92              : #include "ipa-fnsummary.h"
      93              : #include "wide-int-bitmask.h"
      94              : #include "tree-vector-builder.h"
      95              : #include "debug.h"
      96              : #include "dwarf2out.h"
      97              : #include "i386-options.h"
      98              : #include "i386-builtins.h"
      99              : #include "i386-expand.h"
     100              : #include "i386-features.h"
     101              : #include "function-abi.h"
     102              : #include "rtl-error.h"
     103              : #include "gimple-pretty-print.h"
     104              : 
     105              : /* This file should be included last.  */
     106              : #include "target-def.h"
     107              : 
     108              : static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);  /* Forward declarations of file-local (static) helpers.  */
     109              : static void ix86_emit_restore_reg_using_pop (rtx, bool = false);  /* The trailing bool argument defaults to false.  */
     110              : 
     111              : /* Default CHECK_STACK_LIMIT to -1 unless it has already been defined.  */
     112              : #ifndef CHECK_STACK_LIMIT
     113              : #define CHECK_STACK_LIMIT (-1)
     114              : #endif
     115              : 
     116              : /* Return the index of MODE in the mult and division cost tables: 0..3 for QImode, HImode, SImode and DImode, 4 for any other mode.  */
     117              : #define MODE_INDEX(mode)                                        \
     118              :   ((mode) == QImode ? 0                                         \
     119              :    : (mode) == HImode ? 1                                       \
     120              :    : (mode) == SImode ? 2                                       \
     121              :    : (mode) == DImode ? 3                                       \
     122              :    : 4)
     123              : 
     124              : 
     125              : /* Cost table set by -mtune; NULL until it has been set.  */
     126              : const struct processor_costs *ix86_tune_cost = NULL;
     127              : 
     128              : /* Cost table set by -mtune or -Os; NULL until it has been set.  */
     129              : const struct processor_costs *ix86_cost = NULL;
     130              : 
     131              : /* If the average insn count for a single function invocation is
     132              :    lower than this constant, emit fast (but longer) prologue and
     133              :    epilogue code.  */
     134              : #define FAST_PROLOGUE_INSN_COUNT 20
     135              : 
     136              : /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
     137              : static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
     138              : static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
     139              : static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
     140              : 
     141              : /* Array of the smallest register class containing reg number REGNO,
     142              :    indexed by REGNO.  Used by REGNO_REG_CLASS in i386.h.  Entries are
     143              :    listed in register-number order; each group comment names its rows.  */
     144              : enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
     145              : {
     146              :   /* ax, dx, cx, bx */
     147              :   AREG, DREG, CREG, BREG,
     148              :   /* si, di, bp, sp */
     149              :   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
     150              :   /* FP registers */
     151              :   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
     152              :   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
     153              :   /* arg pointer, flags, fpsr, frame (flags and fpsr are in NO_REGS) */
     154              :   NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
     155              :   /* SSE registers (the first one has its own class, SSE_FIRST_REG) */
     156              :   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
     157              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     158              :   /* MMX registers */
     159              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     160              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     161              :   /* REX registers */
     162              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     163              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     164              :   /* SSE REX registers */
     165              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     166              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     167              :   /* AVX-512 SSE registers */
     168              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     169              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     170              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     171              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     172              :   /* Mask registers (the first one is in ALL_MASK_REGS only) */
     173              :   ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     174              :   MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     175              :   /* REX2 registers */
     176              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     177              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     178              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     179              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     180              : };
     181              : 
     182              : /* The "default" register map used in 32bit mode.  It has one entry per
     183              :    GCC hard register, in the same group order as regclass_map.  */
     184              : unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
     185              : {
     186              :   /* general regs */
     187              :   0, 2, 1, 3, 6, 7, 4, 5,
     188              :   /* fp regs */
     189              :   12, 13, 14, 15, 16, 17, 18, 19,
     190              :   /* arg, flags, fpsr, frame */
     191              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     192              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     193              :   /* SSE */
     194              :   21, 22, 23, 24, 25, 26, 27, 28,
     195              :   /* MMX */
     196              :   29, 30, 31, 32, 33, 34, 35, 36,
     197              :   /* extended integer registers */
     198              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     199              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     200              :   /* extended SSE registers */
     201              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     202              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     203              :   /* AVX-512 registers 16-23 */
     204              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     205              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     206              :   /* AVX-512 registers 24-31 */
     207              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     208              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     209              :   /* Mask registers.  NOTE(review): unlike debugger64_register_map, no REX2 entries follow, so those trailing elements are implicitly 0, not INVALID_REGNUM -- confirm this is intended.  */
     210              :   93, 94, 95, 96, 97, 98, 99, 100
     211              : };
     212              : 
     213              : /* The "default" register map used in 64bit mode.  It has one entry per
     214              :    GCC hard register, in the same group order as regclass_map.  */
     215              : unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
     216              : {
     217              :   /* general regs */
     218              :   0, 1, 2, 3, 4, 5, 6, 7,
     219              :   /* fp regs */
     220              :   33, 34, 35, 36, 37, 38, 39, 40,
     221              :   /* arg, flags, fpsr, frame */
     222              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     223              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     224              :   /* SSE */
     225              :   17, 18, 19, 20, 21, 22, 23, 24,
     226              :   /* MMX */
     227              :   41, 42, 43, 44, 45, 46, 47, 48,
     228              :   /* extended integer registers */
     229              :   8, 9, 10, 11, 12, 13, 14, 15,
     230              :   /* extended SSE registers */
     231              :   25, 26, 27, 28, 29, 30, 31, 32,
     232              :   /* AVX-512 registers 16-23 */
     233              :   67, 68, 69, 70, 71, 72, 73, 74,
     234              :   /* AVX-512 registers 24-31 */
     235              :   75, 76, 77, 78, 79, 80, 81, 82,
     236              :   /* Mask registers */
     237              :   118, 119, 120, 121, 122, 123, 124, 125,
     238              :   /* REX2 extended integer registers */
     239              :   130, 131, 132, 133, 134, 135, 136, 137,
     240              :   138, 139, 140, 141, 142, 143, 144, 145
     241              : };
     242              : 
     243              : /* Define the register numbers to be used in Dwarf debugging information.
     244              :    The SVR4 reference port C compiler uses the following register numbers
     245              :    in its Dwarf output code:
     246              :         0 for %eax (gcc regno = 0)
     247              :         1 for %ecx (gcc regno = 2)
     248              :         2 for %edx (gcc regno = 1)
     249              :         3 for %ebx (gcc regno = 3)
     250              :         4 for %esp (gcc regno = 7)
     251              :         5 for %ebp (gcc regno = 6)
     252              :         6 for %esi (gcc regno = 4)
     253              :         7 for %edi (gcc regno = 5)
     254              :    The following three DWARF register numbers are never generated by
     255              :    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
     256              :    believed these numbers have these meanings.
     257              :         8  for %eip    (no gcc equivalent)
     258              :         9  for %eflags (gcc regno = 17)
     259              :         10 for %trapno (no gcc equivalent)
     260              :    It is not at all clear how we should number the FP stack registers
     261              :    for the x86 architecture.  If the version of SDB on x86/svr4 were
     262              :    a bit less brain dead with respect to floating-point then we would
     263              :    have a precedent to follow with respect to DWARF register numbers
     264              :    for x86 FP registers, but the SDB on x86/svr4 was so completely
     265              :    broken with respect to FP registers that it is hardly worth thinking
     266              :    of it as something to strive for compatibility with.
     267              :    The version of x86/svr4 SDB I had does (partially)
     268              :    seem to believe that DWARF register number 11 is associated with
     269              :    the x86 register %st(0), but that's about all.  Higher DWARF
     270              :    register numbers don't seem to be associated with anything in
     271              :    particular, and even for DWARF regno 11, SDB only seemed to under-
     272              :    stand that it should say that a variable lives in %st(0) (when
     273              :    asked via an `=' command) if we said it was in DWARF regno 11,
     274              :    but SDB still printed garbage when asked for the value of the
     275              :    variable in question (via a `/' command).
     276              :    (Also note that the labels SDB printed for various FP stack regs
     277              :    when doing an `x' command were all wrong.)
     278              :    Note that these problems generally don't affect the native SVR4
     279              :    C compiler because it doesn't allow the use of -O with -g and
     280              :    because when it is *not* optimizing, it allocates a memory
     281              :    location for each floating-point variable, and the memory
     282              :    location is what gets described in the DWARF AT_location
     283              :    attribute for the variable in question.
     284              :    Regardless of the severe mental illness of the x86/svr4 SDB, we
     285              :    do something sensible here and we use the following DWARF
     286              :    register numbers.  Note that these are all stack-top-relative
     287              :    numbers.
     288              :         11 for %st(0) (gcc regno = 8)
     289              :         12 for %st(1) (gcc regno = 9)
     290              :         13 for %st(2) (gcc regno = 10)
     291              :         14 for %st(3) (gcc regno = 11)
     292              :         15 for %st(4) (gcc regno = 12)
     293              :         16 for %st(5) (gcc regno = 13)
     294              :         17 for %st(6) (gcc regno = 14)
     295              :         18 for %st(7) (gcc regno = 15)
     296              : */
     297              : unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
     298              : {
     299              :   /* general regs, in gcc regno order: ax, dx, cx, bx, si, di, bp, sp */
     300              :   0, 2, 1, 3, 6, 7, 5, 4,
     301              :   /* fp regs: %st(0)..%st(7), stack-top-relative */
     302              :   11, 12, 13, 14, 15, 16, 17, 18,
     303              :   /* arg, flags (DWARF 9, %eflags), fpsr, frame */
     304              :   IGNORED_DWARF_REGNUM, 9,
     305              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     306              :   /* SSE registers */
     307              :   21, 22, 23, 24, 25, 26, 27, 28,
     308              :   /* MMX registers */
     309              :   29, 30, 31, 32, 33, 34, 35, 36,
     310              :   /* extended integer registers */
     311              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     312              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     313              :   /* extended SSE registers */
     314              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     315              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     316              :   /* AVX-512 registers 16-23 */
     317              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     318              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     319              :   /* AVX-512 registers 24-31 */
     320              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     321              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     322              :   /* Mask registers.  NOTE(review): unlike debugger64_register_map, no REX2 entries follow, so those trailing elements are implicitly 0, not INVALID_REGNUM -- confirm this is intended.  */
     323              :   93, 94, 95, 96, 97, 98, 99, 100
     324              : };
     325              : 
     326              : /* Define parameter passing and return registers.  Each table holds hard register numbers (the *_REG constants).  */
     327              : 
     328              : static int const x86_64_int_parameter_registers[6] =
     329              : {
     330              :   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
     331              : };
     332              : 
     333              : static int const x86_64_ms_abi_int_parameter_registers[4] =  /* MS ABI variant: only four registers.  */
     334              : {
     335              :   CX_REG, DX_REG, R8_REG, R9_REG
     336              : };
     337              : 
     338              : /* Similar to Clang's preserve_none function parameter passing.
     339              :    NB: DI_REG and SI_REG are used as the last two entries; see ix86_function_arg_regno_p.  */
     340              : 
     341              : static int const x86_64_preserve_none_int_parameter_registers[6] =
     342              : {
     343              :   R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
     344              : };
     345              : 
     346              : static int const x86_64_int_return_registers[2] =
     347              : {
     348              :   AX_REG, DX_REG
     349              : };
     350              : 
     351              : /* Define the structure for the machine field in struct function.  Entries are chained through their NEXT pointer.  */
     352              : 
     353              : struct GTY(()) stack_local_entry {
     354              :   unsigned short mode;  /* NOTE(review): presumably a machine_mode value stored as unsigned short -- confirm.  */
     355              :   unsigned short n;  /* NOTE(review): meaning not visible here; presumably a slot number -- confirm.  */
     356              :   rtx rtl;
     357              :   struct stack_local_entry *next;  /* Next entry in the chain.  */
     358              : };
     359              : 
     360              : /* The CPU we are scheduling for.  */
     361              : enum attr_cpu ix86_schedule;
     362              : 
     363              : /* The CPU we are optimizing for.  */
     364              : enum processor_type ix86_tune;
     365              : 
     366              : /* The instruction set architecture to use.  */
     367              : enum processor_type ix86_arch;
     368              : 
     369              : /* Nonzero if the processor has an SSE prefetch instruction.  */
     370              : unsigned char ix86_prefetch_sse;
     371              : 
     372              : /* Preferred alignment for stack boundary in bits.  */
     373              : unsigned int ix86_preferred_stack_boundary;
     374              : 
     375              : /* Alignment for incoming stack boundary in bits, as specified on
     376              :    the command line.  */
     377              : unsigned int ix86_user_incoming_stack_boundary;
     378              : 
     379              : /* Default alignment for incoming stack boundary in bits.  */
     380              : unsigned int ix86_default_incoming_stack_boundary;
     381              : 
     382              : /* Alignment for incoming stack boundary in bits.  */
     383              : unsigned int ix86_incoming_stack_boundary;
     384              : 
     385              : /* True if there is no direct access to extern symbols.  */
     386              : bool ix86_has_no_direct_extern_access;
     387              : 
     388              : /* Calling-ABI-specific va_list type nodes.  */
     389              : tree sysv_va_list_type_node;
     390              : tree ms_va_list_type_node;
     391              : 
     392              : /* Prefix built by ASM_GENERATE_INTERNAL_LABEL (16-byte buffer), plus a length (presumably of that prefix -- confirm).  */
     393              : char internal_label_prefix[16];
     394              : int internal_label_prefix_len;
     395              : 
     396              : /* Fence to use after a loop using movnt.  */
     397              : tree x86_mfence;
     398              : 
     399              : /* Register class used for passing a given 64-bit part of an argument.
     400              :    These represent classes as documented by the psABI, with the exception
     401              :    of the SSESF and SSEDF classes, which are basically the SSE class except
     402              :    that gcc will use an SF or DFmode move instead of DImode to avoid
     403              :    reformatting penalties.
     404              :    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
     405              :    whenever possible (upper half does contain padding).  */
     406              : enum x86_64_reg_class
     407              :   {
     408              :     X86_64_NO_CLASS,
     409              :     X86_64_INTEGER_CLASS,
     410              :     X86_64_INTEGERSI_CLASS,
     411              :     X86_64_SSE_CLASS,
     412              :     X86_64_SSEHF_CLASS,  /* NOTE(review): presumably the HFmode analogue of SSESF/SSEDF -- confirm.  */
     413              :     X86_64_SSESF_CLASS,
     414              :     X86_64_SSEDF_CLASS,
     415              :     X86_64_SSEUP_CLASS,
     416              :     X86_64_X87_CLASS,
     417              :     X86_64_X87UP_CLASS,
     418              :     X86_64_COMPLEX_X87_CLASS,
     419              :     X86_64_MEMORY_CLASS
     420              :   };
     421              : 
     422              : #define MAX_CLASSES 8  /* NOTE(review): presumably the maximum number of classes recorded for one argument -- confirm.  */
     423              : 
     424              : /* Table of constants used by fldpi, fldln2, etc., and a flag that is presumably set once the table has been filled in -- confirm.  */
     425              : static REAL_VALUE_TYPE ext_80387_constants_table [5];
     426              : static bool ext_80387_constants_init;
     427              : 
     428              : /* Forward declarations of file-local (static) functions.  */
     429              : static rtx ix86_function_value (const_tree, const_tree, bool);
     430              : static bool ix86_function_value_regno_p (const unsigned int);
     431              : static unsigned int ix86_function_arg_boundary (machine_mode,
     432              :                                                 const_tree);
     433              : static bool ix86_overaligned_stack_slot_required (void);
     434              : static rtx ix86_static_chain (const_tree, bool);
     435              : static int ix86_function_regparm (const_tree, const_tree);
     436              : static void ix86_compute_frame_layout (void);
     437              : static tree ix86_canonical_va_list_type (tree);
     438              : static unsigned int split_stack_prologue_scratch_regno (void);
     439              : static bool i386_asm_output_addr_const_extra (FILE *, rtx);
     440              : 
     441              : static bool ix86_can_inline_p (tree, tree);
     442              : static unsigned int ix86_minimum_incoming_stack_boundary (bool);
     443              : /* Sixteen condition codes numbered from X86_CCO = 0; each X86_CCN<x> immediately follows its X86_CC<x>.  */
     444              : typedef enum ix86_flags_cc
     445              : {
     446              :   X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
     447              :   X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
     448              :   X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
     449              :   X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
     450              : } ix86_cc;
     451              : /* One "{dfv=...}" string per ix86_cc value, in the same order.  NOTE(review): presumably indexed by ix86_cc -- confirm against the users of this table.  */
     452              : static const char *ix86_ccmp_dfv_mapping[] =
     453              : {
     454              :   "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     455              :   "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
     456              :   "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     457              :   "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
     458              : };
     459              : 
     460              : 
     461              : /* Flags for -mtune= and -march=.  NOTE(review): going by the names, the first records that -mtune= was defaulted and the second that -march= was specified -- confirm.  */
     462              : int ix86_tune_defaulted;
     463              : int ix86_arch_specified;
     464              : 
     465              : /* Return true if a red-zone is in use, which requires TARGET_RED_ZONE
     466              :    and !TARGET_64BIT_MS_ABI.  We also can't use red-zone when there are
     467              :    local indirect jumps, like "indirect_jump" or "tablejump", which jump
     468              :    to another place in the function, unless the indirect branch type is
     469              :    indirect_branch_keep: the "call" in the indirect thunk pushes the
     470              :    return address onto the stack, destroying the red-zone.
     471              : 
     472              :    NB: Don't use red-zone for functions with no_caller_saved_registers
     473              :    when APX EGPRs (32 GPRs) or SSE (16 XMM registers) are enabled: the
     474              :    128-byte red-zone is too small for 31 GPRs or 15 GPRs + 16 XMM registers.
     475              : 
     476              :    TODO: If we can reserve the first 2 WORDs in the red-zone, one for PUSH
     477              :    and another for CALL, we can allow local indirect jumps with indirect thunk.  */
     478              : 
     479              : bool
     480      9898619 : ix86_using_red_zone (void)
     481              : {
     482      9898619 :   return (TARGET_RED_ZONE
     483      8956449 :           && !TARGET_64BIT_MS_ABI
     484      8653740 :           && ((!TARGET_APX_EGPR && !TARGET_SSE)
     485      8630735 :               || (cfun->machine->call_saved_registers
     486      8630735 :                   != TYPE_NO_CALLER_SAVED_REGISTERS))
     487     18552298 :           && (!cfun->machine->has_local_indirect_jump
     488        47510 :               || cfun->machine->indirect_branch_type == indirect_branch_keep));
     489              : }
     490              : 
     491              : /* Return true if profiling code should be emitted before the
     492              :    prologue, i.e. when flag_fentry is nonzero; otherwise return false.
     493              :    Note: For x86 with "hotfix" it is sorried.  */
     494              : static bool
     495      4496363 : ix86_profile_before_prologue (void)
     496              : {
     497      4496363 :   return flag_fentry != 0;
     498              : }
     499              : 
     500              : /* Update register usage after having seen the compiler flags: adjusts call_used_regs, accessible_reg_set and the CLOBBERED_REGS class contents.  */
     501              : 
     502              : static void
     503       829760 : ix86_conditional_register_usage (void)
     504              : {
     505       829760 :   int i, c_mask;
     506              : 
     507              :   /* If the current function (cfun) has no caller-saved registers, mark
     508              :      every register as preserved (call_used_regs[i] = 0) except fixed_regs
     509              :      and registers used for the function return value, since
     510              :      aggregate_value_p checks call_used_regs[regno] on return value.  */
     511       829760 :   if (cfun
     512        67800 :       && (cfun->machine->call_saved_registers
     513        67800 :           == TYPE_NO_CALLER_SAVED_REGISTERS))
     514       462489 :     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     515       457516 :       if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
     516       422283 :         call_used_regs[i] = 0;
     517              : 
     518              :   /* For 32-bit targets, disable the REX integer, REX SSE and extended REX SSE registers.  */
     519       829760 :   if (! TARGET_64BIT)
     520              :     {
     521       134550 :       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
     522       119600 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     523       134550 :       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     524       119600 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     525       254150 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     526       239200 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     527              :     }
     528              : 
     529              :   /* Pick the mask for the current ABI; see the definition of CALL_USED_REGISTERS in i386.h.  */
     530       829760 :   c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
     531              : 
     532       829760 :   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);  /* Rebuilt by the loop below.  */
     533              : 
     534     77167680 :   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     535              :     {
     536              :       /* Set/reset conditionally defined registers from the CALL_USED_REGISTERS
     537              :          initializer: entries > 1 are masks, reduced to 0/1 by testing c_mask.  */
     538     76337920 :       if (call_used_regs[i] > 1)
     539     13196537 :         call_used_regs[i] = !!(call_used_regs[i] & c_mask);
     540              : 
     541              :       /* Compute the CLOBBERED_REGS register set as the registers of the
     542              :          GENERAL_REGS register set that are call used.  */
     543     76337920 :       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
     544     76337920 :           && call_used_regs[i])
     545     23107545 :         SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
     546              :     }
     547              : 
     548              :   /* If MMX is disabled, make the MMX_REGS registers inaccessible.  */
     549       829760 :   if (! TARGET_MMX)
     550       400526 :     accessible_reg_set &= ~reg_class_contents[MMX_REGS];
     551              : 
     552              :   /* If SSE is disabled, make the ALL_SSE_REGS registers inaccessible.  */
     553       829760 :   if (! TARGET_SSE)
     554       394552 :     accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
     555              : 
     556              :   /* If the FPU is disabled (neither TARGET_80387 nor TARGET_FLOAT_RETURNS_IN_80387), make the FLOAT_REGS registers inaccessible.  */
     557       829760 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
     558       395772 :     accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
     559              : 
     560              :   /* If AVX512F is disabled, disable the extended REX SSE registers and the ALL_MASK_REGS registers.  */
     561       829760 :   if (! TARGET_AVX512F)
     562              :     {
     563     10014020 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     564      9424960 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     565              : 
     566      1178120 :       accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
     567              :     }
     568              : 
     569              :   /* If APX EGPR is disabled, or the target is not 64-bit, disable the REX2 integer registers.  */
     570       829760 :   if (! (TARGET_APX_EGPR && TARGET_64BIT))
     571              :     {
     572     14094530 :       for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
     573     13265440 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     574              :     }
     575       829760 : }
     576              : 
     577              : /* Canonicalize a comparison from one we don't have to one we do have,
     578              :    rewriting *CODE, *OP0 and *OP1 in place unless OP0_PRESERVE_VALUE.  */
     579              : static void
     580     24135710 : ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
     581              :                               bool op0_preserve_value)
     582              : {
     583              :   /* The operand order of an x87 ficom compare is forced by combine in the
     584              :      simplify_comparison () function: a FLOAT operator is treated as RTX_OBJ,
     585              :      takes precedence over other operators and is always put first.  Swap
     586              :      the condition and the operands to match the ficom instruction.  */
     587     24135710 :   if (!op0_preserve_value
     588     23331142 :       && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
     589              :     {
     590           14 :       enum rtx_code scode = swap_condition ((enum rtx_code) *code);
     591              : 
     592              :       /* We are called only for compares that are split into a SAHF insn.
     593              :          Ensure that a setcc/jcc insn exists for the swapped condition.  */
     594           14 :       if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
     595              :         {
     596            6 :           std::swap (*op0, *op1);
     597            6 :           *code = (int) scode;
     598            6 :           return;
     599              :         }
     600              :     }
     601              : 
     602              :   /* SUB (a, b) underflows precisely when a < b.  Convert
     603              :      (compare (minus (a b)) a) to (compare (a b))
     604              :      to match the *sub<mode>_3 pattern.  */
     605     23331136 :   if (!op0_preserve_value
     606     23331136 :       && (*code == GTU || *code == LEU)
     607      1809527 :       && GET_CODE (*op0) == MINUS
     608        80494 :       && rtx_equal_p (XEXP (*op0, 0), *op1))
     609              :     {
     610          488 :       *op1 = XEXP (*op0, 1);
     611          488 :       *op0 = XEXP (*op0, 0);
     612          488 :       *code = (int) swap_condition ((enum rtx_code) *code);
     613          488 :       return;
     614              :     }
     615              : 
     616              :   /* Swap the operands of a GTU comparison to canonicalize
     617              :      an addcarry/subborrow comparison.  */
     618     24135216 :   if (!op0_preserve_value
     619     23330648 :       && *code == GTU
     620       825511 :       && GET_CODE (*op0) == PLUS
     621       321189 :       && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
     622        44232 :       && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
     623     24175346 :       && GET_CODE (*op1) == ZERO_EXTEND)
     624              :     {
     625        36826 :       std::swap (*op0, *op1);
     626        36826 :       *code = (int) swap_condition ((enum rtx_code) *code);
     627        36826 :       return;
     628              :     }
     629              : }
     630              : 
     631              : /* Hook to determine if CALLER can safely inline CALLEE, based on their
     632              :    target options.  */
     633              : static bool
     634      9873628 : ix86_can_inline_p (tree caller, tree callee)
     635              : {
     636      9873628 :   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
     637      9873628 :   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
     638              : 
     639              :   /* Differences in these flags can be tolerated for always_inline
     640              :      callees.  Let's hope the user knows what they are doing.  */
     641      9873628 :   unsigned HOST_WIDE_INT always_inline_safe_mask
     642              :          = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
     643              :             | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
     644              :             | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
     645              :             | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
     646              :             | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
     647              :             | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
     648              :             | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
     649              : 
     650              : 
     651      9873628 :   if (!callee_tree)
     652      9276553 :     callee_tree = target_option_default_node;
     653      9873628 :   if (!caller_tree)
     654      9276618 :     caller_tree = target_option_default_node;
     655      9873628 :   if (callee_tree == caller_tree)
     656              :     return true;
     657              : 
     658         5292 :   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
     659         5292 :   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
     660         5292 :   bool ret = false;
     661         5292 :   bool always_inline
     662         5292 :     = (DECL_DISREGARD_INLINE_LIMITS (callee)
     663         9939 :        && lookup_attribute ("always_inline",
     664         4647 :                             DECL_ATTRIBUTES (callee)));
     665              : 
     666              :   /* If the callee only uses GPRs, ignore MASK_80387.  */
     667         5292 :   if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
     668         1030 :     always_inline_safe_mask |= MASK_80387;
     669              : 
     670         5292 :   cgraph_node *callee_node = cgraph_node::get (callee);
     671              :   /* The callee's ISA options must be a subset of the caller's, e.g. an
     672              :      SSE4 function can inline an SSE2 function but an SSE2 function can't
     673              :      inline an SSE4 function.  */
     674         5292 :   if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
     675              :        != callee_opts->x_ix86_isa_flags)
     676         5056 :       || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
     677              :           != callee_opts->x_ix86_isa_flags2))
     678              :     ret = false;
     679              : 
     680              :   /* See if we have the same non-ISA options.  */
     681         5019 :   else if ((!always_inline
     682          388 :             && caller_opts->x_target_flags != callee_opts->x_target_flags)
     683         4975 :            || (caller_opts->x_target_flags & ~always_inline_safe_mask)
     684         4975 :                != (callee_opts->x_target_flags & ~always_inline_safe_mask))
     685              :     ret = false;
     686              : 
     687         4975 :   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
     688              :            /* If the callee doesn't use FP expressions, differences in
     689              :               ix86_fpmath can be ignored.  We are called from FEs
     690              :               for multi-versioning call optimization, so beware of
     691              :               ipa_fn_summaries not being available.  */
     692         1247 :            && (! ipa_fn_summaries
     693         1247 :                || ipa_fn_summaries->get (callee_node) == NULL
     694         1247 :                || ipa_fn_summaries->get (callee_node)->fp_expressions))
     695              :     ret = false;
     696              : 
     697              :   /* At this point we cannot identify whether the arch or tune setting
     698              :      comes from a target attribute or not.  So the most conservative way
     699              :      is to allow a callee that uses the default arch and tune strings to
     700              :      be inlined.  */
     701         4701 :   else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
     702         1430 :            && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
     703              :     ret = true;
     704              : 
     705              :   /* See if arch, tune, etc. are the same.  As the ISA flags check above
     706              :      already ensures the callee's ISA is a subset of the caller's, do not
     707              :      block an always_inline callee even if it has a different arch.  */
     708         3279 :   else if (!always_inline && caller_opts->arch != callee_opts->arch)
     709              :     ret = false;
     710              : 
     711           15 :   else if (!always_inline && caller_opts->tune != callee_opts->tune)
     712              :     ret = false;
     713              : 
     714         3279 :   else if (!always_inline
     715           15 :            && caller_opts->branch_cost != callee_opts->branch_cost)
     716              :     ret = false;
     717              : 
     718              :   else
     719      9873037 :     ret = true;
     720              : 
     721              :   return ret;
     722              : }
     723              : 
     724              : /* Return true if EXP goes in large data/bss.  */
     725              : 
     726              : static bool
     727     80801768 : ix86_in_large_data_p (tree exp)
     728              : {
     729     80801768 :   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
     730     80801530 :       && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
     731              :     return false;
     732              : 
     733         1147 :   if (exp == NULL_TREE)
     734              :     return false;
     735              : 
     736              :   /* Functions are never large data.  */
     737         1147 :   if (TREE_CODE (exp) == FUNCTION_DECL)
     738              :     return false;
     739              : 
     740              :   /* Automatic variables are never large data.  */
     741          279 :   if (VAR_P (exp) && !is_global_var (exp))
     742              :     return false;
     743              : 
     744          279 :   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
     745              :     {
     746           51 :       const char *section = DECL_SECTION_NAME (exp);
     747           51 :       if (strcmp (section, ".ldata") == 0
     748           51 :           || startswith (section, ".ldata.")
     749           51 :           || strcmp (section, ".lbss") == 0
     750           51 :           || startswith (section, ".lbss.")
     751           99 :           || startswith (section, ".gnu.linkonce.lb."))
     752              :         return true;
     753              :       return false;
     754              :     }
     755              :   else
     756              :     {
     757          228 :       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
     758              : 
     759              :       /* If this is an incomplete type with size 0, then we can't put it
     760              :          in data because it might be too big when completed.  Also,
     761              :          int_size_in_bytes returns -1 if the size can vary or is larger
     762              :          than an integer, in which case it is also safer to assume that
     763              :          it goes in large data.  */
     764          228 :       if (size <= 0 || size > ix86_section_threshold)
     765              :         return true;
     766              :     }
     767              : 
     768              :   return false;
     769              : }
     770              : 
     771              : /* i386-specific section flag to mark large sections (reuses SECTION_MACH_DEP).  */
     772              : #define SECTION_LARGE SECTION_MACH_DEP
     773              : 
     774              : /* Return the appropriate section for output of DECL.  DECL is either a
     775              :    `VAR_DECL' node or a constant of some sort.  RELOC indicates whether
     776              :    forming the initial value of DECL requires link-time relocations.
     777              :    ALIGN is only passed on to default_elf_select_section.  */
     778              : 
     779              : ATTRIBUTE_UNUSED static section *
     780      1663816 : x86_64_elf_select_section (tree decl, int reloc,
     781              :                            unsigned HOST_WIDE_INT align)
     782              : {
     783      1663816 :   if (ix86_in_large_data_p (decl))
     784              :     {
     785            6 :       const char *sname = NULL;
     786            6 :       unsigned int flags = SECTION_WRITE | SECTION_LARGE;
     787            6 :       switch (categorize_decl_for_section (decl, reloc))
     788              :         {
     789            1 :         case SECCAT_DATA:
     790            1 :           sname = ".ldata";
     791            1 :           break;
     792            0 :         case SECCAT_DATA_REL:
     793            0 :           sname = ".ldata.rel";
     794            0 :           break;
     795            0 :         case SECCAT_DATA_REL_LOCAL:
     796            0 :           sname = ".ldata.rel.local";
     797            0 :           break;
     798            0 :         case SECCAT_DATA_REL_RO:
     799            0 :           sname = ".ldata.rel.ro";
     800            0 :           break;
     801            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     802            0 :           sname = ".ldata.rel.ro.local";
     803            0 :           break;
     804            0 :         case SECCAT_BSS:
     805            0 :           sname = ".lbss";
     806            0 :           flags |= SECTION_BSS;
     807            0 :           break;
     808              :         case SECCAT_RODATA:
     809              :         case SECCAT_RODATA_MERGE_STR:
     810              :         case SECCAT_RODATA_MERGE_STR_INIT:
     811              :         case SECCAT_RODATA_MERGE_CONST:
     812              :           sname = ".lrodata";
     813              :           flags &= ~SECTION_WRITE;
     814              :           break;
     815            0 :         case SECCAT_SRODATA:
     816            0 :         case SECCAT_SDATA:
     817            0 :         case SECCAT_SBSS:
     818            0 :           gcc_unreachable ();
     819              :         case SECCAT_TEXT:
     820              :         case SECCAT_TDATA:
     821              :         case SECCAT_TBSS:
     822              :           /* We don't split these for the medium model.  Place them into
     823              :              default sections and hope for the best.  */
     824              :           break;
     825              :         }
     826            1 :       if (sname)
     827              :         {
     828              :           /* We might get called with string constants, but get_named_section
     829              :              doesn't like them as they are not DECLs.  Also, we need to set
     830              :              the flags ourselves in that case.  */
     831            6 :           if (!DECL_P (decl))
     832            3 :             return get_section (sname, flags, NULL);
     833            3 :           return get_named_section (decl, sname, reloc);
     834              :         }
     835              :     }
     836      1663810 :   return default_elf_select_section (decl, reloc, align);
     837              : }
     838              : 
     839              : /* Return the section flags for section NAME based on the properties
     840              :    of DECL and whether or not RELOC indicates that DECL's initializer
     841              :    might contain runtime relocations.  */
     842              : 
     843              : static unsigned int ATTRIBUTE_UNUSED
     844     67413942 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
     845              : {
     846     67413942 :   unsigned int flags = default_section_type_flags (decl, name, reloc);
     847              : 
     848     67413942 :   if (ix86_in_large_data_p (decl))
     849           10 :     flags |= SECTION_LARGE;
     850              : 
     851     67413942 :   if (decl == NULL_TREE
     852          375 :       && (strcmp (name, ".ldata.rel.ro") == 0
     853          375 :           || strcmp (name, ".ldata.rel.ro.local") == 0))
     854            0 :     flags |= SECTION_RELRO;
     855              : 
     856     67413942 :   if (strcmp (name, ".lbss") == 0
     857     67413938 :       || startswith (name, ".lbss.")
     858    134827877 :       || startswith (name, ".gnu.linkonce.lb."))
     859              :     {
     860            7 :       flags |= SECTION_BSS;
     861              :       /* Clear SECTION_NOTYPE so that .lbss etc. are marked @nobits in
     862              :          default_elf_asm_named_section.  */
     863            7 :       flags &= ~SECTION_NOTYPE;
     864              :     }
     865              : 
     866     67413942 :   return flags;
     867              : }
     868              : 
     869              : /* Build up a unique section name for DECL and record it with
     870              :    set_decl_section_name.  RELOC indicates whether the initial value
     871              :    of DECL requires link-time relocations.  Anything not handled here
     872              :    is left to default_unique_section.  */
     873              : 
     874              : static void ATTRIBUTE_UNUSED
     875      1804869 : x86_64_elf_unique_section (tree decl, int reloc)
     876              : {
     877      1804869 :   if (ix86_in_large_data_p (decl))
     878              :     {
     879            3 :       const char *prefix = NULL;
     880              :       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
     881            3 :       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
     882              : 
     883            3 :       switch (categorize_decl_for_section (decl, reloc))
     884              :         {
     885            0 :         case SECCAT_DATA:
     886            0 :         case SECCAT_DATA_REL:
     887            0 :         case SECCAT_DATA_REL_LOCAL:
     888            0 :         case SECCAT_DATA_REL_RO:
     889            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     890            0 :           prefix = one_only ? ".ld" : ".ldata";
     891              :           break;
     892            3 :         case SECCAT_BSS:
     893            3 :           prefix = one_only ? ".lb" : ".lbss";
     894              :           break;
     895              :         case SECCAT_RODATA:
     896              :         case SECCAT_RODATA_MERGE_STR:
     897              :         case SECCAT_RODATA_MERGE_STR_INIT:
     898              :         case SECCAT_RODATA_MERGE_CONST:
     899              :           prefix = one_only ? ".lr" : ".lrodata";
     900              :           break;
     901            0 :         case SECCAT_SRODATA:
     902            0 :         case SECCAT_SDATA:
     903            0 :         case SECCAT_SBSS:
     904            0 :           gcc_unreachable ();
     905              :         case SECCAT_TEXT:
     906              :         case SECCAT_TDATA:
     907              :         case SECCAT_TBSS:
     908              :           /* We don't split these for the medium model.  Place them into
     909              :              default sections and hope for the best.  */
     910              :           break;
     911              :         }
     912            3 :       if (prefix)
     913              :         {
     914            3 :           const char *name, *linkonce;
     915            3 :           char *string;
     916              : 
     917            3 :           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
     918            3 :           name = targetm.strip_name_encoding (name);
     919              : 
     920              :           /* If we're using one_only, then there needs to be a .gnu.linkonce
     921              :              prefix on the section name.  */
     922            3 :           linkonce = one_only ? ".gnu.linkonce" : "";
     923              : 
     924            3 :           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
     925              : 
     926            3 :           set_decl_section_name (decl, string);
     927            3 :           return;
     928              :         }
     929              :     }
     930      1804866 :   default_unique_section (decl, reloc);
     931              : }
     932              : 
     933              : /* Return true if the attribute list of TYPE contains the
     934              :    no_callee_saved_registers or the preserve_none attribute.  */
     935              : 
     936              : bool
     937      7541389 : ix86_type_no_callee_saved_registers_p (const_tree type)
     938              : {
     939     15082778 :   return (lookup_attribute ("no_callee_saved_registers",
     940      7541389 :                             TYPE_ATTRIBUTES (type)) != NULL
     941     15082645 :           || lookup_attribute ("preserve_none",
     942      7541256 :                                TYPE_ATTRIBUTES (type)) != NULL);
     943              : }
     944              : 
     945              : #ifdef COMMON_ASM_OP
     946              : 
     947              : #ifndef LARGECOMM_SECTION_ASM_OP
     948              : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
     949              : #endif
     950              : 
     951              : /* This says how to output assembler code to declare an
     952              :    uninitialized external linkage data object.
     953              : 
     954              :    For the medium and large x86-64 code models, objects bigger than
     955              :    ix86_section_threshold are declared with LARGECOMM_SECTION_ASM_OP.  */
     956              : void
     957       170516 : x86_elf_aligned_decl_common (FILE *file, tree decl,
     958              :                         const char *name, unsigned HOST_WIDE_INT size,
     959              :                         unsigned align)
     960              : {
     961       170516 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     962       170510 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     963            7 :       && size > (unsigned int)ix86_section_threshold)
     964              :     {
     965            1 :       switch_to_section (get_named_section (decl, ".lbss", 0));
     966            1 :       fputs (LARGECOMM_SECTION_ASM_OP, file);
     967              :     }
     968              :   else
     969       170515 :     fputs (COMMON_ASM_OP, file);
     970       170516 :   assemble_name (file, name);
     971       170516 :   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
     972              :            size, align / BITS_PER_UNIT);
     973       170516 : }
     974              : #endif
     975              : 
     976              : /* Utility function for targets to use in implementing
     977              :    ASM_OUTPUT_ALIGNED_BSS; big medium/large-model objects go to .lbss.  */
     978              : 
     979              : void
     980       768255 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
     981              :                         unsigned HOST_WIDE_INT size, unsigned align)
     982              : {
     983       768255 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     984       768245 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     985           42 :       && size > (unsigned int)ix86_section_threshold)
     986            3 :     switch_to_section (get_named_section (decl, ".lbss", 0));
     987              :   else
     988       768252 :     switch_to_section (bss_section);
     989       922667 :   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
     990              : #ifdef ASM_DECLARE_OBJECT_NAME
     991       768255 :   last_assemble_variable_decl = decl;
     992       768255 :   ASM_DECLARE_OBJECT_NAME (file, name, decl);
     993              : #else
     994              :   /* The standard thing is to just output a label for the object.  */
     995              :   ASM_OUTPUT_LABEL (file, name);
     996              : #endif /* ASM_DECLARE_OBJECT_NAME */
     997       768255 :   ASM_OUTPUT_SKIP (file, size ? size : 1);
     998       768255 : }
     999              : 
    1000              : /* Decide whether we must probe the stack before any space allocation
    1001              :    on this target.  This is TARGET_STACK_PROBE, except that it is false
    1002              :    when flag_stack_check is STATIC_BUILTIN_STACK_CHECK.  */
    1003              : 
    1004              : bool
    1005       870491 : ix86_target_stack_probe (void)
    1006              : {
    1007              :   /* Do not probe the stack twice if static stack checking is enabled.  */
    1008       870491 :   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    1009              :     return false;
    1010              : 
    1011       870491 :   return TARGET_STACK_PROBE;
    1012              : }
    1013              : 
    1014              : /* Decide whether we can make a sibling call to a function.  DECL is the
    1015              :    declaration of the function being targeted by the call and EXP is the
    1016              :    CALL_EXPR representing the call.  */
    1017              : 
    1018              : static bool
    1019       138021 : ix86_function_ok_for_sibcall (tree decl, tree exp)
    1020              : {
    1021       138021 :   tree type, decl_or_type;
    1022       138021 :   rtx a, b;
    1023       138021 :   bool bind_global = decl && !targetm.binds_local_p (decl);
    1024              : 
    1025       138021 :   if (ix86_function_naked (current_function_decl))
    1026              :     return false;
    1027              : 
    1028              :   /* Sibling call isn't OK if there are no caller-saved registers
    1029              :      since all registers must be preserved before return.  */
    1030       138019 :   if (cfun->machine->call_saved_registers
    1031       138019 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
    1032              :     return false;
    1033              : 
    1034              :   /* If we are generating position-independent code, we cannot sibcall
    1035              :      optimize direct calls to global functions, as the PLT requires
    1036              :      %ebx be live. (Darwin does not have a PLT.)  */
    1037       137990 :   if (!TARGET_MACHO
    1038       137990 :       && !TARGET_64BIT
    1039        11328 :       && flag_pic
    1040         8402 :       && flag_plt
    1041         8402 :       && bind_global)
    1042              :     return false;
    1043              : 
    1044              :   /* If we need to align the outgoing stack, then sibcalling would
    1045              :      unalign the stack, which may break the called function.  */
    1046       133347 :   if (ix86_minimum_incoming_stack_boundary (true)
    1047       133347 :       < PREFERRED_STACK_BOUNDARY)
    1048              :     return false;
    1049              : 
    1050       132766 :   if (decl)
    1051              :     {
    1052       121915 :       decl_or_type = decl;
    1053       121915 :       type = TREE_TYPE (decl);
    1054              :     }
    1055              :   else
    1056              :     {
    1057              :       /* We're looking at the CALL_EXPR, we need the type of the function.  */
    1058        10851 :       type = CALL_EXPR_FN (exp);                /* pointer expression */
    1059        10851 :       type = TREE_TYPE (type);                  /* pointer type */
    1060        10851 :       type = TREE_TYPE (type);                  /* function type */
    1061        10851 :       decl_or_type = type;
    1062              :     }
    1063              : 
    1064              :   /* Sibling call isn't OK if callee has no callee-saved registers
    1065              :      and the calling function has callee-saved registers.  */
    1066       132766 :   if ((cfun->machine->call_saved_registers
    1067       132766 :        != TYPE_NO_CALLEE_SAVED_REGISTERS)
    1068       132766 :       && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
    1069       132766 :       && ix86_type_no_callee_saved_registers_p (type))
    1070              :     return false;
    1071              : 
    1072              :   /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
    1073       132750 :   if ((OUTGOING_REG_PARM_STACK_SPACE (type)
    1074       132750 :        != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
    1075       264754 :       || (REG_PARM_STACK_SPACE (decl_or_type)
    1076       132004 :           != REG_PARM_STACK_SPACE (current_function_decl)))
    1077              :     {
    1078          746 :       maybe_complain_about_tail_call (exp,
    1079              :                                       "inconsistent size of stack space"
    1080              :                                       " allocated for arguments which are"
    1081              :                                       " passed in registers");
    1082          746 :       return false;
    1083              :     }
    1084              : 
    1085              :   /* Check that the return value locations are the same.  Like
    1086              :      if we are returning floats on the 80387 register stack, we cannot
    1087              :      make a sibcall from a function that doesn't return a float to a
    1088              :      function that does or, conversely, from a function that does return
    1089              :      a float to a function that doesn't; the necessary stack adjustment
    1090              :      would not be executed.  This is also the place we notice
    1091              :      differences in the return value ABI.  Note that it is ok for one
    1092              :      of the functions to have void return type as long as the return
    1093              :      value of the other is passed in a register.  */
    1094       132004 :   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
    1095       132004 :   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
    1096       132004 :                            cfun->decl, false);
    1097       132004 :   if (STACK_REG_P (a) || STACK_REG_P (b))
    1098              :     {
    1099         1020 :       if (!rtx_equal_p (a, b))
    1100              :         return false;
    1101              :     }
    1102       130984 :   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    1103              :     ;
    1104        24466 :   else if (!rtx_equal_p (a, b))
    1105              :     return false;
    1106              : 
    1107       131623 :   if (TARGET_64BIT)
    1108              :     {
    1109              :       /* The SYSV ABI has more call-clobbered registers;
    1110              :          disallow sibcalls from MS to SYSV.  */
    1111       124938 :       if (cfun->machine->call_abi == MS_ABI
    1112       124938 :           && ix86_function_type_abi (type) == SYSV_ABI)
    1113              :         return false;
    1114              :     }
    1115              :   else
    1116              :     {
    1117              :       /* If this call is indirect, we'll need to be able to use a
    1118              :          call-clobbered register for the address of the target function.
    1119              :          Make sure that all such registers are not used for passing
    1120              :          parameters.  Note that DLLIMPORT functions and call to global
    1121              :          function via GOT slot are indirect.  */
    1122         6685 :       if (!decl
    1123         4771 :           || (bind_global && flag_pic && !flag_plt)
    1124              :           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
    1125         4771 :           || flag_force_indirect_call)
    1126              :         {
    1127              :           /* Check if regparm >= 3 since arg_reg_available is set to
    1128              :              false if regparm == 0.  If regparm is 1 or 2, there is
    1129              :              always a call-clobbered register available.
    1130              : 
    1131              :              ??? The symbol indirect call doesn't need a call-clobbered
    1132              :              register.  But we don't know if this is a symbol indirect
    1133              :              call or not here.  */
    1134         1914 :           if (ix86_function_regparm (type, decl) >= 3
    1135         1914 :               && !cfun->machine->arg_reg_available)
    1136              :             return false;
    1137              :         }
    1138              :     }
    1139              : 
    1140       131623 :   if (decl && ix86_use_pseudo_pic_reg ())
    1141              :     {
    1142              :       /* When PIC register is used, it must be restored after ifunc
    1143              :          function returns.  */
    1144         2060 :        cgraph_node *node = cgraph_node::get (decl);
    1145         2060 :        if (node && node->ifunc_resolver)
    1146              :          return false;
    1147              :     }
    1148              : 
    1149              :   /* Disable sibcall if callee has indirect_return attribute and
    1150              :      caller doesn't since callee will return to the caller's caller
    1151              :      via an indirect jump.  */
    1152       131623 :   if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
    1153              :        == (CF_RETURN | CF_BRANCH))
    1154        52642 :       && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
    1155       131627 :       && !lookup_attribute ("indirect_return",
    1156            4 :                             TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    1157              :     return false;
    1158              : 
    1159              :   /* Otherwise okay.  That also includes certain types of indirect calls.  */
    1160              :   return true;
    1161              : }
    1162              : 
    1163              : /* This function determines from TYPE the calling-convention.  */
    1164              : 
    1165              : unsigned int
    1166      6201143 : ix86_get_callcvt (const_tree type)
    1167              : {
    1168      6201143 :   unsigned int ret = 0;
    1169      6201143 :   bool is_stdarg;
    1170      6201143 :   tree attrs;
    1171              : 
    1172      6201143 :   if (TARGET_64BIT)
    1173              :     return IX86_CALLCVT_CDECL;
    1174              : 
    1175      3270149 :   attrs = TYPE_ATTRIBUTES (type);
    1176      3270149 :   if (attrs != NULL_TREE)
    1177              :     {
    1178        67401 :       if (lookup_attribute ("cdecl", attrs))
    1179              :         ret |= IX86_CALLCVT_CDECL;
    1180        67401 :       else if (lookup_attribute ("stdcall", attrs))
    1181              :         ret |= IX86_CALLCVT_STDCALL;
    1182        67401 :       else if (lookup_attribute ("fastcall", attrs))
    1183              :         ret |= IX86_CALLCVT_FASTCALL;
    1184        67392 :       else if (lookup_attribute ("thiscall", attrs))
    1185              :         ret |= IX86_CALLCVT_THISCALL;
    1186              : 
    1187              :       /* Regparm isn't allowed for thiscall and fastcall.  */
    1188              :       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
    1189              :         {
    1190        67392 :           if (lookup_attribute ("regparm", attrs))
    1191        15828 :             ret |= IX86_CALLCVT_REGPARM;
    1192        67392 :           if (lookup_attribute ("sseregparm", attrs))
    1193            0 :             ret |= IX86_CALLCVT_SSEREGPARM;
    1194              :         }
    1195              : 
    1196        67401 :       if (IX86_BASE_CALLCVT(ret) != 0)
    1197            9 :         return ret;
    1198              :     }
    1199              : 
    1200      3270140 :   is_stdarg = stdarg_p (type);
    1201      3270140 :   if (TARGET_RTD && !is_stdarg)
    1202            0 :     return IX86_CALLCVT_STDCALL | ret;
    1203              : 
    1204      3270140 :   if (ret != 0
    1205      3270140 :       || is_stdarg
    1206      3245292 :       || TREE_CODE (type) != METHOD_TYPE
    1207      3406719 :       || ix86_function_type_abi (type) != MS_ABI)
    1208      3270140 :     return IX86_CALLCVT_CDECL | ret;
    1209              : 
    1210              :   return IX86_CALLCVT_THISCALL;
    1211              : }
    1212              : 
    1213              : /* Return 0 if the attributes for two types are incompatible, 1 if they
    1214              :    are compatible, and 2 if they are nearly compatible (which causes a
    1215              :    warning to be generated).  */
    1216              : 
    1217              : static int
    1218      1483196 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
    1219              : {
    1220      1483196 :   unsigned int ccvt1, ccvt2;
    1221              : 
    1222      1483196 :   if (TREE_CODE (type1) != FUNCTION_TYPE
    1223      1483196 :       && TREE_CODE (type1) != METHOD_TYPE)
    1224              :     return 1;
    1225              : 
    1226      1476621 :   ccvt1 = ix86_get_callcvt (type1);
    1227      1476621 :   ccvt2 = ix86_get_callcvt (type2);
    1228      1476621 :   if (ccvt1 != ccvt2)
    1229              :     return 0;
    1230      2931114 :   if (ix86_function_regparm (type1, NULL)
    1231      1465557 :       != ix86_function_regparm (type2, NULL))
    1232              :     return 0;
    1233              : 
    1234      1427762 :   if (ix86_type_no_callee_saved_registers_p (type1)
    1235       713881 :       != ix86_type_no_callee_saved_registers_p (type2))
    1236              :     return 0;
    1237              : 
    1238              :   /* The preserve_none attribute selects a different calling convention,
    1239              :      but only for 64-bit.  */
    1240       713753 :   if (TARGET_64BIT
    1241      1427446 :       && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
    1242       713693 :           != lookup_attribute ("preserve_none",
    1243       713693 :                                TYPE_ATTRIBUTES (type2))))
    1244              :     return 0;
    1245              : 
    1246              :   return 1;
    1247              : }
    1248              : 
    1249              : /* Return the regparm value for a function with the indicated TYPE and DECL.
    1250              :    DECL may be NULL when calling function indirectly
    1251              :    or considering a libcall.  */
    1252              : 
    1253              : static int
    1254      4202769 : ix86_function_regparm (const_tree type, const_tree decl)
    1255              : {
    1256      4202769 :   tree attr;
    1257      4202769 :   int regparm;
    1258      4202769 :   unsigned int ccvt;
    1259              : 
    1260      4202769 :   if (TARGET_64BIT)
    1261      2930994 :     return (ix86_function_type_abi (type) == SYSV_ABI
    1262      2930994 :             ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
    1263      1271775 :   ccvt = ix86_get_callcvt (type);
    1264      1271775 :   regparm = ix86_regparm;
    1265              : 
    1266      1271775 :   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    1267              :     {
    1268         2020 :       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
    1269         2020 :       if (attr)
    1270              :         {
    1271         2020 :           regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    1272         2020 :           return regparm;
    1273              :         }
    1274              :     }
    1275      1269755 :   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    1276              :     return 2;
    1277      1269755 :   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    1278              :     return 1;
    1279              : 
    1280              :   /* Use register calling convention for local functions when possible.  */
    1281      1269755 :   if (decl
    1282      1205500 :       && TREE_CODE (decl) == FUNCTION_DECL)
    1283              :     {
    1284      1195399 :       cgraph_node *target = cgraph_node::get (decl);
    1285      1195399 :       if (target)
    1286      1187892 :         target = target->function_symbol ();
    1287              : 
    1288              :       /* Caller and callee must agree on the calling convention, so
    1289              :          checking here just optimize means that with
    1290              :          __attribute__((optimize (...))) caller could use regparm convention
    1291              :          and callee not, or vice versa.  Instead look at whether the callee
    1292              :          is optimized or not.  */
    1293      1187892 :       if (target && opt_for_fn (target->decl, optimize)
    1294      2374892 :           && !(profile_flag && !flag_fentry))
    1295              :         {
    1296      1187000 :           if (target->local && target->can_change_signature)
    1297              :             {
    1298       140052 :               int local_regparm, globals = 0, regno;
    1299              : 
    1300              :               /* Make sure no regparm register is taken by a
    1301              :                  fixed register variable.  */
    1302       140052 :               for (local_regparm = 0; local_regparm < REGPARM_MAX;
    1303              :                    local_regparm++)
    1304       105039 :                 if (fixed_regs[local_regparm])
    1305              :                   break;
    1306              : 
    1307              :               /* We don't want to use regparm(3) for nested functions as
    1308              :                  these use a static chain pointer in the third argument.  */
    1309        35013 :               if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
    1310              :                 local_regparm = 2;
    1311              : 
    1312              :               /* Save a register for the split stack.  */
    1313        35013 :               if (flag_split_stack)
    1314              :                 {
    1315        20696 :                   if (local_regparm == 3)
    1316              :                     local_regparm = 2;
    1317          707 :                   else if (local_regparm == 2
    1318          707 :                            && DECL_STATIC_CHAIN (target->decl))
    1319              :                     local_regparm = 1;
    1320              :                 }
    1321              : 
    1322              :               /* Each fixed register usage increases register pressure,
    1323              :                  so fewer registers should be used for argument passing.
    1324              :                  This functionality can be overridden by an explicit
    1325              :                  regparm value.  */
    1326       245091 :               for (regno = AX_REG; regno <= DI_REG; regno++)
    1327       210078 :                 if (fixed_regs[regno])
    1328            0 :                   globals++;
    1329              : 
    1330        35013 :               local_regparm
    1331        35013 :                 = globals < local_regparm ? local_regparm - globals : 0;
    1332              : 
    1333        35013 :               if (local_regparm > regparm)
    1334      4202769 :                 regparm = local_regparm;
    1335              :             }
    1336              :         }
    1337              :     }
    1338              : 
    1339              :   return regparm;
    1340              : }
    1341              : 
    1342              : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
    1343              :    DFmode (2) arguments in SSE registers for a function with the
    1344              :    indicated TYPE and DECL.  DECL may be NULL when calling function
    1345              :    indirectly or considering a libcall.  Return -1 if any FP parameter
    1346              :    should be rejected by error.  This is used in situations where we imply
    1347              :    the SSE calling convention but the function is called from another
    1348              :    function with SSE disabled.  Otherwise return 0.  */
    1349              : 
    1350              : static int
    1351      1077424 : ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
    1352              : {
    1353      1077424 :   gcc_assert (!TARGET_64BIT);
    1354              : 
    1355              :   /* Use SSE registers to pass SFmode and DFmode arguments if requested
    1356              :      by the sseregparm attribute.  */
    1357      1077424 :   if (TARGET_SSEREGPARM
    1358      1077424 :       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    1359              :     {
    1360            0 :       if (!TARGET_SSE)
    1361              :         {
    1362            0 :           if (warn)
    1363              :             {
    1364            0 :               if (decl)
    1365            0 :                 error ("calling %qD with attribute sseregparm without "
    1366              :                        "SSE/SSE2 enabled", decl);
    1367              :               else
    1368            0 :                 error ("calling %qT with attribute sseregparm without "
    1369              :                        "SSE/SSE2 enabled", type);
    1370              :             }
    1371            0 :           return 0;
    1372              :         }
    1373              : 
    1374              :       return 2;
    1375              :     }
    1376              : 
    1377      1077424 :   if (!decl)
    1378              :     return 0;
    1379              : 
    1380       978849 :   cgraph_node *target = cgraph_node::get (decl);
    1381       978849 :   if (target)
    1382       971349 :     target = target->function_symbol ();
    1383              : 
    1384              :   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
    1385              :      (and DFmode for SSE2) arguments in SSE registers.  */
    1386       971349 :   if (target
    1387              :       /* TARGET_SSE_MATH */
    1388       971349 :       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
    1389         1296 :       && opt_for_fn (target->decl, optimize)
    1390       972645 :       && !(profile_flag && !flag_fentry))
    1391              :     {
    1392         1296 :       if (target->local && target->can_change_signature)
    1393              :         {
    1394              :           /* Refuse to produce wrong code when local function with SSE enabled
    1395              :              is called from SSE disabled function.
    1396              :              FIXME: We need a way to detect these cases cross-ltrans partition
    1397              :              and avoid using SSE calling conventions on local functions called
    1398              :              from function with SSE disabled.  For now at least delay the
    1399              :              warning until we know we are going to produce wrong code.
    1400              :              See PR66047  */
    1401            0 :           if (!TARGET_SSE && warn)
    1402              :             return -1;
    1403            0 :           return TARGET_SSE2_P (target_opts_for_fn (target->decl)
    1404            0 :                                 ->x_ix86_isa_flags) ? 2 : 1;
    1405              :         }
    1406              :     }
    1407              : 
    1408              :   return 0;
    1409              : }
    1410              : 
    1411              : /* Return true if EAX is live at the start of the function.  Used by
    1412              :    ix86_expand_prologue to determine if we need special help before
    1413              :    calling allocate_stack_worker.  */
    1414              : 
    1415              : static bool
    1416         7090 : ix86_eax_live_at_start_p (void)
    1417              : {
    1418              :   /* Cheat.  Don't bother working forward from ix86_function_regparm
    1419              :      to the function type to whether an actual argument is located in
    1420              :      eax.  Instead just look at cfg info, which is still close enough
    1421              :      to correct at this point.  This gives false positives for broken
    1422              :      functions that might use uninitialized data that happens to be
    1423              :      allocated in eax, but who cares?  */
    1424         7090 :   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
    1425              : }
    1426              : 
    1427              : static bool
    1428       160020 : ix86_keep_aggregate_return_pointer (tree fntype)
    1429              : {
    1430       160020 :   tree attr;
    1431              : 
    1432       160020 :   if (!TARGET_64BIT)
    1433              :     {
    1434       160020 :       attr = lookup_attribute ("callee_pop_aggregate_return",
    1435       160020 :                                TYPE_ATTRIBUTES (fntype));
    1436       160020 :       if (attr)
    1437            0 :         return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
    1438              : 
    1439              :       /* For 32-bit MS-ABI the default is to keep aggregate
    1440              :          return pointer.  */
    1441       160020 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    1442              :         return true;
    1443              :     }
    1444              :   return KEEP_AGGREGATE_RETURN_POINTER != 0;
    1445              : }
    1446              : 
    1447              : /* Value is the number of bytes of arguments automatically
    1448              :    popped when returning from a subroutine call.
    1449              :    FUNDECL is the declaration node of the function (as a tree),
    1450              :    FUNTYPE is the data type of the function (as a tree),
    1451              :    or for a library call it is an identifier node for the subroutine name.
    1452              :    SIZE is the number of bytes of arguments passed on the stack.
    1453              : 
    1454              :    On the 80386, the RTD insn may be used to pop them if the number
    1455              :      of args is fixed, but if the number is variable then the caller
    1456              :      must pop them all.  RTD can't be used for library calls now
    1457              :      because the library is compiled with the Unix compiler.
    1458              :    Use of RTD is a selectable option, since it is incompatible with
    1459              :    standard Unix calling sequences.  If the option is not selected,
    1460              :    the caller must always pop the args.
    1461              : 
    1462              :    The attribute stdcall is equivalent to RTD on a per module basis.  */
    1463              : 
    1464              : static poly_int64
    1465      7590914 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
    1466              : {
    1467      7590914 :   unsigned int ccvt;
    1468              : 
    1469              :   /* None of the 64-bit ABIs pop arguments.  */
    1470      7590914 :   if (TARGET_64BIT)
    1471      6717494 :     return 0;
    1472              : 
    1473       873420 :   ccvt = ix86_get_callcvt (funtype);
    1474              : 
    1475       873420 :   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
    1476              :                | IX86_CALLCVT_THISCALL)) != 0
    1477       873420 :       && ! stdarg_p (funtype))
    1478            3 :     return size;
    1479              : 
    1480              :   /* Lose any fake structure return argument if it is passed on the stack.  */
    1481       873417 :   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
    1482       873417 :       && !ix86_keep_aggregate_return_pointer (funtype))
    1483              :     {
    1484       160020 :       int nregs = ix86_function_regparm (funtype, fundecl);
    1485       160020 :       if (nregs == 0)
    1486       459195 :         return GET_MODE_SIZE (Pmode);
    1487              :     }
    1488              : 
    1489       720352 :   return 0;
    1490              : }
    1491              : 
    1492              : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
    1493              : 
    1494              : static bool
    1495     10051893 : ix86_legitimate_combined_insn (rtx_insn *insn)
    1496              : {
    1497     10051893 :   int i;
    1498              : 
    1499              :   /* Check operand constraints in case hard registers were propagated
    1500              :      into insn pattern.  This check prevents combine pass from
    1501              :      generating insn patterns with invalid hard register operands.
    1502              :      These invalid insns can eventually confuse reload to error out
    1503              :      with a spill failure.  See also PRs 46829 and 46843.  */
    1504              : 
    1505     10051893 :   gcc_assert (INSN_CODE (insn) >= 0);
    1506              : 
    1507     10051893 :   extract_insn (insn);
    1508     10051893 :   preprocess_constraints (insn);
    1509              : 
    1510     10051893 :   int n_operands = recog_data.n_operands;
    1511     10051893 :   int n_alternatives = recog_data.n_alternatives;
    1512     34352571 :   for (i = 0; i < n_operands; i++)
    1513              :     {
    1514     24304181 :       rtx op = recog_data.operand[i];
    1515     24304181 :       machine_mode mode = GET_MODE (op);
    1516     24304181 :       const operand_alternative *op_alt;
    1517     24304181 :       int offset = 0;
    1518     24304181 :       bool win;
    1519     24304181 :       int j;
    1520              : 
    1521              :       /* A unary operator may be accepted by the predicate, but it
    1522              :          is irrelevant for matching constraints.  */
    1523     24304181 :       if (UNARY_P (op))
    1524        48613 :         op = XEXP (op, 0);
    1525              : 
    1526     24304181 :       if (SUBREG_P (op))
    1527              :         {
    1528       876039 :           if (REG_P (SUBREG_REG (op))
    1529       876039 :               && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
    1530           54 :             offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
    1531           54 :                                           GET_MODE (SUBREG_REG (op)),
    1532           54 :                                           SUBREG_BYTE (op),
    1533           54 :                                           GET_MODE (op));
    1534       876039 :           op = SUBREG_REG (op);
    1535              :         }
    1536              : 
    1537     24304181 :       if (!(REG_P (op) && HARD_REGISTER_P (op)))
    1538     24002704 :         continue;
    1539              : 
    1540       301477 :       op_alt = recog_op_alt;
    1541              : 
    1542              :       /* Operand has no constraints, anything is OK.  */
    1543       301477 :       win = !n_alternatives;
    1544              : 
    1545       301477 :       alternative_mask preferred = get_preferred_alternatives (insn);
    1546       827545 :       for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
    1547              :         {
    1548       522484 :           if (!TEST_BIT (preferred, j))
    1549       137296 :             continue;
    1550       385188 :           if (op_alt[i].anything_ok
    1551       202726 :               || (op_alt[i].matches != -1
    1552        33730 :                   && operands_match_p
    1553        33730 :                   (recog_data.operand[i],
    1554        33730 :                    recog_data.operand[op_alt[i].matches]))
    1555       583818 :               || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
    1556              :             {
    1557              :               win = true;
    1558              :               break;
    1559              :             }
    1560              :         }
    1561              : 
    1562       301477 :       if (!win)
    1563              :         return false;
    1564              :     }
    1565              : 
    1566              :   return true;
    1567              : }
    1568              : 
    1569              : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
    1570              : 
    1571              : static unsigned HOST_WIDE_INT
    1572         4581 : ix86_asan_shadow_offset (void)
    1573              : {
    1574         4581 :   return SUBTARGET_SHADOW_OFFSET;
    1575              : }
    1576              : 
    1577              : /* Argument support functions.  */
    1578              : 
    1579              : /* Return true when register may be used to pass function parameters.  */
    1580              : bool
    1581   1480812072 : ix86_function_arg_regno_p (int regno)
    1582              : {
    1583   1480812072 :   int i;
    1584   1480812072 :   enum calling_abi call_abi;
    1585   1480812072 :   const int *parm_regs;
    1586              : 
    1587   1477365013 :   if (TARGET_SSE && SSE_REGNO_P (regno)
    1588   2448959030 :       && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    1589              :     return true;
    1590              : 
    1591   1361245180 :    if (!TARGET_64BIT)
    1592    129210670 :      return (regno < REGPARM_MAX
    1593    129210670 :              || (TARGET_MMX && MMX_REGNO_P (regno)
    1594     11613072 :                  && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
    1595              : 
    1596              :   /* TODO: The function should depend on current function ABI but
    1597              :      builtins.cc would need updating then. Therefore we use the
    1598              :      default ABI.  */
    1599   1232034510 :   call_abi = ix86_cfun_abi ();
    1600              : 
    1601              :   /* RAX is used as hidden argument to va_arg functions.  */
    1602   1232034510 :   if (call_abi == SYSV_ABI && regno == AX_REG)
    1603              :     return true;
    1604              : 
    1605   1217797819 :   if (cfun
    1606   1217797487 :       && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    1607              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    1608   1217778895 :   else if (call_abi == MS_ABI)
    1609              :     parm_regs = x86_64_ms_abi_int_parameter_registers;
    1610              :   else
    1611   1181783343 :     parm_regs = x86_64_int_parameter_registers;
    1612              : 
    1613  16298581598 :   for (i = 0; i < (call_abi == MS_ABI
    1614   8149290799 :                    ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    1615   7018596439 :     if (regno == parm_regs[i])
    1616              :       return true;
    1617              :   return false;
    1618              : }
    1619              : 
    1620              : /* Return true if we do not know how to pass ARG solely in registers.  */
    1621              : 
    1622              : static bool
    1623    403783638 : ix86_must_pass_in_stack (const function_arg_info &arg)
    1624              : {
    1625    403783638 :   if (must_pass_in_stack_var_size_or_pad (arg))
    1626              :     return true;
    1627              : 
    1628              :   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
    1629              :      The layout_type routine is crafty and tries to trick us into passing
    1630              :      currently unsupported vector types on the stack by using TImode.  */
    1631      1772098 :   return (!TARGET_64BIT && arg.mode == TImode
    1632    403783601 :           && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
    1633              : }
    1634              : 
    1635              : /* Implement TARGET_OVERALIGNED_STACK_SLOT_REQUIRED.  */
    1636              : 
    1637              : static bool
    1638        84424 : ix86_overaligned_stack_slot_required (void)
    1639              : {
    1640        84424 :   return TARGET_SEH;
    1641              : }
    1642              : 
    1643              : /* Return the size, in bytes, of the area reserved for arguments passed
    1644              :    in registers for the function represented by FNDECL, depending on the
    1645              :    ABI it uses.  */
    1646              : int
    1647     10699397 : ix86_reg_parm_stack_space (const_tree fndecl)
    1648              : {
    1649     10699397 :   enum calling_abi call_abi = SYSV_ABI;
    1650     10699397 :   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    1651     10388100 :     call_abi = ix86_function_abi (fndecl);
    1652              :   else
    1653       311297 :     call_abi = ix86_function_type_abi (fndecl);
    1654     10699397 :   if (TARGET_64BIT && call_abi == MS_ABI)
    1655       119312 :     return 32;
    1656              :   return 0;
    1657              : }
    1658              : 
    1659              : /* Wrapper added as a workaround so that the libc_has_function hook can
    1660              :    be used from i386.md.  Forwards FN_CLASS to targetm.libc_has_function,
                           passing NULL_TREE as the second argument, and returns the hook's
                           answer unchanged.  */
    1661              : bool
    1662            0 : ix86_libc_has_function (enum function_class fn_class)
    1663              : {
    1664            0 :   return targetm.libc_has_function (fn_class, NULL_TREE);
    1665              : }
    1666              : 
    1667              : /* Return the calling ABI (SYSV_ABI or MS_ABI) used by the function type
    1668              :    FNTYPE.
                        
                           The result starts out as the default ix86_abi and is returned
                           unchanged when FNTYPE is NULL_TREE or carries no type attributes.
                           Otherwise an "ms_abi" attribute switches a SYSV_ABI default to
                           MS_ABI, and a "sysv_abi" attribute switches an MS_ABI default to
                           SYSV_ABI.  An attribute that names the ABI already in effect is
                           not looked up at all.  */
    1669              : enum calling_abi
    1670    439278643 : ix86_function_type_abi (const_tree fntype)
    1671              : {
    1672    439278643 :   enum calling_abi abi = ix86_abi;
    1673              : 
    1674    439278643 :   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    1675              :     return abi;
    1676              : 
    1677     17591913 :   if (abi == SYSV_ABI
    1678     17591913 :       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    1679              :     {
                              /* WARNED makes the X32 diagnostic a one-shot: it is reported
                                 on the first offending type only.  Despite its name the
                                 diagnostic is an error, and the ABI is still switched to
                                 MS_ABI afterwards.  */
    1680      2608409 :       static int warned;
    1681      2608409 :       if (TARGET_X32 && !warned)
    1682              :         {
    1683            1 :           error ("X32 does not support %<ms_abi%> attribute");
    1684            1 :           warned = 1;
    1685              :         }
    1686              : 
    1687              :       abi = MS_ABI;
    1688              :     }
    1689     14983504 :   else if (abi == MS_ABI
    1690     14983504 :            && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    1691              :     abi = SYSV_ABI;
    1692              : 
    1693              :   return abi;
    1694              : }
    1695              : 
                        /* Return the calling ABI (SYSV_ABI or MS_ABI) used by the function
                           declaration FNDECL.  The answer is computed from the declaration's
                           type via ix86_function_type_abi; when FNDECL is null the default
                           ix86_abi is returned.  */
    1696              : enum calling_abi
    1697    217305413 : ix86_function_abi (const_tree fndecl)
    1698              : {
    1699    217305413 :   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
    1700              : }
    1701              : 
    1702              : /* Return the calling ABI (SYSV_ABI or MS_ABI) recorded for the current
    1703              :    function, i.e. cfun->machine->call_abi.  When cfun is null the
                           default ix86_abi is returned instead.  */
    1704              : enum calling_abi
    1705   2082718362 : ix86_cfun_abi (void)
    1706              : {
    1707   2082718362 :   return cfun ? cfun->machine->call_abi : ix86_abi;
    1708              : }
    1709              : 
                        /* Return true if FN is non-null, carries the "ms_hook_prologue"
                           attribute and is not a nested function.

                           When the attribute is present but decl_function_context reports an
                           enclosing function, an error is issued at FN's source location and
                           false is returned.  There is no once-only guard here, so the error
                           is issued on every such call.  */
    1710              : bool
    1711      5026377 : ix86_function_ms_hook_prologue (const_tree fn)
    1712              : {
    1713      5026377 :   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    1714              :     {
    1715            8 :       if (decl_function_context (fn) != NULL_TREE)
    1716            0 :         error_at (DECL_SOURCE_LOCATION (fn),
    1717              :                   "%<ms_hook_prologue%> attribute is not compatible "
    1718              :                   "with nested function");
    1719              :       else
    1720              :         return true;
    1721              :     }
    1722              :   return false;
    1723              : }
    1724              : 
                        /* Return true if FN is non-null and its declaration carries the
                           "naked" attribute; return false otherwise.  */
    1725              : bool
    1726    115209521 : ix86_function_naked (const_tree fn)
    1727              : {
    1728    115209521 :   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    1729              :     return true;
    1730              : 
    1731              :   return false;
    1732              : }
    1733              : 
    1734              : /* Write the extra assembler code needed to declare a function properly.
                        
                           OUT_FILE is the assembler output stream, FNAME the label name
                           passed on to assemble_function_label_raw, and DECL the function
                           declaration, which is only inspected for the ms_hook_prologue
                           attribute.

                           For an ms_hook_prologue function, filler words are written before
                           the label and a fixed byte sequence after it.  When cfun is set,
                           cfun->machine->function_label_emitted is also set to true.  */
    1735              : 
    1736              : void
    1737      1551588 : ix86_asm_output_function_label (FILE *out_file, const char *fname,
    1738              :                                 tree decl)
    1739              : {
    1740      1551588 :   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
    1741              : 
    1742      1551588 :   if (cfun)
    1743      1547982 :     cfun->machine->function_label_emitted = true;
    1744              : 
    1745      1551588 :   if (is_ms_hook)
    1746              :     {
                              /* FILLER_COUNT is stepped through in units of 4, one ASM_LONG
                                 directive with value 0xcccccccc per step: 8 directives in
                                 64-bit mode and 4 otherwise.  */
    1747            2 :       int i, filler_count = (TARGET_64BIT ? 32 : 16);
    1748            2 :       unsigned int filler_cc = 0xcccccccc;
    1749              : 
    1750           18 :       for (i = 0; i < filler_count; i += 4)
    1751           16 :         fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    1752              :     }
    1753              : 
    1754              : #ifdef SUBTARGET_ASM_UNWIND_INIT
    1755              :   SUBTARGET_ASM_UNWIND_INIT (out_file);
    1756              : #endif
    1757              : 
    1758      1551588 :   assemble_function_label_raw (out_file, fname);
    1759              : 
    1760              :   /* Output magic byte marker, if hot-patch attribute is set.  The
                             marker is written as raw bytes through ASM_BYTE; the
                             instructions they encode are given in the comments below.  */
    1761      1551588 :   if (is_ms_hook)
    1762              :     {
    1763            2 :       if (TARGET_64BIT)
    1764              :         {
    1765              :           /* leaq [%rsp + 0], %rsp  */
    1766            2 :           fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
    1767              :                  out_file);
    1768              :         }
    1769              :       else
    1770              :         {
    1771              :           /* movl.s %edi, %edi
    1772              :              push   %ebp
    1773              :              movl.s %esp, %ebp */
    1774            0 :           fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
    1775              :         }
    1776              :     }
    1777      1551588 : }
    1778              : 
    1779              : /* Output a user-defined label.  In AT&T syntax, registers are prefixed
    1780              :    with %, so labels require no punctuation.  In Intel syntax, registers
    1781              :    are unprefixed, so labels may clash with registers or other operators,
    1782              :    and require quoting.
                        
                           PREFIX immediately followed by LABEL is written to FILE.  For any
                           dialect other than ASM_ATT the pair is wrapped in double quotes.  */
    1783              : void
    1784     35063589 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
    1785              : {
    1786     35063589 :   if (ASSEMBLER_DIALECT == ASM_ATT)
    1787     35062488 :     fprintf (file, "%s%s", prefix, label);
    1788              :   else
    1789         1101 :     fprintf (file, "\"%s%s\"", prefix, label);
    1790     35063589 : }
    1791              : 
    1792              : /* Implementation of the call ABI switching target hook.  Records the
    1793              :    calling ABI of FNDECL, as computed by ix86_function_abi, in
    1794              :    cfun->machine->call_abi.  See also ix86_conditional_register_usage
                           for more details.

                           cfun is dereferenced unconditionally, so a current function must
                           be set when this is called.  */
    1795              : void
    1796    196791568 : ix86_call_abi_override (const_tree fndecl)
    1797              : {
    1798    196791568 :   cfun->machine->call_abi = ix86_function_abi (fndecl);
    1799    196791568 : }
    1800              : 
    1801              : /* Return true if a pseudo register should be created and used to hold
    1802              :    the GOT address for PIC code.
                        
                           The answer is false when flag_pic is zero, and also for 64-bit
                           targets when the code model is CM_SMALL_PIC or TARGET_PECOFF is
                           set.  It is true in every remaining case.  */
    1803              : bool
    1804    169699212 : ix86_use_pseudo_pic_reg (void)
    1805              : {
    1806    169699212 :   if ((TARGET_64BIT
    1807    158650626 :        && (ix86_cmodel == CM_SMALL_PIC
    1808              :            || TARGET_PECOFF))
    1809    163823316 :       || !flag_pic)
    1810    164899907 :     return false;
    1811              :   return true;
    1812              : }
    1813              : 
    1814              : /* Initialize large model PIC register.
                        
                           TMP_REGNO is the number of a register used as a temporary; it must
                           differ from the register number of pic_offset_table_rtx, and Pmode
                           must be DImode (both are asserted).

                           Three insns are emitted after a fresh label: set_rip_rex64 into
                           pic_offset_table_rtx, set_got_offset_rex64 into the temporary, and
                           an add of the temporary into pic_offset_table_rtx.  */
    1815              : 
    1816              : static void
    1817           56 : ix86_init_large_pic_reg (unsigned int tmp_regno)
    1818              : {
    1819           56 :   rtx_code_label *label;
    1820           56 :   rtx tmp_reg;
    1821              : 
    1822           56 :   gcc_assert (Pmode == DImode);
    1823           56 :   label = gen_label_rtx ();
    1824           56 :   emit_label (label);
    1825           56 :   LABEL_PRESERVE_P (label) = 1;
    1826           56 :   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
    1827           56 :   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
    1828           56 :   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
    1829              :                                 label));
    1830           56 :   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
    1831           56 :   emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
                          /* Turn the label into a NOTE_INSN_DELETED_LABEL note in place.
                             Its name is read before the code is changed and stored back
                             afterwards as the deleted label's name.  */
    1832           56 :   const char *name = LABEL_NAME (label);
    1833           56 :   PUT_CODE (label, NOTE);
    1834           56 :   NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
    1835           56 :   NOTE_DELETED_LABEL_NAME (label) = name;
    1836           56 : }
    1837              : 
    1838              : /* Create and initialize PIC register if required.
                        
                           Nothing is done unless ix86_use_pseudo_pic_reg returns true.
                           Otherwise the initialization insns are collected in a sequence,
                           which is then inserted on the single successor edge of the current
                           function's entry block and committed there.  */
    1839              : static void
    1840      1481483 : ix86_init_pic_reg (void)
    1841              : {
    1842      1481483 :   edge entry_edge;
    1843      1481483 :   rtx_insn *seq;
    1844              : 
    1845      1481483 :   if (!ix86_use_pseudo_pic_reg ())
    1846              :     return;
    1847              : 
    1848        40461 :   start_sequence ();
    1849              : 
    1850        40461 :   if (TARGET_64BIT)
    1851              :     {
                              /* The large PIC model goes through the helper with R11_REG as
                                 its temporary; other 64-bit models need a single
                                 set_got_rex64.  */
    1852           69 :       if (ix86_cmodel == CM_LARGE_PIC)
    1853           53 :         ix86_init_large_pic_reg (R11_REG);
    1854              :       else
    1855           16 :         emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    1856              :     }
    1857              :   else
    1858              :     {
    1859              :       /*  If there is future mcount call in the function it is more profitable
    1860              :           to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
    1861        40392 :       rtx reg = crtl->profile
    1862        40392 :                 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
    1863        40392 :                 : pic_offset_table_rtx;
    1864        40392 :       rtx_insn *insn = emit_insn (gen_set_got (reg));
    1865        40392 :       RTX_FRAME_RELATED_P (insn) = 1;
                              /* When profiling, SET_GOT targeted the real PIC register, so
                                 copy the value into pic_offset_table_rtx as well.  */
    1866        40392 :       if (crtl->profile)
    1867            0 :         emit_move_insn (pic_offset_table_rtx, reg);
    1868        40392 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    1869              :     }
    1870              : 
    1871        40461 :   seq = end_sequence ();
    1872              : 
    1873        40461 :   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
    1874        40461 :   insert_insn_on_edge (seq, entry_edge);
    1875        40461 :   commit_one_edge_insertion (entry_edge);
    1876              : }
    1877              : 
    1878              : /* Initialize a variable CUM of type CUMULATIVE_ARGS
    1879              :    for a call to a function whose data type is FNTYPE.
    1880              :    For a library call, FNTYPE is 0.
                        
                           LIBNAME is the SYMBOL_REF of the library function, or 0.  FNDECL is
                           the declaration of the called function when there is one, and
                           CALLER is copied into CUM->caller.

                           Besides filling in CUM, this clears cfun->machine->silent_p and
                           sets cfun->machine->arg_reg_available, so cfun must be set.  */
    1881              : 
    1882              : void
    1883     10418798 : init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
    1884              :                       tree fntype,      /* function type, or 0 for a libcall */
    1885              :                       rtx libname,      /* SYMBOL_REF of library name or 0 */
    1886              :                       tree fndecl,
    1887              :                       int caller)
    1888              : {
    1889     10418798 :   struct cgraph_node *local_info_node = NULL;
    1890     10418798 :   struct cgraph_node *target = NULL;
    1891              : 
    1892              :   /* Set silent_p to false to raise an error for invalid calls when
    1893              :      expanding function body.  */
    1894     10418798 :   cfun->machine->silent_p = false;
    1895              : 
    1896     10418798 :   memset (cum, 0, sizeof (*cum));
    1897              : 
                          /* PRESERVE_NONE_TYPE is the function type whose attributes are
                             searched for "preserve_none" below.  Each of the three branches
                             that follow assigns it.  */
    1898     10418798 :   tree preserve_none_type;
    1899     10418798 :   if (fndecl)
    1900              :     {
    1901     10078839 :       target = cgraph_node::get (fndecl);
    1902     10078839 :       if (target)
    1903              :         {
                                  /* A call graph node exists: use the declaration that
                                     function_symbol resolves to, not FNDECL itself.  */
    1904      9932931 :           target = target->function_symbol ();
    1905      9932931 :           local_info_node = cgraph_node::local_info_node (target->decl);
    1906      9932931 :           cum->call_abi = ix86_function_abi (target->decl);
    1907      9932931 :           preserve_none_type = TREE_TYPE (target->decl);
    1908              :         }
    1909              :       else
    1910              :         {
    1911       145908 :           cum->call_abi = ix86_function_abi (fndecl);
    1912       145908 :           preserve_none_type = TREE_TYPE (fndecl);
    1913              :         }
    1914              :     }
    1915              :   else
    1916              :     {
    1917       339959 :       cum->call_abi = ix86_function_type_abi (fntype);
    1918       339959 :       preserve_none_type = fntype;
    1919              :     }
    1920     10418798 :   cum->preserve_none_abi
    1921     10418798 :     = (preserve_none_type
    1922     20720306 :        && (lookup_attribute ("preserve_none",
    1923     10301508 :                              TYPE_ATTRIBUTES (preserve_none_type))
    1924              :            != nullptr));
    1925              : 
    1926     10418798 :   cum->caller = caller;
    1927              : 
    1928              :   /* Set up the number of registers to use for passing arguments.  */
    1929     10418798 :   cum->nregs = ix86_regparm;
    1930     10418798 :   if (TARGET_64BIT)
    1931              :     {
    1932      9382609 :       cum->nregs = (cum->call_abi == SYSV_ABI
    1933      9382609 :                    ? X86_64_REGPARM_MAX
    1934              :                    : X86_64_MS_REGPARM_MAX);
    1935              :     }
    1936     10418798 :   if (TARGET_SSE)
    1937              :     {
    1938     10409688 :       cum->sse_nregs = SSE_REGPARM_MAX;
    1939     10409688 :       if (TARGET_64BIT)
    1940              :         {
    1941      9373619 :           cum->sse_nregs = (cum->call_abi == SYSV_ABI
    1942      9373619 :                            ? X86_64_SSE_REGPARM_MAX
    1943              :                            : X86_64_MS_SSE_REGPARM_MAX);
    1944              :         }
    1945              :     }
    1946     10418798 :   if (TARGET_MMX)
    1947     11242211 :     cum->mmx_nregs = MMX_REGPARM_MAX;
    1948     10418798 :   cum->warn_avx512f = true;
    1949     10418798 :   cum->warn_avx = true;
    1950     10418798 :   cum->warn_sse = true;
    1951     10418798 :   cum->warn_mmx = true;
    1952              : 
    1953              :   /* Because type might mismatch in between caller and callee, we need to
    1954              :      use actual type of function for local calls.
    1955              :      FIXME: cgraph_analyze can be told to actually record if function uses
    1956              :      va_start so for local functions maybe_vaarg can be made aggressive
    1957              :      helping K&R code.
    1958              :      FIXME: once typesystem is fixed, we won't need this code anymore.  */
    1959     10418798 :   if (local_info_node && local_info_node->local
    1960       427951 :       && local_info_node->can_change_signature)
    1961       404792 :     fntype = TREE_TYPE (target->decl);
    1962     10418798 :   cum->stdarg = stdarg_p (fntype);
                          /* With a function type, treat the call as possibly variadic when
                             the type is unprototyped or stdarg.  Without one, it is possibly
                             variadic unless this is a library call.  */
    1963     20837596 :   cum->maybe_vaarg = (fntype
    1964     10888613 :                       ? (!prototype_p (fntype) || stdarg_p (fntype))
    1965       117290 :                       : !libname);
    1966              : 
    1967     10418798 :   cum->decl = fndecl;
    1968              : 
                          /* warn_empty starts out true when warn_abi is off or the function
                             is stdarg.  Otherwise it becomes true only if the argument list
                             contains a non-empty type after an empty one.  */
    1969     10418798 :   cum->warn_empty = !warn_abi || cum->stdarg;
    1970     10418798 :   if (!cum->warn_empty && fntype)
    1971              :     {
    1972      2693010 :       function_args_iterator iter;
    1973      2693010 :       tree argtype;
    1974      2693010 :       bool seen_empty_type = false;
    1975      7462918 :       FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
    1976              :         {
    1977      7462855 :           if (argtype == error_mark_node || VOID_TYPE_P (argtype))
    1978              :             break;
    1979      4790005 :           if (TYPE_EMPTY_P (argtype))
    1980              :             seen_empty_type = true;
    1981      4717848 :           else if (seen_empty_type)
    1982              :             {
    1983        20097 :               cum->warn_empty = true;
    1984        20097 :               break;
    1985              :             }
    1986              :         }
    1987              :     }
    1988              : 
    1989     10418798 :   if (!TARGET_64BIT)
    1990              :     {
    1991              :       /* If there are variable arguments, then we won't pass anything
    1992              :          in registers in 32-bit mode. */
    1993      1036189 :       if (stdarg_p (fntype))
    1994              :         {
    1995         9077 :           cum->nregs = 0;
    1996              :           /* Since in 32-bit, variable arguments are always passed on
    1997              :              stack, there is scratch register available for indirect
    1998              :              sibcall.  */
    1999         9077 :           cfun->machine->arg_reg_available = true;
    2000         9077 :           cum->sse_nregs = 0;
    2001         9077 :           cum->mmx_nregs = 0;
    2002         9077 :           cum->warn_avx512f = false;
    2003         9077 :           cum->warn_avx = false;
    2004         9077 :           cum->warn_sse = false;
    2005         9077 :           cum->warn_mmx = false;
    2006         9077 :           return;
    2007              :         }
    2008              : 
    2009              :       /* Use ecx and edx registers if function has fastcall attribute,
    2010              :          else look for regparm information.  */
    2011      1027112 :       if (fntype)
    2012              :         {
    2013      1013904 :           unsigned int ccvt = ix86_get_callcvt (fntype);
    2014      1013904 :           if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    2015              :             {
    2016            0 :               cum->nregs = 1;
    2017            0 :               cum->fastcall = 1; /* Same first register as in fastcall.  */
    2018              :             }
    2019      1013904 :           else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    2020              :             {
    2021            4 :               cum->nregs = 2;
    2022            4 :               cum->fastcall = 1;
    2023              :             }
    2024              :           else
    2025      1013900 :             cum->nregs = ix86_function_regparm (fntype, fndecl);
    2026              :         }
    2027              : 
    2028              :       /* Set up the number of SSE registers used for passing SFmode
    2029              :          and DFmode arguments.  Warn for mismatching ABI.  */
    2030      1027112 :       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    2031              :     }
    2032              : 
                          /* The 32-bit stdarg case returned early above with this flag
                             forced to true; everywhere else it reflects whether any integer
                             argument registers remain.  */
    2033     10409721 :   cfun->machine->arg_reg_available = (cum->nregs > 0);
    2034              : }
    2035              : 
    2036              : /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
    2037              :    But in the case of vector types, it is some vector mode.
    2038              : 
    2039              :    When we have only some of our vector isa extensions enabled, then there
    2040              :    are some modes for which vector_mode_supported_p is false.  For these
    2041              :    modes, the generic vector support in gcc will choose some non-vector mode
    2042              :    in order to implement the type.  By computing the natural mode, we'll
    2043              :    select the proper ABI location for the operand and not depend on whatever
    2044              :    the middle-end decides to do with these vector types.
    2045              : 
    2046              :    The middle-end can't deal with the vector types > 16 bytes.  In this
    2047              :    case, we return the original mode and warn ABI change if CUM isn't
    2048              :    NULL.
    2049              : 
    2050              :    If IN_RETURN is true, warn ABI change if the vector mode isn't
    2051              :    available for function return value.
                        
                           Each -Wpsabi warning below is guarded by its own function-local
                           static flag.  The flag is set only when warning () returns true,
                           so each kind of warning is given at most once.  */
    2052              : 
    2053              : static machine_mode
    2054    228315054 : type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
    2055              :                    bool in_return)
    2056              : {
    2057    228315054 :   machine_mode mode = TYPE_MODE (type);
    2058              : 
                          /* Only vector types that were not given a vector mode need any
                             work; everything else returns TYPE_MODE unchanged.  */
    2059    228315054 :   if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    2060              :     {
    2061       467240 :       HOST_WIDE_INT size = int_size_in_bytes (type);
    2062       467240 :       if ((size == 8 || size == 16 || size == 32 || size == 64)
    2063              :           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
    2064       467240 :           && TYPE_VECTOR_SUBPARTS (type) > 1)
    2065              :         {
    2066       430668 :           machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
    2067              : 
    2068              :           /* There are no XFmode vector modes ...  */
    2069       430668 :           if (innermode == XFmode)
    2070              :             return mode;
    2071              : 
    2072              :           /* ... and no decimal float vector modes.  */
    2073       430115 :           if (DECIMAL_FLOAT_MODE_P (innermode))
    2074              :             return mode;
    2075              : 
                                  /* Pick the first mode of the matching vector class as the
                                     starting point for the search below.  */
    2076       429822 :           if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
    2077              :             mode = MIN_MODE_VECTOR_FLOAT;
    2078              :           else
    2079       360002 :             mode = MIN_MODE_VECTOR_INT;
    2080              : 
    2081              :           /* Get the mode which has this inner mode and number of units.  */
    2082      9086586 :           FOR_EACH_MODE_FROM (mode, mode)
    2083     18908131 :             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
    2084     10251367 :                 && GET_MODE_INNER (mode) == innermode)
    2085              :               {
                                        /* 64-byte vector without AVX512F: warn about the
                                           ABI change and fall back to TYPE_MODE.  */
    2086       429822 :                 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
    2087              :                   {
    2088       293561 :                     static bool warnedavx512f;
    2089       293561 :                     static bool warnedavx512f_ret;
    2090              : 
    2091       293561 :                     if (cum && cum->warn_avx512f && !warnedavx512f)
    2092              :                       {
    2093         1361 :                         if (warning (OPT_Wpsabi, "AVX512F vector argument "
    2094              :                                      "without AVX512F enabled changes the ABI"))
    2095            2 :                           warnedavx512f = true;
    2096              :                       }
    2097       292200 :                     else if (in_return && !warnedavx512f_ret)
    2098              :                       {
    2099       283579 :                         if (warning (OPT_Wpsabi, "AVX512F vector return "
    2100              :                                      "without AVX512F enabled changes the ABI"))
    2101            4 :                           warnedavx512f_ret = true;
    2102              :                       }
    2103              : 
    2104       293561 :                     return TYPE_MODE (type);
    2105              :                   }
                                        /* 32-byte vector without AVX: same treatment.  */
    2106       136261 :                 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
    2107              :                   {
    2108       135712 :                     static bool warnedavx;
    2109       135712 :                     static bool warnedavx_ret;
    2110              : 
    2111       135712 :                     if (cum && cum->warn_avx && !warnedavx)
    2112              :                       {
    2113          770 :                         if (warning (OPT_Wpsabi, "AVX vector argument "
    2114              :                                      "without AVX enabled changes the ABI"))
    2115            5 :                           warnedavx = true;
    2116              :                       }
    2117       134942 :                     else if (in_return && !warnedavx_ret)
    2118              :                       {
    2119       120871 :                         if (warning (OPT_Wpsabi, "AVX vector return "
    2120              :                                      "without AVX enabled changes the ABI"))
    2121           10 :                           warnedavx_ret = true;
    2122              :                       }
    2123              : 
    2124       135712 :                     return TYPE_MODE (type);
    2125              :                   }
                                        /* 16-byte vector, or 8-byte vector in 64-bit mode,
                                           without SSE: only warn; the vector mode found
                                           by the search is still returned below.  The
                                           return-value warning is limited to 32-bit
                                           mode.  */
    2126          549 :                 else if (((size == 8 && TARGET_64BIT) || size == 16)
    2127          546 :                          && !TARGET_SSE
    2128          140 :                          && !TARGET_IAMCU)
    2129              :                   {
    2130          140 :                     static bool warnedsse;
    2131          140 :                     static bool warnedsse_ret;
    2132              : 
    2133          140 :                     if (cum && cum->warn_sse && !warnedsse)
    2134              :                       {
    2135           19 :                         if (warning (OPT_Wpsabi, "SSE vector argument "
    2136              :                                      "without SSE enabled changes the ABI"))
    2137            6 :                           warnedsse = true;
    2138              :                       }
    2139          121 :                     else if (!TARGET_64BIT && in_return && !warnedsse_ret)
    2140              :                       {
    2141            0 :                         if (warning (OPT_Wpsabi, "SSE vector return "
    2142              :                                      "without SSE enabled changes the ABI"))
    2143            0 :                           warnedsse_ret = true;
    2144              :                       }
    2145              :                   }
                                        /* 8-byte vector in 32-bit mode without MMX: only
                                           warn, and only when there is no current function
                                           or its func_type is TYPE_NORMAL.  */
    2146          409 :                 else if ((size == 8 && !TARGET_64BIT)
    2147            0 :                          && (!cfun
    2148            0 :                              || cfun->machine->func_type == TYPE_NORMAL)
    2149            0 :                          && !TARGET_MMX
    2150            0 :                          && !TARGET_IAMCU)
    2151              :                   {
    2152            0 :                     static bool warnedmmx;
    2153            0 :                     static bool warnedmmx_ret;
    2154              : 
    2155            0 :                     if (cum && cum->warn_mmx && !warnedmmx)
    2156              :                       {
    2157            0 :                         if (warning (OPT_Wpsabi, "MMX vector argument "
    2158              :                                      "without MMX enabled changes the ABI"))
    2159            0 :                           warnedmmx = true;
    2160              :                       }
    2161            0 :                     else if (in_return && !warnedmmx_ret)
    2162              :                       {
    2163            0 :                         if (warning (OPT_Wpsabi, "MMX vector return "
    2164              :                                      "without MMX enabled changes the ABI"))
    2165            0 :                           warnedmmx_ret = true;
    2166              :                       }
    2167              :                   }
    2168          549 :                 return mode;
    2169              :               }
    2170              : 
                                  /* The search above is expected to always find a mode.  */
    2171            0 :           gcc_unreachable ();
    2172              :         }
    2173              :     }
    2174              : 
    2175              :   return mode;
    2176              : }
    2177              : 
    2178              : /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
    2179              :    this may not agree with the mode that the type system has chosen for the
    2180              :    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
    2181              :    go ahead and use it.  Otherwise we have to build a PARALLEL instead.
                        
                           The PARALLEL has mode ORIG_MODE and a single element: an EXPR_LIST
                           pairing a MODE register REGNO with const0_rtx.  */
    2182              : 
    2183              : static rtx
    2184     36447178 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
    2185              :                      unsigned int regno)
    2186              : {
    2187     36447178 :   rtx tmp;
    2188              : 
    2189     36447178 :   if (orig_mode != BLKmode)
    2190     36447150 :     tmp = gen_rtx_REG (orig_mode, regno);
    2191              :   else
    2192              :     {
    2193           28 :       tmp = gen_rtx_REG (mode, regno);
    2194           28 :       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
    2195           28 :       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    2196              :     }
    2197              : 
    2198     36447178 :   return tmp;
    2199              : }
    2200              : 
    2201              : /* x86-64 register passing implementation.  See the x86-64 ABI for details.
    2202              :    The goal of this code is to classify each 8-byte chunk of an incoming
    2203              :    argument by register class and assign registers accordingly.  */
    2204              : 
    2205              : /* Return the union class of CLASS1 and CLASS2.
    2206              :    See the x86-64 PS ABI for details.
                        
                           The rules are tried in order and the first one that applies
                           decides the result.  Every rule tests both arguments
                           symmetrically, so the result does not depend on argument
                           order.  */
    2207              : 
    2208              : static enum x86_64_reg_class
    2209     54902543 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
    2210              : {
    2211              :   /* Rule #1: If both classes are equal, this is the resulting class.  */
    2212     53663363 :   if (class1 == class2)
    2213              :     return class1;
    2214              : 
    2215              :   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
    2216              :      the other class.  */
    2217     47516650 :   if (class1 == X86_64_NO_CLASS)
    2218              :     return class2;
    2219     48335504 :   if (class2 == X86_64_NO_CLASS)
    2220              :     return class1;
    2221              : 
    2222              :   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    2223      1658266 :   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    2224              :     return X86_64_MEMORY_CLASS;
    2225              : 
    2226              :   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
                             INTEGERSI merged with SSESF or SSEHF stays INTEGERSI; every
                             other pairing that involves INTEGER or INTEGERSI yields
                             INTEGER.  */
    2227      1511892 :   if ((class1 == X86_64_INTEGERSI_CLASS
    2228       189962 :        && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
    2229      1510686 :       || (class2 == X86_64_INTEGERSI_CLASS
    2230       921197 :           && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    2231              :     return X86_64_INTEGERSI_CLASS;
    2232      1506879 :   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    2233       382202 :       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    2234              :     return X86_64_INTEGER_CLASS;
    2235              : 
    2236              :   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    2237              :      MEMORY is used.  */
    2238        90802 :   if (class1 == X86_64_X87_CLASS
    2239              :       || class1 == X86_64_X87UP_CLASS
    2240        90802 :       || class1 == X86_64_COMPLEX_X87_CLASS
    2241              :       || class2 == X86_64_X87_CLASS
    2242        89897 :       || class2 == X86_64_X87UP_CLASS
    2243        59516 :       || class2 == X86_64_COMPLEX_X87_CLASS)
    2244        31286 :     return X86_64_MEMORY_CLASS;
    2245              : 
    2246              :   /* Rule #6: Otherwise class SSE is used.  */
    2247              :   return X86_64_SSE_CLASS;
    2248              : }
    2249              : 
    2250              : /* Classify the argument of type TYPE and mode MODE.
    2251              :    CLASSES will be filled by the register class used to pass each word
    2252              :    of the operand.  The number of words is returned.  In case the parameter
    2253              :    should be passed in memory, 0 is returned. As a special case for zero
    2254              :    sized containers, classes[0] will be NO_CLASS and 1 is returned.
    2255              : 
    2256              :    BIT_OFFSET is used internally for handling records and specifies the
    2257              :    offset in bits modulo 512 to avoid overflow cases.
    2258              : 
    2259              :    See the x86-64 PS ABI for details.
    2260              : */
    2261              : 
    2262              : static int
    2263    389650162 : classify_argument (machine_mode mode, const_tree type,
    2264              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
    2265              :                    int &zero_width_bitfields)
    2266              : {
    2267    389650162 :   HOST_WIDE_INT bytes
    2268    773176374 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2269    389650162 :   int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
    2270              : 
    2271              :   /* Variable sized entities are always passed/returned in memory.  */
    2272    389650162 :   if (bytes < 0)
    2273              :     return 0;
    2274              : 
    2275    389648963 :   if (mode != VOIDmode)
    2276              :     {
    2277              :       /* The value of "named" doesn't matter.  */
    2278    388572183 :       function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
    2279    388572183 :       if (targetm.calls.must_pass_in_stack (arg))
    2280           37 :         return 0;
    2281              :     }
    2282              : 
    2283    389648926 :   if (type && (AGGREGATE_TYPE_P (type)
    2284    353888910 :                || (BITINT_TYPE_P (type) && words > 1)))
    2285              :     {
    2286     36867487 :       int i;
    2287     36867487 :       tree field;
    2288     36867487 :       enum x86_64_reg_class subclasses[MAX_CLASSES];
    2289              : 
    2290              :       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
    2291     36867487 :       if (bytes > 64)
    2292              :         return 0;
    2293              : 
    2294     92595959 :       for (i = 0; i < words; i++)
    2295     56555162 :         classes[i] = X86_64_NO_CLASS;
    2296              : 
    2297              :       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
    2298              :          signalize memory class, so handle it as special case.  */
    2299     36040797 :       if (!words)
    2300              :         {
    2301        82698 :           classes[0] = X86_64_NO_CLASS;
    2302        82698 :           return 1;
    2303              :         }
    2304              : 
    2305              :       /* Classify each field of record and merge classes.  */
    2306     35958099 :       switch (TREE_CODE (type))
    2307              :         {
    2308     33933053 :         case RECORD_TYPE:
    2309              :           /* And now merge the fields of structure.  */
    2310    915525429 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2311              :             {
    2312    882105567 :               if (TREE_CODE (field) == FIELD_DECL)
    2313              :                 {
    2314     50099542 :                   int num;
    2315              : 
    2316     50099542 :                   if (TREE_TYPE (field) == error_mark_node)
    2317            4 :                     continue;
    2318              : 
    2319              :                   /* Bitfields are always classified as integer.  Handle them
    2320              :                      early, since later code would consider them to be
    2321              :                      misaligned integers.  */
    2322     50099538 :                   if (DECL_BIT_FIELD (field))
    2323              :                     {
    2324      1248399 :                       if (integer_zerop (DECL_SIZE (field)))
    2325              :                         {
    2326        12839 :                           if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
    2327         8021 :                             continue;
    2328         4818 :                           if (zero_width_bitfields != 2)
    2329              :                             {
    2330         4284 :                               zero_width_bitfields = 1;
    2331         4284 :                               continue;
    2332              :                             }
    2333              :                         }
    2334      1236094 :                       for (i = (int_bit_position (field)
    2335      1236094 :                                 + (bit_offset % 64)) / 8 / 8;
    2336      2475274 :                            i < ((int_bit_position (field) + (bit_offset % 64))
    2337      2475274 :                                 + tree_to_shwi (DECL_SIZE (field))
    2338      2475274 :                                 + 63) / 8 / 8; i++)
    2339      1239180 :                         classes[i]
    2340      2478360 :                           = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
    2341              :                     }
    2342              :                   else
    2343              :                     {
    2344     48851139 :                       int pos;
    2345              : 
    2346     48851139 :                       type = TREE_TYPE (field);
    2347              : 
    2348              :                       /* Flexible array member is ignored.  */
    2349     48851139 :                       if (TYPE_MODE (type) == BLKmode
    2350       646043 :                           && TREE_CODE (type) == ARRAY_TYPE
    2351       168102 :                           && TYPE_SIZE (type) == NULL_TREE
    2352         2007 :                           && TYPE_DOMAIN (type) != NULL_TREE
    2353     48852381 :                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
    2354              :                               == NULL_TREE))
    2355              :                         {
    2356         1242 :                           static bool warned;
    2357              : 
    2358         1242 :                           if (!warned && warn_psabi)
    2359              :                             {
    2360            3 :                               warned = true;
    2361            3 :                               inform (input_location,
    2362              :                                       "the ABI of passing struct with"
    2363              :                                       " a flexible array member has"
    2364              :                                       " changed in GCC 4.4");
    2365              :                             }
    2366         1242 :                           continue;
    2367         1242 :                         }
    2368     48849897 :                       num = classify_argument (TYPE_MODE (type), type,
    2369              :                                                subclasses,
    2370     48849897 :                                                (int_bit_position (field)
    2371     48849897 :                                                 + bit_offset) % 512,
    2372              :                                                zero_width_bitfields);
    2373     48849897 :                       if (!num)
    2374              :                         return 0;
    2375     48336706 :                       pos = (int_bit_position (field)
    2376     48336706 :                              + (bit_offset % 64)) / 8 / 8;
    2377    100043375 :                       for (i = 0; i < num && (i + pos) < words; i++)
    2378     51706669 :                         classes[i + pos]
    2379     51706669 :                           = merge_classes (subclasses[i], classes[i + pos]);
    2380              :                     }
    2381              :                 }
    2382              :             }
    2383              :           break;
    2384              : 
    2385       444666 :         case ARRAY_TYPE:
    2386              :           /* Arrays are handled as small records.  */
    2387       444666 :           {
    2388       444666 :             int num;
    2389       444666 :             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
    2390       444666 :                                      TREE_TYPE (type), subclasses, bit_offset,
    2391              :                                      zero_width_bitfields);
    2392       444666 :             if (!num)
    2393              :               return 0;
    2394              : 
    2395              :             /* The partial classes are now full classes.  */
    2396       429184 :             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
    2397        13868 :               subclasses[0] = X86_64_SSE_CLASS;
    2398       429184 :             if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
    2399         5126 :               subclasses[0] = X86_64_SSE_CLASS;
    2400       429184 :             if (subclasses[0] == X86_64_INTEGERSI_CLASS
    2401       161650 :                 && !((bit_offset % 64) == 0 && bytes == 4))
    2402       129999 :               subclasses[0] = X86_64_INTEGER_CLASS;
    2403              : 
    2404      1325866 :             for (i = 0; i < words; i++)
    2405       896682 :               classes[i] = subclasses[i % num];
    2406              : 
    2407              :             break;
    2408              :           }
    2409       272546 :         case UNION_TYPE:
    2410       272546 :         case QUAL_UNION_TYPE:
    2411              :           /* Unions are similar to RECORD_TYPE but offset is always 0.
    2412              :              */
    2413      3033677 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2414              :             {
    2415      2796396 :               if (TREE_CODE (field) == FIELD_DECL)
    2416              :                 {
    2417      1231357 :                   int num;
    2418              : 
    2419      1231357 :                   if (TREE_TYPE (field) == error_mark_node)
    2420           10 :                     continue;
    2421              : 
    2422      1231347 :                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
    2423      1231347 :                                            TREE_TYPE (field), subclasses,
    2424              :                                            bit_offset, zero_width_bitfields);
    2425      1231347 :                   if (!num)
    2426              :                     return 0;
    2427      3152776 :                   for (i = 0; i < num && i < words; i++)
    2428      1956694 :                     classes[i] = merge_classes (subclasses[i], classes[i]);
    2429              :                 }
    2430              :             }
    2431              :           break;
    2432              : 
    2433      1307834 :         case BITINT_TYPE:
    2434      1307834 :         case ENUMERAL_TYPE:
    2435              :           /* _BitInt(N) for N > 64 is passed as structure containing
    2436              :              (N + 63) / 64 64-bit elements.  */
    2437      1307834 :           if (words > 2)
    2438              :             return 0;
    2439        75441 :           classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2440        75441 :           return 2;
    2441              : 
    2442            0 :         default:
    2443            0 :           gcc_unreachable ();
    2444              :         }
    2445              : 
    2446     34086327 :       if (words > 2)
    2447              :         {
    2448              :           /* When size > 16 bytes, if the first one isn't
    2449              :              X86_64_SSE_CLASS or any other ones aren't
    2450              :              X86_64_SSEUP_CLASS, everything should be passed in
    2451              :              memory.  */
    2452      1654562 :           if (classes[0] != X86_64_SSE_CLASS)
    2453              :             return 0;
    2454              : 
    2455       197316 :           for (i = 1; i < words; i++)
    2456       179129 :             if (classes[i] != X86_64_SSEUP_CLASS)
    2457              :               return 0;
    2458              :         }
    2459              : 
    2460              :       /* Final merger cleanup.  */
    2461     76198321 :       for (i = 0; i < words; i++)
    2462              :         {
    2463              :           /* If one class is MEMORY, everything should be passed in
    2464              :              memory.  */
    2465     43781312 :           if (classes[i] == X86_64_MEMORY_CLASS)
    2466              :             return 0;
    2467              : 
    2468              :           /* The X86_64_SSEUP_CLASS should be always preceded by
    2469              :              X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
    2470     43750739 :           if (classes[i] == X86_64_SSEUP_CLASS
    2471       207011 :               && classes[i - 1] != X86_64_SSE_CLASS
    2472        76546 :               && classes[i - 1] != X86_64_SSEUP_CLASS)
    2473              :             {
    2474              :               /* The first one should never be X86_64_SSEUP_CLASS.  */
    2475         1916 :               gcc_assert (i != 0);
    2476         1916 :               classes[i] = X86_64_SSE_CLASS;
    2477              :             }
    2478              : 
    2479              :           /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
    2480              :              everything should be passed in memory.  */
    2481     43750739 :           if (classes[i] == X86_64_X87UP_CLASS
    2482       179862 :               && (classes[i - 1] != X86_64_X87_CLASS))
    2483              :             {
    2484         2370 :               static bool warned;
    2485              : 
    2486              :               /* The first one should never be X86_64_X87UP_CLASS.  */
    2487         2370 :               gcc_assert (i != 0);
    2488         2370 :               if (!warned && warn_psabi)
    2489              :                 {
    2490            1 :                   warned = true;
    2491            1 :                   inform (input_location,
    2492              :                           "the ABI of passing union with %<long double%>"
    2493              :                           " has changed in GCC 4.4");
    2494              :                 }
    2495         2370 :               return 0;
    2496              :             }
    2497              :         }
    2498              :       return words;
    2499              :     }
    2500              : 
    2501              :   /* Compute alignment needed.  We align all types to natural boundaries with
    2502              :      exception of XFmode that is aligned to 64bits.  */
    2503    352781439 :   if (mode != VOIDmode && mode != BLKmode)
    2504              :     {
    2505    351240668 :       int mode_alignment = GET_MODE_BITSIZE (mode);
    2506              : 
    2507    351240668 :       if (mode == XFmode)
    2508              :         mode_alignment = 128;
    2509    344335404 :       else if (mode == XCmode)
    2510       552059 :         mode_alignment = 256;
    2511    351240668 :       if (COMPLEX_MODE_P (mode))
    2512      2312936 :         mode_alignment /= 2;
    2513              :       /* Misaligned fields are always returned in memory.  */
    2514    351240668 :       if (bit_offset % mode_alignment)
    2515              :         return 0;
    2516              :     }
    2517              : 
    2518              :   /* for V1xx modes, just use the base mode */
    2519    352773806 :   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
    2520    446216763 :       && GET_MODE_UNIT_SIZE (mode) == bytes)
    2521         6315 :     mode = GET_MODE_INNER (mode);
    2522              : 
    2523              :   /* Classification of atomic types.  */
    2524    352773806 :   switch (mode)
    2525              :     {
    2526       206928 :     case E_SDmode:
    2527       206928 :     case E_DDmode:
    2528       206928 :       classes[0] = X86_64_SSE_CLASS;
    2529       206928 :       return 1;
    2530        98708 :     case E_TDmode:
    2531        98708 :       classes[0] = X86_64_SSE_CLASS;
    2532        98708 :       classes[1] = X86_64_SSEUP_CLASS;
    2533        98708 :       return 2;
    2534    229905394 :     case E_DImode:
    2535    229905394 :     case E_SImode:
    2536    229905394 :     case E_HImode:
    2537    229905394 :     case E_QImode:
    2538    229905394 :     case E_CSImode:
    2539    229905394 :     case E_CHImode:
    2540    229905394 :     case E_CQImode:
    2541    229905394 :       {
    2542    229905394 :         int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
    2543              : 
    2544              :         /* Analyze last 128 bits only.  */
    2545    229905394 :         size = (size - 1) & 0x7f;
    2546              : 
    2547    229905394 :         if (size < 32)
    2548              :           {
    2549    102093159 :             classes[0] = X86_64_INTEGERSI_CLASS;
    2550    102093159 :             return 1;
    2551              :           }
    2552    127812235 :         else if (size < 64)
    2553              :           {
    2554    117238598 :             classes[0] = X86_64_INTEGER_CLASS;
    2555    117238598 :             return 1;
    2556              :           }
    2557     10573637 :         else if (size < 64+32)
    2558              :           {
    2559      3866618 :             classes[0] = X86_64_INTEGER_CLASS;
    2560      3866618 :             classes[1] = X86_64_INTEGERSI_CLASS;
    2561      3866618 :             return 2;
    2562              :           }
    2563      6707019 :         else if (size < 64+64)
    2564              :           {
    2565      6707019 :             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2566      6707019 :             return 2;
    2567              :           }
    2568              :         else
    2569              :           gcc_unreachable ();
    2570              :       }
    2571      2440624 :     case E_CDImode:
    2572      2440624 :     case E_TImode:
    2573      2440624 :       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2574      2440624 :       return 2;
    2575            0 :     case E_COImode:
    2576            0 :     case E_OImode:
    2577              :       /* OImode shouldn't be used directly.  */
    2578            0 :       gcc_unreachable ();
    2579              :     case E_CTImode:
    2580              :       return 0;
    2581       828876 :     case E_HFmode:
    2582       828876 :     case E_BFmode:
    2583       828876 :       if (!(bit_offset % 64))
    2584       826326 :         classes[0] = X86_64_SSEHF_CLASS;
    2585              :       else
    2586         2550 :         classes[0] = X86_64_SSE_CLASS;
    2587              :       return 1;
    2588      9741887 :     case E_SFmode:
    2589      9741887 :       if (!(bit_offset % 64))
    2590      9688708 :         classes[0] = X86_64_SSESF_CLASS;
    2591              :       else
    2592        53179 :         classes[0] = X86_64_SSE_CLASS;
    2593              :       return 1;
    2594      4246619 :     case E_DFmode:
    2595      4246619 :       classes[0] = X86_64_SSEDF_CLASS;
    2596      4246619 :       return 1;
    2597      6904548 :     case E_XFmode:
    2598      6904548 :       classes[0] = X86_64_X87_CLASS;
    2599      6904548 :       classes[1] = X86_64_X87UP_CLASS;
    2600      6904548 :       return 2;
    2601      1289867 :     case E_TFmode:
    2602      1289867 :       classes[0] = X86_64_SSE_CLASS;
    2603      1289867 :       classes[1] = X86_64_SSEUP_CLASS;
    2604      1289867 :       return 2;
    2605        76798 :     case E_HCmode:
    2606        76798 :     case E_BCmode:
    2607        76798 :       classes[0] = X86_64_SSE_CLASS;
    2608        76798 :       if (!(bit_offset % 64))
    2609              :         return 1;
    2610              :       else
    2611              :         {
    2612           98 :           classes[1] = X86_64_SSEHF_CLASS;
    2613           98 :           return 2;
    2614              :         }
    2615       693999 :     case E_SCmode:
    2616       693999 :       classes[0] = X86_64_SSE_CLASS;
    2617       693999 :       if (!(bit_offset % 64))
    2618              :         return 1;
    2619              :       else
    2620              :         {
    2621         1119 :           static bool warned;
    2622              : 
    2623         1119 :           if (!warned && warn_psabi)
    2624              :             {
    2625            2 :               warned = true;
    2626            2 :               inform (input_location,
    2627              :                       "the ABI of passing structure with %<complex float%>"
    2628              :                       " member has changed in GCC 4.4");
    2629              :             }
    2630         1119 :           classes[1] = X86_64_SSESF_CLASS;
    2631         1119 :           return 2;
    2632              :         }
    2633       704001 :     case E_DCmode:
    2634       704001 :       classes[0] = X86_64_SSEDF_CLASS;
    2635       704001 :       classes[1] = X86_64_SSEDF_CLASS;
    2636       704001 :       return 2;
    2637       552059 :     case E_XCmode:
    2638       552059 :       classes[0] = X86_64_COMPLEX_X87_CLASS;
    2639       552059 :       return 1;
    2640              :     case E_TCmode:
    2641              :       /* This mode is larger than 16 bytes.  */
    2642              :       return 0;
    2643     25334908 :     case E_V8SFmode:
    2644     25334908 :     case E_V8SImode:
    2645     25334908 :     case E_V32QImode:
    2646     25334908 :     case E_V16HFmode:
    2647     25334908 :     case E_V16BFmode:
    2648     25334908 :     case E_V16HImode:
    2649     25334908 :     case E_V4DFmode:
    2650     25334908 :     case E_V4DImode:
    2651     25334908 :       classes[0] = X86_64_SSE_CLASS;
    2652     25334908 :       classes[1] = X86_64_SSEUP_CLASS;
    2653     25334908 :       classes[2] = X86_64_SSEUP_CLASS;
    2654     25334908 :       classes[3] = X86_64_SSEUP_CLASS;
    2655     25334908 :       return 4;
    2656     27469792 :     case E_V8DFmode:
    2657     27469792 :     case E_V16SFmode:
    2658     27469792 :     case E_V32HFmode:
    2659     27469792 :     case E_V32BFmode:
    2660     27469792 :     case E_V8DImode:
    2661     27469792 :     case E_V16SImode:
    2662     27469792 :     case E_V32HImode:
    2663     27469792 :     case E_V64QImode:
    2664     27469792 :       classes[0] = X86_64_SSE_CLASS;
    2665     27469792 :       classes[1] = X86_64_SSEUP_CLASS;
    2666     27469792 :       classes[2] = X86_64_SSEUP_CLASS;
    2667     27469792 :       classes[3] = X86_64_SSEUP_CLASS;
    2668     27469792 :       classes[4] = X86_64_SSEUP_CLASS;
    2669     27469792 :       classes[5] = X86_64_SSEUP_CLASS;
    2670     27469792 :       classes[6] = X86_64_SSEUP_CLASS;
    2671     27469792 :       classes[7] = X86_64_SSEUP_CLASS;
    2672     27469792 :       return 8;
    2673     37342797 :     case E_V4SFmode:
    2674     37342797 :     case E_V4SImode:
    2675     37342797 :     case E_V16QImode:
    2676     37342797 :     case E_V8HImode:
    2677     37342797 :     case E_V8HFmode:
    2678     37342797 :     case E_V8BFmode:
    2679     37342797 :     case E_V2DFmode:
    2680     37342797 :     case E_V2DImode:
    2681     37342797 :       classes[0] = X86_64_SSE_CLASS;
    2682     37342797 :       classes[1] = X86_64_SSEUP_CLASS;
    2683     37342797 :       return 2;
    2684      3263874 :     case E_V1TImode:
    2685      3263874 :     case E_V1DImode:
    2686      3263874 :     case E_V2SFmode:
    2687      3263874 :     case E_V2SImode:
    2688      3263874 :     case E_V4HImode:
    2689      3263874 :     case E_V4HFmode:
    2690      3263874 :     case E_V4BFmode:
    2691      3263874 :     case E_V2HFmode:
    2692      3263874 :     case E_V2BFmode:
    2693      3263874 :     case E_V8QImode:
    2694      3263874 :       classes[0] = X86_64_SSE_CLASS;
    2695      3263874 :       return 1;
    2696              :     case E_BLKmode:
    2697              :     case E_VOIDmode:
    2698              :       return 0;
    2699        45148 :     default:
    2700        45148 :       gcc_assert (VECTOR_MODE_P (mode));
    2701              : 
    2702        45148 :       if (bytes > 16)
    2703              :         return 0;
    2704              : 
    2705        60568 :       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
    2706              : 
    2707        60568 :       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
    2708        29850 :         classes[0] = X86_64_INTEGERSI_CLASS;
    2709              :       else
    2710          434 :         classes[0] = X86_64_INTEGER_CLASS;
    2711        30284 :       classes[1] = X86_64_INTEGER_CLASS;
    2712        30284 :       return 1 + (bytes > 8);
    2713              :     }
    2714              : }
    2715              : 
    2716              : /* Wrapper around classify_argument with the extra zero_width_bitfields
    2717              :    argument, to diagnose GCC 12.1 ABI differences for C.  */
    2718              : 
    2719              : static int
    2720    339123718 : classify_argument (machine_mode mode, const_tree type,
    2721              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
    2722              : {
    2723    339123718 :   int zero_width_bitfields = 0;
    2724    339123718 :   static bool warned = false;
    2725    339123718 :   int n = classify_argument (mode, type, classes, bit_offset,
    2726              :                              zero_width_bitfields);
    2727    339123718 :   if (!zero_width_bitfields || warned || !warn_psabi)
    2728              :     return n;
    2729          534 :   enum x86_64_reg_class alt_classes[MAX_CLASSES];
    2730          534 :   zero_width_bitfields = 2;
    2731          534 :   if (classify_argument (mode, type, alt_classes, bit_offset,
    2732              :                          zero_width_bitfields) != n)
    2733            0 :     zero_width_bitfields = 3;
    2734              :   else
    2735         1286 :     for (int i = 0; i < n; i++)
    2736          760 :       if (classes[i] != alt_classes[i])
    2737              :         {
    2738            8 :           zero_width_bitfields = 3;
    2739            8 :           break;
    2740              :         }
    2741          534 :   if (zero_width_bitfields == 3)
    2742              :     {
    2743            8 :       warned = true;
    2744            8 :       const char *url
    2745              :         = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
    2746              : 
    2747            8 :       inform (input_location,
    2748              :               "the ABI of passing C structures with zero-width bit-fields"
    2749              :               " has changed in GCC %{12.1%}", url);
    2750              :     }
    2751              :   return n;
    2752              : }
    2753              : 
    2754              : /* Examine the argument and set the number of registers required in each
    2755              :    class.  Return true iff the parameter should be passed in memory.  */
    2756              : 
    2757              : static bool
    2758    229459165 : examine_argument (machine_mode mode, const_tree type, bool in_return,
    2759              :                   int *int_nregs, int *sse_nregs)
    2760              : {
    2761    229459165 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2762    229459165 :   int n = classify_argument (mode, type, regclass, 0);
    2763              : 
    2764    229459165 :   *int_nregs = 0;
    2765    229459165 :   *sse_nregs = 0;
    2766              : 
    2767    229459165 :   if (!n)
    2768              :     return true;
    2769    664735932 :   for (n--; n >= 0; n--)
    2770    440756439 :     switch (regclass[n])
    2771              :       {
    2772    152366192 :       case X86_64_INTEGER_CLASS:
    2773    152366192 :       case X86_64_INTEGERSI_CLASS:
    2774    152366192 :         (*int_nregs)++;
    2775    152366192 :         break;
    2776     74405590 :       case X86_64_SSE_CLASS:
    2777     74405590 :       case X86_64_SSEHF_CLASS:
    2778     74405590 :       case X86_64_SSESF_CLASS:
    2779     74405590 :       case X86_64_SSEDF_CLASS:
    2780     74405590 :         (*sse_nregs)++;
    2781     74405590 :         break;
    2782              :       case X86_64_NO_CLASS:
    2783              :       case X86_64_SSEUP_CLASS:
    2784              :         break;
    2785      9430636 :       case X86_64_X87_CLASS:
    2786      9430636 :       case X86_64_X87UP_CLASS:
    2787      9430636 :       case X86_64_COMPLEX_X87_CLASS:
    2788      9430636 :         if (!in_return)
    2789              :           return true;
    2790              :         break;
    2791            0 :       case X86_64_MEMORY_CLASS:
    2792            0 :         gcc_unreachable ();
    2793              :       }
    2794              : 
    2795              :   return false;
    2796              : }
    2797              : 
    2798              : /* Construct container for the argument used by GCC interface.  See
    2799              :    FUNCTION_ARG for the detailed description.  */
    2800              : 
    2801              : static rtx
    2802    111281221 : construct_container (machine_mode mode, machine_mode orig_mode,
    2803              :                      const_tree type, bool in_return, int nintregs,
    2804              :                      int nsseregs, const int *intreg, int sse_regno)
    2805              : {
    2806              :   /* The following variables hold the static issued_error state.  */
    2807    111281221 :   static bool issued_sse_arg_error;
    2808    111281221 :   static bool issued_sse_ret_error;
    2809    111281221 :   static bool issued_x87_ret_error;
    2810              : 
    2811    111281221 :   machine_mode tmpmode;
    2812    111281221 :   int bytes
    2813    221885169 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2814    111281221 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2815    111281221 :   int n;
    2816    111281221 :   int i;
    2817    111281221 :   int nexps = 0;
    2818    111281221 :   int needed_sseregs, needed_intregs;
    2819    111281221 :   rtx exp[MAX_CLASSES];
    2820    111281221 :   rtx ret;
    2821              : 
    2822    111281221 :   if (examine_argument (mode, type, in_return, &needed_intregs,
    2823              :                         &needed_sseregs))
    2824              :     return NULL;
    2825              : 
    2826    110769303 :   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    2827              :     return NULL;
    2828              : 
    2829              :   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
    2830              :      some less clueful developer tries to use floating-point anyway.  */
    2831    109664624 :   if (needed_sseregs
    2832     36767966 :       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    2833              :     {
    2834              :       /* Return early if we shouldn't raise an error for invalid
    2835              :          calls.  */
    2836           71 :       if (cfun != NULL && cfun->machine->silent_p)
    2837              :         return NULL;
    2838           39 :       if (in_return)
    2839              :         {
    2840           34 :           if (!issued_sse_ret_error)
    2841              :             {
    2842           16 :               if (VALID_SSE2_TYPE_MODE (mode))
    2843            5 :                 error ("SSE register return with SSE2 disabled");
    2844              :               else
    2845           11 :                 error ("SSE register return with SSE disabled");
    2846           16 :               issued_sse_ret_error = true;
    2847              :             }
    2848              :         }
    2849            5 :       else if (!issued_sse_arg_error)
    2850              :         {
    2851            5 :           if (VALID_SSE2_TYPE_MODE (mode))
    2852            0 :             error ("SSE register argument with SSE2 disabled");
    2853              :           else
    2854            5 :             error ("SSE register argument with SSE disabled");
    2855            5 :           issued_sse_arg_error = true;
    2856              :         }
    2857           39 :       return NULL;
    2858              :     }
    2859              : 
    2860    109664553 :   n = classify_argument (mode, type, regclass, 0);
    2861    109664553 :   gcc_assert (n);
    2862              : 
    2863              :   /* Likewise, error if the ABI requires us to return values in the
    2864              :      x87 registers and the user specified -mno-80387.  */
    2865    109664553 :   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    2866      1424666 :     for (i = 0; i < n; i++)
    2867       751944 :       if (regclass[i] == X86_64_X87_CLASS
    2868              :           || regclass[i] == X86_64_X87UP_CLASS
    2869       751944 :           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
    2870              :         {
    2871              :           /* Return early if we shouldn't raise an error for invalid
    2872              :              calls.  */
    2873           16 :           if (cfun != NULL && cfun->machine->silent_p)
    2874              :             return NULL;
    2875           13 :           if (!issued_x87_ret_error)
    2876              :             {
    2877            8 :               error ("x87 register return with x87 disabled");
    2878            8 :               issued_x87_ret_error = true;
    2879              :             }
    2880           13 :           return NULL;
    2881              :         }
    2882              : 
    2883              :   /* First construct simple cases.  Avoid SCmode and HCmode, since we
    2884              :      want to use a single register to pass these types.  */
    2885    109664537 :   if (n == 1 && mode != SCmode && mode != HCmode)
    2886     72833863 :     switch (regclass[0])
    2887              :       {
    2888     66790454 :       case X86_64_INTEGER_CLASS:
    2889     66790454 :       case X86_64_INTEGERSI_CLASS:
    2890     66790454 :         return gen_rtx_REG (mode, intreg[0]);
    2891      5843099 :       case X86_64_SSE_CLASS:
    2892      5843099 :       case X86_64_SSEHF_CLASS:
    2893      5843099 :       case X86_64_SSESF_CLASS:
    2894      5843099 :       case X86_64_SSEDF_CLASS:
    2895      5843099 :         if (mode != BLKmode)
    2896     11685390 :           return gen_reg_or_parallel (mode, orig_mode,
    2897     11685390 :                                       GET_SSE_REGNO (sse_regno));
    2898              :         break;
    2899       172007 :       case X86_64_X87_CLASS:
    2900       172007 :       case X86_64_COMPLEX_X87_CLASS:
    2901       172007 :         return gen_rtx_REG (mode, FIRST_STACK_REG);
    2902              :       case X86_64_NO_CLASS:
    2903              :         /* Zero sized array, struct or class.  */
    2904              :         return NULL;
    2905            0 :       default:
    2906            0 :         gcc_unreachable ();
    2907              :       }
    2908     36831078 :   if (n == 2
    2909     19036943 :       && regclass[0] == X86_64_SSE_CLASS
    2910     12881589 :       && regclass[1] == X86_64_SSEUP_CLASS
    2911     12876434 :       && mode != BLKmode)
    2912     25752868 :     return gen_reg_or_parallel (mode, orig_mode,
    2913     25752868 :                                 GET_SSE_REGNO (sse_regno));
    2914     23954644 :   if (n == 4
    2915      8428958 :       && regclass[0] == X86_64_SSE_CLASS
    2916      8428958 :       && regclass[1] == X86_64_SSEUP_CLASS
    2917      8428958 :       && regclass[2] == X86_64_SSEUP_CLASS
    2918      8428958 :       && regclass[3] == X86_64_SSEUP_CLASS
    2919      8428958 :       && mode != BLKmode)
    2920     16854538 :     return gen_reg_or_parallel (mode, orig_mode,
    2921     16854538 :                                 GET_SSE_REGNO (sse_regno));
    2922     15527375 :   if (n == 8
    2923      9126513 :       && regclass[0] == X86_64_SSE_CLASS
    2924      9126513 :       && regclass[1] == X86_64_SSEUP_CLASS
    2925      9126513 :       && regclass[2] == X86_64_SSEUP_CLASS
    2926      9126513 :       && regclass[3] == X86_64_SSEUP_CLASS
    2927      9126513 :       && regclass[4] == X86_64_SSEUP_CLASS
    2928      9126513 :       && regclass[5] == X86_64_SSEUP_CLASS
    2929      9126513 :       && regclass[6] == X86_64_SSEUP_CLASS
    2930      9126513 :       && regclass[7] == X86_64_SSEUP_CLASS
    2931      9126513 :       && mode != BLKmode)
    2932     18248754 :     return gen_reg_or_parallel (mode, orig_mode,
    2933     18248754 :                                 GET_SSE_REGNO (sse_regno));
    2934      6402998 :   if (n == 2
    2935      6160509 :       && regclass[0] == X86_64_X87_CLASS
    2936      2252451 :       && regclass[1] == X86_64_X87UP_CLASS)
    2937      2252451 :     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
    2938              : 
    2939      4150547 :   if (n == 2
    2940      3908058 :       && regclass[0] == X86_64_INTEGER_CLASS
    2941      3483885 :       && regclass[1] == X86_64_INTEGER_CLASS
    2942      3475587 :       && (mode == CDImode || mode == TImode || mode == BLKmode)
    2943      3475587 :       && intreg[0] + 1 == intreg[1])
    2944              :     {
    2945      3158305 :       if (mode == BLKmode)
    2946              :         {
    2947              :           /* Use TImode for BLKmode values in 2 integer registers.  */
    2948       505084 :           exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
    2949       252542 :                                       gen_rtx_REG (TImode, intreg[0]),
    2950              :                                       GEN_INT (0));
    2951       252542 :           ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
    2952       252542 :           XVECEXP (ret, 0, 0) = exp[0];
    2953       252542 :           return ret;
    2954              :         }
    2955              :       else
    2956      2905763 :         return gen_rtx_REG (mode, intreg[0]);
    2957              :     }
    2958              : 
    2959              :   /* Otherwise figure out the entries of the PARALLEL.  */
    2960      2734237 :   for (i = 0; i < n; i++)
    2961              :     {
    2962      1741995 :       int pos;
    2963              : 
    2964      1741995 :       switch (regclass[i])
    2965              :         {
    2966              :           case X86_64_NO_CLASS:
    2967              :             break;
    2968       993636 :           case X86_64_INTEGER_CLASS:
    2969       993636 :           case X86_64_INTEGERSI_CLASS:
    2970              :             /* Merge TImodes on aligned occasions here too.  */
    2971       993636 :             if (i * 8 + 8 > bytes)
    2972              :               {
    2973         3233 :                 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
    2974         3233 :                 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
    2975              :                   /* We've requested a size (e.g. 24 bits) we
    2976              :                      don't have a mode for.  Use DImode.  */
    2977          357 :                   tmpmode = DImode;
    2978              :               }
    2979       990403 :             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
    2980              :               tmpmode = SImode;
    2981              :             else
    2982       816460 :               tmpmode = DImode;
    2983      1987272 :             exp [nexps++]
    2984       993636 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2985       993636 :                                    gen_rtx_REG (tmpmode, *intreg),
    2986       993636 :                                    GEN_INT (i*8));
    2987       993636 :             intreg++;
    2988       993636 :             break;
    2989          592 :           case X86_64_SSEHF_CLASS:
    2990          592 :             tmpmode = (mode == BFmode ? BFmode : HFmode);
    2991         1184 :             exp [nexps++]
    2992         1184 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2993              :                                    gen_rtx_REG (tmpmode,
    2994          592 :                                                 GET_SSE_REGNO (sse_regno)),
    2995          592 :                                    GEN_INT (i*8));
    2996          592 :             sse_regno++;
    2997          592 :             break;
    2998         3007 :           case X86_64_SSESF_CLASS:
    2999         6014 :             exp [nexps++]
    3000         6014 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3001              :                                    gen_rtx_REG (SFmode,
    3002         3007 :                                                 GET_SSE_REGNO (sse_regno)),
    3003         3007 :                                    GEN_INT (i*8));
    3004         3007 :             sse_regno++;
    3005         3007 :             break;
    3006       484151 :           case X86_64_SSEDF_CLASS:
    3007       968302 :             exp [nexps++]
    3008       968302 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3009              :                                    gen_rtx_REG (DFmode,
    3010       484151 :                                                 GET_SSE_REGNO (sse_regno)),
    3011       484151 :                                    GEN_INT (i*8));
    3012       484151 :             sse_regno++;
    3013       484151 :             break;
    3014       252403 :           case X86_64_SSE_CLASS:
    3015       252403 :             pos = i;
    3016       252403 :             switch (n)
    3017              :               {
    3018              :               case 1:
    3019              :                 tmpmode = DImode;
    3020              :                 break;
    3021        10128 :               case 2:
    3022        10128 :                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
    3023              :                   {
    3024            0 :                     tmpmode = TImode;
    3025            0 :                     i++;
    3026              :                   }
    3027              :                 else
    3028              :                   tmpmode = DImode;
    3029              :                 break;
    3030         1689 :               case 4:
    3031         1689 :                 gcc_assert (i == 0
    3032              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3033              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3034              :                             && regclass[3] == X86_64_SSEUP_CLASS);
    3035              :                 tmpmode = OImode;
    3036              :                 i += 3;
    3037              :                 break;
    3038         2136 :               case 8:
    3039         2136 :                 gcc_assert (i == 0
    3040              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3041              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3042              :                             && regclass[3] == X86_64_SSEUP_CLASS
    3043              :                             && regclass[4] == X86_64_SSEUP_CLASS
    3044              :                             && regclass[5] == X86_64_SSEUP_CLASS
    3045              :                             && regclass[6] == X86_64_SSEUP_CLASS
    3046              :                             && regclass[7] == X86_64_SSEUP_CLASS);
    3047              :                 tmpmode = XImode;
    3048              :                 i += 7;
    3049              :                 break;
    3050            0 :               default:
    3051            0 :                 gcc_unreachable ();
    3052              :               }
    3053       504806 :             exp [nexps++]
    3054       504806 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3055              :                                    gen_rtx_REG (tmpmode,
    3056       252403 :                                                 GET_SSE_REGNO (sse_regno)),
    3057       252403 :                                    GEN_INT (pos*8));
    3058       252403 :             sse_regno++;
    3059       252403 :             break;
    3060            0 :           default:
    3061            0 :             gcc_unreachable ();
    3062              :         }
    3063              :     }
    3064              : 
    3065              :   /* Empty aligned struct, union or class.  */
    3066       992242 :   if (nexps == 0)
    3067              :     return NULL;
    3068              : 
    3069       991987 :   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
    3070      2725776 :   for (i = 0; i < nexps; i++)
    3071      1733789 :     XVECEXP (ret, 0, i) = exp [i];
    3072              :   return ret;
    3073              : }
    3074              : 
    3075              : /* Update the data in CUM to advance over an argument of mode MODE
    3076              :    and data type TYPE.  (TYPE is null for libcalls where that information
    3077              :    may not be available.)
    3078              : 
    3079              :    Return the number of integer registers advanced over.  */
    3080              : 
    3081              : static int
    3082      2129938 : function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3083              :                          const_tree type, HOST_WIDE_INT bytes,
    3084              :                          HOST_WIDE_INT words)
    3085              : {
    3086      2129938 :   int res = 0;
    3087      2129938 :   bool error_p = false;
    3088              : 
    3089      2129938 :   if (TARGET_IAMCU)
    3090              :     {
    3091              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3092              :          bytes in registers.  */
    3093            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3094            0 :         goto pass_in_reg;
    3095              :       return res;
    3096              :     }
    3097              : 
    3098      2129938 :   switch (mode)
    3099              :     {
    3100              :     default:
    3101              :       break;
    3102              : 
    3103        93818 :     case E_BLKmode:
    3104        93818 :       if (bytes < 0)
    3105              :         break;
    3106              :       /* FALLTHRU */
    3107              : 
    3108      2093082 :     case E_DImode:
    3109      2093082 :     case E_SImode:
    3110      2093082 :     case E_HImode:
    3111      2093082 :     case E_QImode:
    3112        93818 : pass_in_reg:
    3113      2093082 :       cum->words += words;
    3114      2093082 :       cum->nregs -= words;
    3115      2093082 :       cum->regno += words;
    3116      2093082 :       if (cum->nregs >= 0)
    3117        47363 :         res = words;
    3118      2093082 :       if (cum->nregs <= 0)
    3119              :         {
    3120      2058951 :           cum->nregs = 0;
    3121      2058951 :           cfun->machine->arg_reg_available = false;
    3122      2058951 :           cum->regno = 0;
    3123              :         }
    3124              :       break;
    3125              : 
    3126            0 :     case E_OImode:
    3127              :       /* OImode shouldn't be used directly.  */
    3128            0 :       gcc_unreachable ();
    3129              : 
    3130         4744 :     case E_DFmode:
    3131         4744 :       if (cum->float_in_sse == -1)
    3132            0 :         error_p = true;
    3133         4744 :       if (cum->float_in_sse < 2)
    3134              :         break;
    3135              :       /* FALLTHRU */
    3136         1360 :     case E_SFmode:
    3137         1360 :       if (cum->float_in_sse == -1)
    3138            0 :         error_p = true;
    3139         1360 :       if (cum->float_in_sse < 1)
    3140              :         break;
    3141              :       /* FALLTHRU */
    3142              : 
    3143           52 :     case E_V16HFmode:
    3144           52 :     case E_V16BFmode:
    3145           52 :     case E_V8SFmode:
    3146           52 :     case E_V8SImode:
    3147           52 :     case E_V64QImode:
    3148           52 :     case E_V32HImode:
    3149           52 :     case E_V16SImode:
    3150           52 :     case E_V8DImode:
    3151           52 :     case E_V32HFmode:
    3152           52 :     case E_V32BFmode:
    3153           52 :     case E_V16SFmode:
    3154           52 :     case E_V8DFmode:
    3155           52 :     case E_V32QImode:
    3156           52 :     case E_V16HImode:
    3157           52 :     case E_V4DFmode:
    3158           52 :     case E_V4DImode:
    3159           52 :     case E_TImode:
    3160           52 :     case E_V16QImode:
    3161           52 :     case E_V8HImode:
    3162           52 :     case E_V4SImode:
    3163           52 :     case E_V2DImode:
    3164           52 :     case E_V8HFmode:
    3165           52 :     case E_V8BFmode:
    3166           52 :     case E_V4SFmode:
    3167           52 :     case E_V2DFmode:
    3168           52 :       if (!type || !AGGREGATE_TYPE_P (type))
    3169              :         {
    3170           52 :           cum->sse_words += words;
    3171           52 :           cum->sse_nregs -= 1;
    3172           52 :           cum->sse_regno += 1;
    3173           52 :           if (cum->sse_nregs <= 0)
    3174              :             {
    3175            4 :               cum->sse_nregs = 0;
    3176            4 :               cum->sse_regno = 0;
    3177              :             }
    3178              :         }
    3179              :       break;
    3180              : 
    3181           16 :     case E_V8QImode:
    3182           16 :     case E_V4HImode:
    3183           16 :     case E_V4HFmode:
    3184           16 :     case E_V4BFmode:
    3185           16 :     case E_V2SImode:
    3186           16 :     case E_V2SFmode:
    3187           16 :     case E_V1TImode:
    3188           16 :     case E_V1DImode:
    3189           16 :       if (!type || !AGGREGATE_TYPE_P (type))
    3190              :         {
    3191           16 :           cum->mmx_words += words;
    3192           16 :           cum->mmx_nregs -= 1;
    3193           16 :           cum->mmx_regno += 1;
    3194           16 :           if (cum->mmx_nregs <= 0)
    3195              :             {
    3196            0 :               cum->mmx_nregs = 0;
    3197            0 :               cum->mmx_regno = 0;
    3198              :             }
    3199              :         }
    3200              :       break;
    3201              :     }
    3202      2065107 :   if (error_p)
    3203              :     {
    3204            0 :       cum->float_in_sse = 0;
    3205            0 :       error ("calling %qD with SSE calling convention without "
    3206              :              "SSE/SSE2 enabled", cum->decl);
    3207            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3208              :              "attribute used to function called");
    3209              :     }
    3210              : 
    3211              :   return res;
    3212              : }
    3213              : 
    3214              : static int
    3215     19030667 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3216              :                          const_tree type, HOST_WIDE_INT words, bool named)
    3217              : {
    3218     19030667 :   int int_nregs, sse_nregs;
    3219              : 
    3220              :   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
    3221     19030667 :   if (!named && (VALID_AVX512F_REG_MODE (mode)
    3222              :                  || VALID_AVX256_REG_MODE (mode)))
    3223              :     return 0;
    3224              : 
    3225     19030303 :   if (!examine_argument (mode, type, false, &int_nregs, &sse_nregs)
    3226     19030303 :       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    3227              :     {
    3228     16752900 :       cum->nregs -= int_nregs;
    3229     16752900 :       cum->sse_nregs -= sse_nregs;
    3230     16752900 :       cum->regno += int_nregs;
    3231     16752900 :       cum->sse_regno += sse_nregs;
    3232     16752900 :       return int_nregs;
    3233              :     }
    3234              :   else
    3235              :     {
    3236      2277403 :       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
    3237      2277403 :       cum->words = ROUND_UP (cum->words, align);
    3238      2277403 :       cum->words += words;
    3239      2277403 :       return 0;
    3240              :     }
    3241              : }
    3242              : 
    3243              : static int
    3244       447161 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
    3245              :                             HOST_WIDE_INT words)
    3246              : {
    3247              :   /* Otherwise, this should be passed indirect.  */
    3248       447161 :   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
    3249              : 
    3250       447161 :   cum->words += words;
    3251       447161 :   if (cum->nregs > 0)
    3252              :     {
    3253       289519 :       cum->nregs -= 1;
    3254       289519 :       cum->regno += 1;
    3255       289519 :       return 1;
    3256              :     }
    3257              :   return 0;
    3258              : }
    3259              : 
    3260              : /* Update the data in CUM to advance over argument ARG.  */
    3261              : 
    3262              : static void
    3263     21608133 : ix86_function_arg_advance (cumulative_args_t cum_v,
    3264              :                            const function_arg_info &arg)
    3265              : {
    3266     21608133 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3267     21608133 :   machine_mode mode = arg.mode;
    3268     21608133 :   HOST_WIDE_INT bytes, words;
    3269     21608133 :   int nregs;
    3270              : 
    3271              :   /* The argument of interrupt handler is a special case and is
    3272              :      handled in ix86_function_arg.  */
    3273     21608133 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3274              :     return;
    3275              : 
    3276     21607766 :   bytes = arg.promoted_size_in_bytes ();
    3277     21607766 :   words = CEIL (bytes, UNITS_PER_WORD);
    3278              : 
    3279     21607766 :   if (arg.type)
    3280     21294350 :     mode = type_natural_mode (arg.type, NULL, false);
    3281              : 
    3282     21607766 :   if (TARGET_64BIT)
    3283              :     {
    3284     19477828 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3285              : 
    3286     19477828 :       if (call_abi == MS_ABI)
    3287       447161 :         nregs = function_arg_advance_ms_64 (cum, bytes, words);
    3288              :       else
    3289     19030667 :         nregs = function_arg_advance_64 (cum, mode, arg.type, words,
    3290     19030667 :                                          arg.named);
    3291              :     }
    3292              :   else
    3293      2129938 :     nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
    3294              : 
    3295     21607766 :   if (!nregs)
    3296              :     {
    3297              :       /* Track if there are outgoing arguments on stack.  */
    3298      5707969 :       if (cum->caller)
    3299      2718086 :         cfun->machine->outgoing_args_on_stack = true;
    3300              :     }
    3301              : }
    3302              : 
    3303              : /* Define where to put the arguments to a function.
    3304              :    Value is zero to push the argument on the stack,
    3305              :    or a hard register in which to store the argument.
    3306              : 
    3307              :    MODE is the argument's machine mode.
    3308              :    TYPE is the data type of the argument (as a tree).
    3309              :     This is null for libcalls where that information may
    3310              :     not be available.
    3311              :    CUM is a variable of type CUMULATIVE_ARGS which gives info about
    3312              :     the preceding args and about the function being called.
    3313              :    NAMED is nonzero if this argument is a named parameter
    3314              :     (otherwise it is an extra parameter matching an ellipsis).  */
    3315              : 
    3316              : static rtx
    3317      2556734 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3318              :                  machine_mode orig_mode, const_tree type,
    3319              :                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
    3320              : {
    3321      2556734 :   bool error_p = false;
    3322              : 
    3323              :   /* Avoid the AL settings for the Unix64 ABI.  */
    3324      2556734 :   if (mode == VOIDmode)
    3325       741977 :     return constm1_rtx;
    3326              : 
    3327      1814757 :   if (TARGET_IAMCU)
    3328              :     {
    3329              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3330              :          bytes in registers.  */
    3331            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3332            0 :         goto pass_in_reg;
    3333              :       return NULL_RTX;
    3334              :     }
    3335              : 
    3336      1814757 :   switch (mode)
    3337              :     {
    3338              :     default:
    3339              :       break;
    3340              : 
    3341        77786 :     case E_BLKmode:
    3342        77786 :       if (bytes < 0)
    3343              :         break;
    3344              :       /* FALLTHRU */
    3345      1781186 :     case E_DImode:
    3346      1781186 :     case E_SImode:
    3347      1781186 :     case E_HImode:
    3348      1781186 :     case E_QImode:
    3349        77786 : pass_in_reg:
    3350      1781186 :       if (words <= cum->nregs)
    3351              :         {
    3352        45531 :           int regno = cum->regno;
    3353              : 
    3354              :           /* Fastcall allocates the first two DWORD (SImode) or
    3355              :             smaller arguments to ECX and EDX if it isn't an
    3356              :             aggregate type.  */
    3357        45531 :           if (cum->fastcall)
    3358              :             {
    3359            6 :               if (mode == BLKmode
    3360            6 :                   || mode == DImode
    3361            6 :                   || (type && AGGREGATE_TYPE_P (type)))
    3362              :                 break;
    3363              : 
    3364              :               /* ECX not EAX is the first allocated register.  */
    3365            6 :               if (regno == AX_REG)
    3366        45531 :                 regno = CX_REG;
    3367              :             }
    3368        45531 :           return gen_rtx_REG (mode, regno);
    3369              :         }
    3370              :       break;
    3371              : 
    3372         3354 :     case E_DFmode:
    3373         3354 :       if (cum->float_in_sse == -1)
    3374            0 :         error_p = true;
    3375         3354 :       if (cum->float_in_sse < 2)
    3376              :         break;
    3377              :       /* FALLTHRU */
    3378          960 :     case E_SFmode:
    3379          960 :       if (cum->float_in_sse == -1)
    3380            0 :         error_p = true;
    3381          960 :       if (cum->float_in_sse < 1)
    3382              :         break;
    3383              :       /* FALLTHRU */
    3384           12 :     case E_TImode:
    3385              :       /* In 32bit, we pass TImode in xmm registers.  */
    3386           12 :     case E_V16QImode:
    3387           12 :     case E_V8HImode:
    3388           12 :     case E_V4SImode:
    3389           12 :     case E_V2DImode:
    3390           12 :     case E_V8HFmode:
    3391           12 :     case E_V8BFmode:
    3392           12 :     case E_V4SFmode:
    3393           12 :     case E_V2DFmode:
    3394           12 :       if (!type || !AGGREGATE_TYPE_P (type))
    3395              :         {
    3396           12 :           if (cum->sse_nregs)
    3397           12 :             return gen_reg_or_parallel (mode, orig_mode,
    3398           12 :                                         cum->sse_regno + FIRST_SSE_REG);
    3399              :         }
    3400              :       break;
    3401              : 
    3402            0 :     case E_OImode:
    3403            0 :     case E_XImode:
    3404              :       /* OImode and XImode shouldn't be used directly.  */
    3405            0 :       gcc_unreachable ();
    3406              : 
    3407            9 :     case E_V64QImode:
    3408            9 :     case E_V32HImode:
    3409            9 :     case E_V16SImode:
    3410            9 :     case E_V8DImode:
    3411            9 :     case E_V32HFmode:
    3412            9 :     case E_V32BFmode:
    3413            9 :     case E_V16SFmode:
    3414            9 :     case E_V8DFmode:
    3415            9 :     case E_V16HFmode:
    3416            9 :     case E_V16BFmode:
    3417            9 :     case E_V8SFmode:
    3418            9 :     case E_V8SImode:
    3419            9 :     case E_V32QImode:
    3420            9 :     case E_V16HImode:
    3421            9 :     case E_V4DFmode:
    3422            9 :     case E_V4DImode:
    3423            9 :       if (!type || !AGGREGATE_TYPE_P (type))
    3424              :         {
    3425            9 :           if (cum->sse_nregs)
    3426            9 :             return gen_reg_or_parallel (mode, orig_mode,
    3427            9 :                                         cum->sse_regno + FIRST_SSE_REG);
    3428              :         }
    3429              :       break;
    3430              : 
    3431            8 :     case E_V8QImode:
    3432            8 :     case E_V4HImode:
    3433            8 :     case E_V4HFmode:
    3434            8 :     case E_V4BFmode:
    3435            8 :     case E_V2SImode:
    3436            8 :     case E_V2SFmode:
    3437            8 :     case E_V1TImode:
    3438            8 :     case E_V1DImode:
    3439            8 :       if (!type || !AGGREGATE_TYPE_P (type))
    3440              :         {
    3441            8 :           if (cum->mmx_nregs)
    3442            8 :             return gen_reg_or_parallel (mode, orig_mode,
    3443            8 :                                         cum->mmx_regno + FIRST_MMX_REG);
    3444              :         }
    3445              :       break;
    3446              :     }
    3447         4314 :   if (error_p)
    3448              :     {
    3449            0 :       cum->float_in_sse = 0;
    3450            0 :       error ("calling %qD with SSE calling convention without "
    3451              :              "SSE/SSE2 enabled", cum->decl);
    3452            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3453              :              "attribute used to function called");
    3454              :     }
    3455              : 
    3456              :   return NULL_RTX;
    3457              : }
    3458              : /* Return where an argument goes for 64-bit calls that do not use MS_ABI
                           (ix86_function_arg dispatches here in that case).  The location is
                           built by construct_container, except that unnamed 256-bit and 512-bit
                           vector arguments yield NULL.  A MODE of VOIDmode instead requests the
                           hidden AL value described below.  */
    3459              : static rtx
    3460     18720218 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3461              :                  machine_mode orig_mode, const_tree type, bool named)
    3462              : {
    3463              :   /* Handle a hidden AL argument containing the number of registers
    3464              :      for varargs x86-64 functions.  The value is cum->sse_regno, or
                             X86_64_SSE_REGPARM_MAX when cum->sse_nregs is negative; -1 is
                             used when cum->maybe_vaarg is not set.  */
    3465     18720218 :   if (mode == VOIDmode)
    3466      5202168 :     return GEN_INT (cum->maybe_vaarg
    3467              :                     ? (cum->sse_nregs < 0
    3468              :                        ? X86_64_SSE_REGPARM_MAX
    3469              :                        : cum->sse_regno)
    3470              :                     : -1);
    3471              : 
    3472     13518050 :   switch (mode)
    3473              :     {
    3474              :     default:
    3475              :       break;
    3476              : 
    3477        90175 :     case E_V16HFmode:
    3478        90175 :     case E_V16BFmode:
    3479        90175 :     case E_V8SFmode:
    3480        90175 :     case E_V8SImode:
    3481        90175 :     case E_V32QImode:
    3482        90175 :     case E_V16HImode:
    3483        90175 :     case E_V4DFmode:
    3484        90175 :     case E_V4DImode:
    3485        90175 :     case E_V32HFmode:
    3486        90175 :     case E_V32BFmode:
    3487        90175 :     case E_V16SFmode:
    3488        90175 :     case E_V16SImode:
    3489        90175 :     case E_V64QImode:
    3490        90175 :     case E_V32HImode:
    3491        90175 :     case E_V8DFmode:
    3492        90175 :     case E_V8DImode:
    3493              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    3494        90175 :       if (!named)
    3495              :         return NULL;
    3496              :       break;
    3497              :     }
    3498              :   /* Pick the integer parameter register table matching CUM's ABI.  */
    3499     13517686 :   const int *parm_regs;
    3500     13517686 :   if (cum->preserve_none_abi)
    3501              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    3502              :   else
    3503     13517557 :     parm_regs = x86_64_int_parameter_registers;
    3504              : 
    3505     13517686 :   return construct_container (mode, orig_mode, type, false,
    3506     13517686 :                               cum->nregs, cum->sse_nregs,
    3507     13517686 :                               &parm_regs[cum->regno],
    3508     13517686 :                               cum->sse_regno);
    3509              : }
    3510              : /* Return where an argument goes for 64-bit MS_ABI calls: NULL_RTX when
                           cum->nregs is zero (the argument goes on the stack), otherwise the
                           result of gen_reg_or_parallel or, for unnamed floating point
                           arguments, a PARALLEL of an SSE and an integer register.  BYTES is
                           used to choose the mode of BLKmode arguments.  A MODE of VOIDmode
                           yields the constant -2.  */
    3511              : static rtx
    3512       296428 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3513              :                     machine_mode orig_mode, bool named, const_tree type,
    3514              :                     HOST_WIDE_INT bytes)
    3515              : {
    3516       296428 :   unsigned int regno;
    3517              : 
    3518              :   /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
    3519              :      We use value of -2 to specify that current function call is MSABI.  */
    3520       296428 :   if (mode == VOIDmode)
    3521        36295 :     return GEN_INT (-2);
    3522              : 
    3523              :   /* If we've run out of registers, it goes on the stack.  */
    3524       260133 :   if (cum->nregs == 0)
    3525              :     return NULL_RTX;
    3526              : 
    3527       176374 :   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
    3528              : 
    3529              :   /* Only floating point modes of at most 64 bits (HFmode, SFmode and DFmode)
    3530              :      are passed in anything but integer regs.  Larger floating point types are
    3531              :      excluded as the Windows ABI requires vreg args can be shadowed in GPRs
                             (for red zone / varargs).  */
    3532       176374 :   if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    3533              :     {
    3534        38260 :       if (named)
    3535              :         {
    3536        38260 :           if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
    3537        37263 :             regno = cum->regno + FIRST_SSE_REG;  /* Non-aggregate: use the SSE register.  */
    3538              :         }
    3539              :       else
    3540              :         {
    3541            0 :           rtx t1, t2;
    3542              : 
    3543              :           /* Unnamed floating parameters are passed in both the
    3544              :              SSE and integer registers.  */
    3545            0 :           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
    3546            0 :           t2 = gen_rtx_REG (mode, regno);
    3547            0 :           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
    3548            0 :           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
    3549            0 :           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
    3550              :         }
    3551              :     }
    3552              :   /* Handle aggregate types passed in a register: a size of 1 to 4 bytes
                         selects SImode, 5 to 8 bytes selects DImode, and a mode that is
                         still BLKmode afterwards becomes DImode.  */
    3553       176374 :   if (orig_mode == BLKmode)
    3554              :     {
    3555            0 :       if (bytes > 0 && bytes <= 8)
    3556            0 :         mode = (bytes > 4 ? DImode : SImode);
    3557            0 :       if (mode == BLKmode)
    3558            0 :         mode = DImode;
    3559              :     }
    3560              : 
    3561       176374 :   return gen_reg_or_parallel (mode, orig_mode, regno);
    3562              : }
    3563              : 
    3564              : /* Return where to put an argument of a function.
    3565              :    Return NULL_RTX to push the argument on the stack, or the rtx that
    3566              :    describes where the argument is stored (as built by function_arg_32,
                           function_arg_64 or function_arg_ms_64).
    3567              :    ARG describes the argument while CUM_V gives information about the
    3568              :    preceding args and about the function being called.  When cum->caller
                           is set and the argument goes on the stack, this also sets
                           cfun->machine->outgoing_args_on_stack.  */
    3569              : 
    3570              : static rtx
    3571     21573567 : ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
    3572              : {
    3573     21573567 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3574     21573567 :   machine_mode mode = arg.mode;
    3575     21573567 :   HOST_WIDE_INT bytes, words;
    3576     21573567 :   rtx reg;
    3577              :   /* Incoming arguments of a function whose func_type is not TYPE_NORMAL
                         are located at fixed offsets from the argument pointer.  */
    3578     21573567 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3579              :     {
    3580          187 :       gcc_assert (arg.type != NULL_TREE);
    3581          187 :       if (POINTER_TYPE_P (arg.type))
    3582              :         {
    3583              :           /* This is the pointer argument.  */
    3584          122 :           gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
    3585              :           /* It is at -WORD(AP) in the current frame in interrupt and
    3586              :              exception handlers.  */
    3587          122 :           reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
    3588              :         }
    3589              :       else
    3590              :         {
    3591           65 :           gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
    3592              :                       && TREE_CODE (arg.type) == INTEGER_TYPE
    3593              :                       && TYPE_MODE (arg.type) == word_mode);
    3594              :           /* The error code is the word-mode integer argument at
    3595              :              -2 * WORD(AP) in the current frame of the exception
    3596              :              handler.  */
    3597           65 :           reg = gen_rtx_MEM (word_mode,
    3598           65 :                              plus_constant (Pmode,
    3599              :                                             arg_pointer_rtx,
    3600           65 :                                             -2 * UNITS_PER_WORD));
    3601              :         }
    3602          187 :       return reg;
    3603              :     }
    3604              : 
    3605     21573380 :   bytes = arg.promoted_size_in_bytes ();
    3606     21573380 :   words = CEIL (bytes, UNITS_PER_WORD);
    3607              : 
    3608              :   /* To simplify the code below, represent vector types with a vector mode
    3609              :      even if MMX/SSE are not active.  */
    3610     21573380 :   if (arg.type && VECTOR_TYPE_P (arg.type))
    3611       171255 :     mode = type_natural_mode (arg.type, cum, false);
    3612              : 
    3613     21573380 :   if (TARGET_64BIT)
    3614              :     {
    3615     19016646 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3616              :       /* NOTE(review): CUM was already dereferenced at the top of this
                             function, so the null test in the initializer above looks
                             redundant here.  */
    3617     19016646 :       if (call_abi == MS_ABI)
    3618       296428 :         reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
    3619       296428 :                                   arg.type, bytes);
    3620              :       else
    3621     18720218 :         reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    3622              :     }
    3623              :   else
    3624      2556734 :     reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
    3625              : 
    3626              :   /* Track if there are outgoing arguments on stack.  */
    3627     21573380 :   if (reg == NULL_RTX && cum->caller)
    3628      2200033 :     cfun->machine->outgoing_args_on_stack = true;
    3629              : 
    3630              :   return reg;
    3631              : }
    3632              : 
    3633              : /* Return true when the argument described by ARG must be passed by
    3634              :    reference.  If true for an argument, a copy of that argument is
    3635              :    made in memory and a pointer to the argument is passed instead of
    3636              :    the argument itself.  The pointer is passed in whatever way is
    3637              :    appropriate for passing a pointer to that type.
                           For 64-bit MS_ABI this is the case for arrays and for anything whose
                           size is not 1, 2, 4 or 8 bytes; for other 64-bit calls only for types
                           whose int_size_in_bytes is -1; for 32-bit targets never.  */
    3638              : 
    3639              : static bool
    3640     21528806 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
    3641              : {
    3642     21528806 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3643              : 
    3644     21528806 :   if (TARGET_64BIT)
    3645              :     {
    3646     19409126 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;  /* Default ABI when CUM is null.  */
    3647              : 
    3648              :       /* See Windows x64 Software Convention.  */
    3649     19409126 :       if (call_abi == MS_ABI)
    3650              :         {
    3651       441562 :           HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
    3652              : 
    3653       441562 :           if (tree type = arg.type)
    3654              :             {
    3655              :               /* Arrays are passed by reference.  */
    3656       441562 :               if (TREE_CODE (type) == ARRAY_TYPE)
    3657              :                 return true;
    3658              : 
    3659       441562 :               if (RECORD_OR_UNION_TYPE_P (type))
    3660              :                 {
    3661              :                   /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
    3662              :                      are passed by reference, so test the size of the type
                                             rather than the size of its mode.  */
    3663        15103 :                   msize = int_size_in_bytes (type);
    3664              :                 }
    3665              :             }
    3666              : 
    3667              :           /* Anything whose size is not 1, 2, 4 or 8 bytes, e.g. __m128, is passed by reference.  */
    3668       873131 :           return msize != 1 && msize != 2 && msize != 4 && msize != 8;
    3669              :         }
    3670     18967564 :       else if (arg.type && int_size_in_bytes (arg.type) == -1)
    3671              :         return true;
    3672              :     }
    3673              : 
    3674              :   return false;
    3675              : }
    3676              : 
    3677              : /* Return true when TYPE should be 128-bit aligned for 32-bit argument
    3678              :    passing ABI.  XXX: This function is obsolete and is only used for
    3679              :    checking psABI compatibility with previous versions of GCC.
                           TYPE qualifies when its mode is an SSE register mode (with
                           TARGET_SSE), TDmode, TFmode or TCmode, unless it has a user
                           alignment of 128 bits or less.  Otherwise an aggregate aligned to at
                           least 128 bits qualifies when one of its fields, or its element
                           type, qualifies.  */
    3680              : 
    3681              : static bool
    3682      1975065 : ix86_compat_aligned_value_p (const_tree type)
    3683              : {
    3684      1975065 :   machine_mode mode = TYPE_MODE (type);
    3685      1975065 :   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
    3686      1975023 :        || mode == TDmode
    3687      1975023 :        || mode == TFmode
    3688              :        || mode == TCmode)
    3689      1975277 :       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    3690              :     return true;
    3691      1974853 :   if (TYPE_ALIGN (type) < 128)
    3692              :     return false;
    3693              :   /* From here on only an aggregate with a qualifying member returns true.  */
    3694            0 :   if (AGGREGATE_TYPE_P (type))
    3695              :     {
    3696              :       /* Walk the aggregates recursively.  */
    3697            0 :       switch (TREE_CODE (type))
    3698              :         {
    3699            0 :         case RECORD_TYPE:
    3700            0 :         case UNION_TYPE:
    3701            0 :         case QUAL_UNION_TYPE:
    3702            0 :           {
    3703            0 :             tree field;
    3704              : 
    3705              :             /* Walk all the structure fields.  */
    3706            0 :             for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    3707              :               {
    3708            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3709            0 :                     && ix86_compat_aligned_value_p (TREE_TYPE (field)))
    3710              :                   return true;
    3711              :               }
    3712              :             break;
    3713              :           }
    3714              : 
    3715            0 :         case ARRAY_TYPE:
    3716              :           /* Just for use if some language passes arrays by value.  */
    3717            0 :           if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
    3718              :             return true;
    3719              :           break;
    3720              : 
    3721              :         default:
    3722              :           gcc_unreachable ();
    3723              :         }
    3724              :     }
    3725              :   return false;
    3726              : }
    3727              : 
    3728              : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
    3729              :    XXX: This function is obsolete and is only used for checking psABI
    3730              :    compatibility with previous versions of GCC.
                           The result is ALIGN, replaced by PARM_BOUNDARY for the 32-bit cases
                           handled below, and then capped at BIGGEST_ALIGNMENT.  */
    3731              : 
    3732              : static unsigned int
    3733      5556494 : ix86_compat_function_arg_boundary (machine_mode mode,
    3734              :                                    const_tree type, unsigned int align)
    3735              : {
    3736              :   /* In 32bit, only _Decimal128 and __float128 are aligned to their
    3737              :      natural boundaries.  */
    3738      5556494 :   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    3739              :     {
    3740              :       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
    3741              :          make an exception for SSE modes since these require 128bit
    3742              :          alignment.
    3743              : 
    3744              :          The handling here differs from field_alignment.  ICC aligns MMX
    3745              :          arguments to 4 byte boundaries, while structure fields are aligned
    3746              :          to 8 byte boundaries.  */
    3747      1987001 :       if (!type)
    3748              :         {
    3749        11936 :           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
    3750      1986789 :             align = PARM_BOUNDARY;
    3751              :         }
    3752              :       else
    3753              :         {
    3754      1975065 :           if (!ix86_compat_aligned_value_p (type))
    3755      1986789 :             align = PARM_BOUNDARY;
    3756              :         }
    3757              :     }
    3758     10713045 :   if (align > BIGGEST_ALIGNMENT)
    3759           90 :     align = BIGGEST_ALIGNMENT;  /* Clamp to BIGGEST_ALIGNMENT.  */
    3760      5556494 :   return align;
    3761              : }
    3762              : 
    3763              : /* Return true when TYPE should be 128-bit aligned for 32-bit argument
    3764              :    passing ABI.  Types whose mode is XFmode or XCmode, and types aligned
                           to less than 128 bits, never qualify.  Any other non-aggregate type
                           qualifies; an aggregate qualifies when one of its fields, or its
                           element type, does.  */
    3765              : 
    3766              : static bool
    3767      1977748 : ix86_contains_aligned_value_p (const_tree type)
    3768              : {
    3769      1977748 :   machine_mode mode = TYPE_MODE (type);
    3770              : 
    3771      1977748 :   if (mode == XFmode || mode == XCmode)
    3772              :     return false;
    3773              : 
    3774      1975606 :   if (TYPE_ALIGN (type) < 128)
    3775              :     return false;
    3776              : 
    3777         2895 :   if (AGGREGATE_TYPE_P (type))
    3778              :     {
    3779              :       /* Walk the aggregates recursively.  */
    3780            0 :       switch (TREE_CODE (type))
    3781              :         {
    3782            0 :         case RECORD_TYPE:
    3783            0 :         case UNION_TYPE:
    3784            0 :         case QUAL_UNION_TYPE:
    3785            0 :           {
    3786            0 :             tree field;
    3787              : 
    3788              :             /* Walk all the structure fields.  */
    3789            0 :             for (field = TYPE_FIELDS (type);
    3790            0 :                  field;
    3791            0 :                  field = DECL_CHAIN (field))
    3792              :               {
    3793            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3794            0 :                     && ix86_contains_aligned_value_p (TREE_TYPE (field)))
    3795              :                   return true;
    3796              :               }
    3797              :             break;
    3798              :           }
    3799              : 
    3800            0 :         case ARRAY_TYPE:
    3801              :           /* Just for use if some language passes arrays by value.  */
    3802            0 :           if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
    3803              :             return true;
    3804              :           break;
    3805              : 
    3806              :         default:
    3807              :           gcc_unreachable ();
    3808              :         }
    3809              :     }
    3810              :   else
    3811         2895 :     return TYPE_ALIGN (type) >= 128;  /* Always true here: smaller alignments returned above.  */
    3812              : 
    3813              :   return false;
    3814              : }
    3815              : 
    3816              : /* Gives the alignment boundary, in bits, of an argument with the
    3817              :    specified mode and type.  The result is never below PARM_BOUNDARY.
                           When warn_psabi is set and the result differs from what
                           ix86_compat_function_arg_boundary computes, a note about the ABI
                           change is emitted, at most once.  */
    3818              : 
    3819              : static unsigned int
    3820     10954913 : ix86_function_arg_boundary (machine_mode mode, const_tree type)
    3821              : {
    3822     10954913 :   unsigned int align;
    3823     10954913 :   if (type)
    3824              :     {
    3825              :       /* Since the main variant type is used for call, we convert it to
    3826              :          the main variant type.  */
    3827     10915080 :       type = TYPE_MAIN_VARIANT (type);
    3828     10915080 :       align = TYPE_ALIGN (type);
    3829     10915080 :       if (TYPE_EMPTY_P (type))
    3830        24468 :         return PARM_BOUNDARY;  /* Types for which TYPE_EMPTY_P holds get the minimum boundary.  */
    3831              :     }
    3832              :   else
    3833        39833 :     align = GET_MODE_ALIGNMENT (mode);
    3834     12955382 :   if (align < PARM_BOUNDARY)
    3835      4111288 :     align = PARM_BOUNDARY;
    3836              :   else
    3837              :     {
    3838      6819157 :       static bool warned;  /* Set once the note below has been emitted.  */
    3839      6819157 :       unsigned int saved_align = align;
    3840              : 
    3841      6819157 :       if (!TARGET_64BIT)
    3842              :         {
    3843              :           /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
    3844      2013574 :           if (!type)
    3845              :             {
    3846        35826 :               if (mode == XFmode || mode == XCmode)
    3847              :                 align = PARM_BOUNDARY;
    3848              :             }
    3849      1977748 :           else if (!ix86_contains_aligned_value_p (type))
    3850              :             align = PARM_BOUNDARY;
    3851              :           /* Any alignment below 128 bits drops to PARM_BOUNDARY.  */
    3852        38721 :           if (align < 128)
    3853      1986789 :             align = PARM_BOUNDARY;
    3854              :         }
    3855              :       /* Compare against the obsolete computation, starting from the
                             alignment saved before the 32-bit adjustments above.  */
    3856      6819157 :       if (warn_psabi
    3857      5561330 :           && !warned
    3858     12375651 :           && align != ix86_compat_function_arg_boundary (mode, type,
    3859              :                                                          saved_align))
    3860              :         {
    3861           90 :           warned = true;
    3862           90 :           inform (input_location,
    3863              :                   "the ABI for passing parameters with %d-byte"
    3864              :                   " alignment has changed in GCC 4.6",
    3865              :                   align / BITS_PER_UNIT);
    3866              :         }
    3867              :     }
    3868              : 
    3869              :   return align;
    3870              : }
    3871              : 
    3872              : /* Return true if REGNO is a possible register number of function value.
                           AX_REG always is.  DX_REG, ST0_REG/ST1_REG, XMM0_REG/XMM1_REG and
                           MM0_REG depend on the target flags and ABI tested below.  Every
                           other register number yields false.  */
    3873              : 
    3874              : static bool
    3875      4693315 : ix86_function_value_regno_p (const unsigned int regno)
    3876              : {
    3877      4693315 :   switch (regno)
    3878              :     {
    3879              :     case AX_REG:
    3880              :       return true;
    3881       103151 :     case DX_REG:
    3882       103151 :       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    3883              : 
    3884              :       /* Complex values are returned in %st(0)/%st(1) pair.  */
    3885        25261 :     case ST0_REG:
    3886        25261 :     case ST1_REG:
    3887              :       /* TODO: The function should depend on current function ABI but
    3888              :        builtins.cc would need updating then. Therefore we use the
    3889              :        default ABI.  */
    3890        25261 :       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    3891              :         return false;
    3892        25261 :       return TARGET_FLOAT_RETURNS_IN_80387;
    3893              : 
    3894              :       /* Complex values are returned in %xmm0/%xmm1 pair.  */
    3895      1291014 :     case XMM0_REG:
    3896      1291014 :     case XMM1_REG:
    3897      1291014 :       return TARGET_SSE;
    3898              : 
    3899        10078 :     case MM0_REG:
    3900        10078 :       if (TARGET_MACHO || TARGET_64BIT)
    3901              :         return false;
    3902         2492 :       return TARGET_MMX;
    3903              :     }
    3904              : 
    3905              :   return false;
    3906              : }
    3907              : 
    3908              : /* Check whether the register REGNO should be zeroed on X86.
    3909              :    General registers and mask registers always qualify.
    3910              :    When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
    3911              :    together, so SSE registers need not be zeroed again.
                           When NEED_ZERO_MMX is true, MMX registers should be cleared.  */
    3912              : 
    3913              : static bool
    3914         1377 : zero_call_used_regno_p (const unsigned int regno,
    3915              :                         bool all_sse_zeroed,
    3916              :                         bool need_zero_mmx)
    3917              : {
    3918          835 :   return GENERAL_REGNO_P (regno)
    3919          819 :          || (!all_sse_zeroed && SSE_REGNO_P (regno))
    3920          439 :          || MASK_REGNO_P (regno)
    3921         1800 :          || (need_zero_mmx && MMX_REGNO_P (regno));
    3922              : }
    3923              : 
    3924              : /* Return the machine_mode that is used to zero register REGNO: SImode
                           for general registers, V4SFmode for SSE registers, HImode for mask
                           registers and V2SImode for MMX registers.  Any other register is
                           not expected here.  */
    3925              : 
    3926              : static machine_mode
    3927          954 : zero_call_used_regno_mode (const unsigned int regno)
    3928              : {
    3929              :   /* NB: We only need to zero the lower 32 bits for integer registers
    3930              :      and the lower 128 bits for vector registers since destinations are
    3931              :      zero-extended to the full register width.  */
    3932          954 :   if (GENERAL_REGNO_P (regno))
    3933              :     return SImode;
    3934              :   else if (SSE_REGNO_P (regno))
    3935          380 :     return V4SFmode;
    3936              :   else if (MASK_REGNO_P (regno))
    3937              :     return HImode;
    3938              :   else if (MMX_REGNO_P (regno))
    3939            0 :     return V2SImode;
    3940              :   else
    3941            0 :     gcc_unreachable ();
    3942              : }
    3943              : 
    3944              : /* Generate a rtx to zero all vector registers together if possible,
    3945              :    otherwise, return NULL.  NEED_ZEROED_HARDREGS is the hard register set
                           tested for each SSE register; the combined zeroing is only used with
                           TARGET_AVX and when none of the checked registers is missing from
                           that set.  */
    3946              : 
    3947              : static rtx
    3948          131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
    3949              : {
    3950          131 :   if (!TARGET_AVX)
    3951              :     return NULL;
    3952              :   /* Give up as soon as a relevant SSE register is not in the set: the
                         legacy ones, plus in 64-bit mode the REX ones and, with
                         TARGET_AVX512F, the extended REX ones.  */
    3953          372 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3954          368 :     if ((LEGACY_SSE_REGNO_P (regno)
    3955          336 :          || (TARGET_64BIT
    3956          336 :              && (REX_SSE_REGNO_P (regno)
    3957          304 :                  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
    3958          432 :         && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3959              :       return NULL;
    3960              : 
    3961            4 :   return gen_avx_vzeroall ();
    3962              : }
    3963              : 
    3964              : /* Generate insns to zero all st registers together.
    3965              :    Return true when zeroing instructions are generated.
    3966              :    Assume the number of st registers that are zeroed is num_of_st,
    3967              :    we will emit the following sequence to zero them together:
    3968              :                   fldz;         \
    3969              :                   fldz;         \
    3970              :                   ...
    3971              :                   fldz;         \
    3972              :                   fstp %%st(0); \
    3973              :                   fstp %%st(0); \
    3974              :                   ...
    3975              :                   fstp %%st(0);
    3976              :    i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
    3977              :    and mark the stack slots empty.
    3978              : 
    3979              :    How to compute the num_of_st:
    3980              :    There is no direct mapping from stack registers to hard register
    3981              :    numbers.  If one stack register needs to be cleared, we don't know
    3982              :    where in the stack the value remains.  So, if any stack register
    3983              :    needs to be cleared, the whole stack should be cleared.  However,
    3984              :    x87 stack registers that hold the return value should be excluded.
    3985              :    x87 returns in the top (two for complex values) register, so
    3986              :    num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
    3987              :    Return the value of num_of_st.  */
    3988              : 
    3989              : 
    3990              : static int
    3991          131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
    3992              : {
    3993              : 
    3994              :   /* If the FPU is disabled, no need to zero all st registers.  */
    3995          131 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    3996              :     return 0;
    3997              : 
    3998        10329 :   unsigned int num_of_st = 0;
    3999        10329 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4000        10220 :     if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
    4001        10220 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4002              :       {
    4003              :         num_of_st++;
    4004              :         break;
    4005              :       }
    4006              : 
    4007          130 :   if (num_of_st == 0)
    4008              :     return 0;
    4009              : 
    4010           21 :   bool return_with_x87 = false;
    4011           42 :   return_with_x87 = (crtl->return_rtx
    4012           21 :                      && (STACK_REG_P (crtl->return_rtx)));
    4013              : 
    4014           21 :   bool complex_return = false;
    4015           42 :   complex_return = (crtl->return_rtx
    4016           21 :                     && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
    4017              : 
    4018           21 :   if (return_with_x87)
    4019            2 :     if (complex_return)
    4020              :       num_of_st = 6;
    4021              :     else
    4022            1 :       num_of_st = 7;
    4023              :   else
    4024              :     num_of_st = 8;
    4025              : 
    4026           21 :   rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
    4027          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4028          165 :     emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
    4029              : 
    4030          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4031              :     {
    4032          165 :       rtx insn;
    4033          165 :       insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
    4034          165 :       add_reg_note (insn, REG_DEAD, st_reg);
    4035              :     }
    4036           21 :   return num_of_st;
    4037              : }
    4038              : 
    4039              : 
    4040              : /* When the routine exits in MMX mode, if any ST register needs
    4041              :    to be zeroed, we should clear all MMX registers except the
    4042              :    RET_MMX_REGNO that holds the return value.
                           Return true if the MMX registers were cleared, false otherwise.  */
    4043              : static bool
    4044            0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
    4045              :                        unsigned int ret_mmx_regno)
    4046              : {
    4047            0 :   bool need_zero_all_mm = false;
    4048            0 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4049            0 :     if (STACK_REGNO_P (regno)
    4050            0 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4051              :       {
    4052              :         need_zero_all_mm = true;
    4053              :         break;
    4054              :       }
    4055              : 
    4056            0 :   if (!need_zero_all_mm)
    4057              :     return false;
    4058              : 
    4059              :   machine_mode mode = V2SImode;
    4060            0 :   for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4061            0 :     if (regno != ret_mmx_regno)
    4062              :       {
    4063            0 :         rtx reg = gen_rtx_REG (mode, regno);
    4064            0 :         emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
    4065              :       }
    4066              :   return true;
    4067              : }
    4068              : 
    4069              : /* TARGET_ZERO_CALL_USED_REGS.  */
    4070              : /* Generate a sequence of instructions that zero registers specified by
    4071              :    NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
    4072              :    zeroed.  */
    4073              : static HARD_REG_SET
    4074          131 : ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
    4075              : {
    4076          131 :   HARD_REG_SET zeroed_hardregs;
    4077          131 :   bool all_sse_zeroed = false;
    4078          131 :   int all_st_zeroed_num = 0;
    4079          131 :   bool all_mm_zeroed = false;
    4080              : 
    4081          131 :   CLEAR_HARD_REG_SET (zeroed_hardregs);
    4082              : 
    4083              :   /* First, let's see whether we can zero all vector registers together.  */
    4084          131 :   rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
    4085          131 :   if (zero_all_vec_insn)
    4086              :     {
    4087            4 :       emit_insn (zero_all_vec_insn);
    4088            4 :       all_sse_zeroed = true;
    4089            4 :       if (TARGET_64BIT && TARGET_AVX512F)
    4090              :         {
    4091            2 :           rtx zero = CONST0_RTX (V4SFmode);
    4092           34 :           for (unsigned int regno = XMM16_REG;
    4093           34 :                regno <= XMM31_REG;
    4094              :                regno++)
    4095              :             {
    4096           32 :               rtx reg = gen_rtx_REG (V4SFmode, regno);
    4097           32 :               emit_move_insn (reg, zero);
    4098              :             }
    4099              :         }
    4100              :     }
    4101              : 
    4102              :   /* mm/st registers are a shared register set; we should follow these
    4103              :      rules to clear them:
    4104              :                         MMX exit mode         x87 exit mode
    4105              :         -------------|----------------------|---------------
    4106              :         uses x87 reg | clear all MMX        | clear all x87
    4107              :         uses MMX reg | clear individual MMX | clear all x87
    4108              :         x87 + MMX    | clear all MMX        | clear all x87
    4109              : 
    4110              :      First, we should decide which mode (MMX mode or x87 mode) the function
    4111              :      exits with.  */
    4112              : 
    4113          131 :   bool exit_with_mmx_mode = (crtl->return_rtx
    4114          131 :                              && (MMX_REG_P (crtl->return_rtx)));
    4115              : 
    4116          131 :   if (!exit_with_mmx_mode)
    4117              :     /* x87 exit mode, we should zero all st registers together.  */
    4118              :     {
    4119          131 :       all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
    4120              : 
    4121          131 :       if (all_st_zeroed_num > 0)
    4122          189 :         for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
    4123              :           /* x87 stack registers that hold the return value should be excluded.
    4124              :              x87 returns in the top (two for complex values) register.  */
    4125          168 :           if (all_st_zeroed_num == 8
    4126          168 :               || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
    4127              :                    || (all_st_zeroed_num == 6
    4128            7 :                        && (regno == (REGNO (crtl->return_rtx) + 1)))))
    4129          165 :             SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4130              :     }
    4131              :   else
    4132              :     /* MMX exit mode, check whether we can zero all mm registers.  */
    4133              :     {
    4134            0 :       unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
    4135            0 :       all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
    4136              :                                              exit_mmx_regno);
    4137            0 :       if (all_mm_zeroed)
    4138            0 :         for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4139            0 :           if (regno != exit_mmx_regno)
    4140            0 :             SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4141              :     }
    4142              : 
    4143              :   /* Now, generate instructions to zero all the other registers.  */
    4144              : 
    4145        12183 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4146              :     {
    4147        12052 :       if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4148        10675 :         continue;
    4149         1800 :       if (!zero_call_used_regno_p (regno, all_sse_zeroed,
    4150         1377 :                                    exit_with_mmx_mode && !all_mm_zeroed))
    4151          423 :         continue;
    4152              : 
    4153          954 :       SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4154              : 
    4155          954 :       machine_mode mode = zero_call_used_regno_mode (regno);
    4156              : 
    4157          954 :       rtx reg = gen_rtx_REG (mode, regno);
    4158          954 :       rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
    4159              : 
    4160          954 :       switch (mode)
    4161              :         {
    4162          558 :         case E_SImode:
    4163          558 :           if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    4164              :             {
    4165          558 :               rtx clob = gen_rtx_CLOBBER (VOIDmode,
    4166              :                                           gen_rtx_REG (CCmode,
    4167              :                                                        FLAGS_REG));
    4168          558 :               tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
    4169              :                                                            tmp,
    4170              :                                                            clob));
    4171              :             }
    4172              :           /* FALLTHRU.  */
    4173              : 
    4174          954 :         case E_V4SFmode:
    4175          954 :         case E_HImode:
    4176          954 :         case E_V2SImode:
    4177          954 :           emit_insn (tmp);
    4178          954 :           break;
    4179              : 
    4180            0 :         default:
    4181            0 :           gcc_unreachable ();
    4182              :         }
    4183              :     }
    4184          131 :   return zeroed_hardregs;
    4185              : }
    4186              : 
    4187              : /* Define how to find the value returned by a function.
    4188              :    ORIG_MODE is the mode of the register rtx that is returned and MODE
    4189              :    is the mode used to choose the return register.  FNTYPE and FN are
    4190              :    the function's type and decl when known; either may be NULL.  */
    4191              : 
    4192              : static rtx
    4193      3933465 : function_value_32 (machine_mode orig_mode, machine_mode mode,
    4194              :                    const_tree fntype, const_tree fn)
    4195              : {
    4196      3933465 :   unsigned int regno;
    4197              : 
    4198              :   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
    4199              :      we normally prevent this case when mmx is not available.  However
    4200              :      some ABIs may require the result to be returned like DImode.  */
    4201      4201550 :   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    4202              :     regno = FIRST_MMX_REG;
    4203              : 
    4204              :   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
    4205              :      we prevent this case when sse is not available.  However some ABIs
    4206              :      may require the result to be returned like integer TImode.  */
    4207      3924189 :   else if (mode == TImode
    4208      4182998 :            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    4209              :     regno = FIRST_SSE_REG;
    4210              : 
    4211              :   /* 32-byte vector modes in %ymm0.   */
    4212      3965100 :   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    4213              :     regno = FIRST_SSE_REG;
    4214              : 
    4215              :   /* 64-byte vector modes in %zmm0.   */
    4216      3820902 :   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    4217              :     regno = FIRST_SSE_REG;
    4218              : 
    4219              :   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
    4220      3665380 :   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    4221              :     regno = FIRST_FLOAT_REG;
    4222              :   else
    4223              :     /* Most things go in %eax.  */
    4224      3600985 :     regno = AX_REG;
    4225              : 
    4226              :   /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
    4227      3933465 :   if (mode == HFmode || mode == BFmode)
    4228              :     {
    4229         1907 :       if (!TARGET_SSE2)
    4230              :         {
    4231            0 :           error ("SSE register return with SSE2 disabled");
    4232            0 :           regno = AX_REG;
    4233              :         }
    4234              :       else
    4235              :         regno = FIRST_SSE_REG;
    4236              :     }
    4237              : 
    4238      3933465 :   if (mode == HCmode)
    4239              :     {
    4240          129 :       if (!TARGET_SSE2)
    4241            0 :         error ("SSE register return with SSE2 disabled");
    4242              : 
    4243          129 :       rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
    4244          258 :       XVECEXP (ret, 0, 0)
    4245          258 :         = gen_rtx_EXPR_LIST (VOIDmode,
    4246              :                              gen_rtx_REG (SImode,
    4247          129 :                                           TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
    4248              :                              GEN_INT (0));
    4249          129 :       return ret;
    4250              :     }
    4251              : 
    4252              :   /* Override FP return register with %xmm0 for local functions when
    4253              :      SSE math is enabled or for functions with sseregparm attribute.  */
    4254      3933336 :   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    4255              :     {
    4256        50312 :       int sse_level = ix86_function_sseregparm (fntype, fn, false);
    4257        50312 :       if (sse_level == -1)
    4258              :         {
    4259            0 :           error ("calling %qD with SSE calling convention without "
    4260              :                  "SSE/SSE2 enabled", fn);
    4261            0 :           sorry ("this is a GCC bug that can be worked around by adding "
    4262              :                  "attribute used to function called");
    4263              :         }
    4264        50312 :       else if ((sse_level >= 1 && mode == SFmode)
    4265        50312 :                || (sse_level == 2 && mode == DFmode))
    4266              :         regno = FIRST_SSE_REG;
    4267              :     }
    4268              : 
    4269              :   /* OImode shouldn't be used directly.  */
    4270      3933336 :   gcc_assert (mode != OImode);
    4271              : 
    4272      3933336 :   return gen_rtx_REG (orig_mode, regno);
    4273              : }
    4274              : 
                        /* Return the rtx describing where a value of mode MODE (original mode
                           ORIG_MODE) and type VALTYPE is returned.  VALTYPE is NULL for
                           libcalls, in which case NULL is returned for TCmode.  Called by
                           ix86_function_value_1 for 64-bit targets whose function ABI is not
                           MS_ABI.  */
    4275              : static rtx
    4276     97814065 : function_value_64 (machine_mode orig_mode, machine_mode mode,
    4277              :                    const_tree valtype)
    4278              : {
    4279     97814065 :   rtx ret;
    4280              : 
    4281              :   /* Handle libcalls, which don't provide a type node.  */
    4282     97814065 :   if (valtype == NULL)
    4283              :     {
    4284       102330 :       unsigned int regno;
    4285              : 
    4286       102330 :       switch (mode)
    4287              :         {
    4288              :         case E_BFmode:
    4289              :         case E_HFmode:
    4290              :         case E_HCmode:
    4291              :         case E_SFmode:
    4292              :         case E_SCmode:
    4293              :         case E_DFmode:
    4294              :         case E_DCmode:
    4295              :         case E_TFmode:
    4296              :         case E_SDmode:
    4297              :         case E_DDmode:
    4298              :         case E_TDmode:
    4299              :           regno = FIRST_SSE_REG;
    4300              :           break;
    4301         1040 :         case E_XFmode:
    4302         1040 :         case E_XCmode:
    4303         1040 :           regno = FIRST_FLOAT_REG;
    4304         1040 :           break;
    4305              :         case E_TCmode:
    4306              :           return NULL;
    4307        56152 :         default:
    4308        56152 :           regno = AX_REG;
    4309              :         }
    4310              : 
    4311       102330 :       return gen_rtx_REG (mode, regno);
    4312              :     }
    4313     97711735 :   else if (POINTER_TYPE_P (valtype))
    4314              :     {
    4315              :       /* Pointers are always returned in word_mode.  */
    4316     16166319 :       mode = word_mode;
    4317              :     }
    4318              : 
    4319     97711735 :   ret = construct_container (mode, orig_mode, valtype, true,
    4320              :                              X86_64_MAX_RETURN_NREGS,
    4321              :                              X86_64_MAX_SSE_RETURN_NREGS,
    4322              :                              x86_64_int_return_registers, 0);
    4323              : 
    4324              :   /* For zero sized structures, construct_container returns NULL, but we
    4325              :      need to keep the rest of the compiler happy by returning a meaningful
                             value.  */
    4326     97711735 :   if (!ret)
    4327       204085 :     ret = gen_rtx_REG (orig_mode, AX_REG);
    4328              : 
    4329              :   return ret;
    4330              : }
    4331              : 
                        /* Like function_value_32, except that an x87 float MODE is returned in
                           FIRST_FLOAT_REG (when TARGET_FLOAT_RETURNS_IN_80387) unless VALTYPE
                           is an aggregate of at most 8 bytes.  Every other case is handed to
                           function_value_32.  Called by ix86_function_value_1 for 32-bit
                           targets when the function ABI is MS_ABI.  */
    4332              : static rtx
    4333            0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
    4334              :                       const_tree fntype, const_tree fn, const_tree valtype)
    4335              : {
    4336            0 :   unsigned int regno;
    4337              : 
    4338              :   /* Floating point return values in %st(0)
    4339              :      (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
    4340            0 :   if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
    4341            0 :            && (GET_MODE_SIZE (mode) > 8
    4342            0 :                || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    4343              :   {
    4344            0 :     regno = FIRST_FLOAT_REG;
    4345            0 :     return gen_rtx_REG (orig_mode, regno);
    4346              :   }
    4347              :   else
    4348            0 :     return function_value_32(orig_mode, mode, fntype,fn);
    4349              : }
    4350              : 
                        /* Return a register rtx in ORIG_MODE for a value of mode MODE and type
                           VALTYPE (which may be NULL_TREE).  The register is AX_REG unless
                           TARGET_SSE is set and the size/type checks below select
                           FIRST_SSE_REG.  Called by ix86_function_value_1 for 64-bit targets
                           when the function ABI is MS_ABI.  */
    4351              : static rtx
    4352       787708 : function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
    4353              :                       const_tree valtype)
    4354              : {
    4355       787708 :   unsigned int regno = AX_REG;
    4356              : 
    4357       787708 :   if (TARGET_SSE)
    4358              :     {
    4359       786981 :       unsigned int mode_size = GET_MODE_SIZE (mode);
    4360              : 
    4361       786981 :       switch (mode_size)
    4362              :         {
    4363        34397 :         case 16:
    4364        34397 :         case 32:
    4365        34397 :         case 64:
    4366        34397 :           if (mode_size == 32 && !TARGET_AVX)
    4367              :             break;
    4368        34397 :           if (mode_size == 64 && !TARGET_AVX512F)
    4369              :             break;
    4370        34397 :           if (valtype != NULL_TREE
    4371        34397 :               && !VECTOR_INTEGER_TYPE_P (valtype)
    4372        15828 :               && !INTEGRAL_TYPE_P (valtype)
    4373        50225 :               && !VECTOR_FLOAT_TYPE_P (valtype))
    4374              :             break;
    4375        34397 :           if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
    4376              :               && !COMPLEX_MODE_P (mode))
    4377       218079 :             regno = FIRST_SSE_REG;
    4378              :           break;
    4379       741282 :         case 8:
    4380       741282 :         case 4:
    4381       741282 :         case 2:
                                  /* Aggregates of these sizes stay in AX_REG.  */
    4382       741282 :           if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
    4383              :             break;
    4384       723988 :           if (mode == HFmode || mode == SFmode || mode == DFmode)
    4385       218079 :             regno = FIRST_SSE_REG;
    4386              :           break;
    4387              :         default:
    4388              :           break;
    4389              :         }
    4390              :     }
    4391       787708 :   return gen_rtx_REG (orig_mode, regno);
    4392              : }
    4393              : 
                        /* Common worker for ix86_function_value and ix86_libcall_value.
                           FNTYPE_OR_DECL may be NULL, a decl or a function type: when it is a
                           decl, FN is that decl and FNTYPE its TREE_TYPE; otherwise FN is
                           NULL_TREE and FNTYPE is FNTYPE_OR_DECL itself.  The work is then
                           dispatched on the function's ABI (MS_ABI or not) and on
                           TARGET_64BIT.  */
    4394              : static rtx
    4395    102535238 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
    4396              :                        machine_mode orig_mode, machine_mode mode)
    4397              : {
    4398    102535238 :   const_tree fn, fntype;
    4399              : 
    4400    102535238 :   fn = NULL_TREE;
    4401    102535238 :   if (fntype_or_decl && DECL_P (fntype_or_decl))
    4402      3541005 :     fn = fntype_or_decl;
    4403      3541005 :   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
    4404              : 
    4405    102535238 :   if (ix86_function_type_abi (fntype) == MS_ABI)
    4406              :     {
    4407       787708 :       if (TARGET_64BIT)
    4408       787708 :         return function_value_ms_64 (orig_mode, mode, valtype);
    4409              :       else
    4410            0 :         return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    4411              :     }
    4412    101747530 :   else if (TARGET_64BIT)
    4413     97814065 :     return function_value_64 (orig_mode, mode, valtype);
    4414              :   else
    4415      3933465 :     return function_value_32 (orig_mode, mode, fntype, fn);
    4416              : }
    4417              : 
                        /* Return the rtx for a function return value of type VALTYPE.
                           ORIG_MODE is TYPE_MODE (VALTYPE) and MODE is the result of
                           type_natural_mode; both are passed on to ix86_function_value_1
                           together with FNTYPE_OR_DECL.  The trailing bool argument is
                           unused.  */
    4418              : static rtx
    4419    102429766 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
    4420              : {
    4421    102429766 :   machine_mode mode, orig_mode;
    4422              : 
    4423    102429766 :   orig_mode = TYPE_MODE (valtype);
    4424    102429766 :   mode = type_natural_mode (valtype, NULL, true);
    4425    102429766 :   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
    4426              : }
    4427              : 
    4428              : /* Pointer function arguments and return values are promoted to
    4429              :    word_mode for normal functions, with *PUNSIGNEDP set to
                           POINTERS_EXTEND_UNSIGNED.  All other cases are left to
                           default_promote_function_mode.  */
    4430              : 
    4431              : static machine_mode
    4432     32089961 : ix86_promote_function_mode (const_tree type, machine_mode mode,
    4433              :                             int *punsignedp, const_tree fntype,
    4434              :                             int for_return)
    4435              : {
    4436     32089961 :   if (cfun->machine->func_type == TYPE_NORMAL
    4437     32088938 :       && type != NULL_TREE
    4438     32054815 :       && POINTER_TYPE_P (type))
    4439              :     {
    4440     16057950 :       *punsignedp = POINTERS_EXTEND_UNSIGNED;
    4441     16057950 :       return word_mode;
    4442              :     }
    4443     16032011 :   return default_promote_function_mode (type, mode, punsignedp, fntype,
    4444     16032011 :                                         for_return);
    4445              : }
    4446              : 
    4447              : /* Return true if a structure, union or array with MODE containing FIELD
    4448              :    should be accessed using BLKmode.  */
    4449              : 
    4450              : static bool
    4451    142570941 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
    4452              : {
    4453              :   /* Only an XFmode union (UNION_TYPE or QUAL_UNION_TYPE) must be in BLKmode.  */
    4454    142570941 :   return (mode == XFmode
    4455    142708886 :           && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
    4456       130993 :               || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
    4457              : }
    4458              : 
                        /* Return the rtx for the value returned by a libcall in mode MODE.
                           No type or function information is available, so NULL is passed
                           for both and MODE serves as both the original and the natural
                           mode.  */
    4459              : rtx
    4460       105472 : ix86_libcall_value (machine_mode mode)
    4461              : {
    4462       105472 :   return ix86_function_value_1 (NULL, NULL, mode, mode);
    4463              : }
    4464              : 
    4465              : /* Return true iff type is returned in memory.  */
    4466              : 
    4467              : static bool
    4468    104367855 : ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
    4469              : {
    4470    104367855 :   const machine_mode mode = type_natural_mode (type, NULL, true);
    4471    104367855 :   HOST_WIDE_INT size;
    4472              : 
    4473    104367855 :   if (TARGET_64BIT)
    4474              :     {
    4475     99826039 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    4476              :         {
    4477       707133 :           size = int_size_in_bytes (type);
    4478              : 
    4479              :           /* __m128 is returned in xmm0.  256/512-bit vector values are
    4480              :              returned in ymm0/zmm0 when AVX/AVX512 is enabled.  */
    4481       707133 :           if ((!type || VECTOR_INTEGER_TYPE_P (type)
    4482       687562 :                || INTEGRAL_TYPE_P (type)
    4483       217159 :                || VECTOR_FLOAT_TYPE_P (type))
    4484       505802 :               && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
    4485              :               && !COMPLEX_MODE_P (mode)
    4486      1212935 :               && ((GET_MODE_SIZE (mode) == 16 || size == 16)
    4487       519126 :                   || (TARGET_AVX && (GET_MODE_SIZE (mode) == 32 || size == 32))
    4488       482061 :                   || (TARGET_AVX512F
    4489        16590 :                       && (GET_MODE_SIZE (mode) == 64 || size == 64))))
    4490              :             return false;
    4491              : 
    4492              :           /* Otherwise, only sizes of exactly 1, 2, 4 or 8 bytes avoid memory.  */
    4493      1329376 :           return size != 1 && size != 2 && size != 4 && size != 8;
    4494              :         }
    4495              :       else
    4496              :         {
    4497     99118906 :           int needed_intregs, needed_sseregs;
    4498              : 
    4499     99118906 :           return examine_argument (mode, type, true,
    4500              :                                    &needed_intregs, &needed_sseregs);
    4501              :         }
    4502              :     }
    4503              :   else
    4504              :     {
    4505      4541816 :       size = int_size_in_bytes (type);
    4506              : 
    4507              :       /* Intel MCU psABI returns scalars and aggregates no larger than 8
    4508              :          bytes in registers.  */
    4509      4541816 :       if (TARGET_IAMCU)
    4510            0 :         return VECTOR_MODE_P (mode) || size < 0 || size > 8;
    4511              : 
    4512      4541816 :       if (mode == BLKmode)
    4513              :         return true;
    4514              : 
    4515      4541816 :       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    4516              :         return false;
    4517              : 
    4518      4541816 :       if (VECTOR_MODE_P (mode) || mode == TImode)
    4519              :         {
    4520              :           /* User-created vectors small enough to fit in EAX.  */
    4521       268055 :           if (size < 8)
    4522              :             return false;
    4523              : 
    4524              :           /* Unless ABI prescribes otherwise,
    4525              :              MMX/3dNow values are returned in MM0 if available.  */
    4526              : 
    4527       268055 :           if (size == 8)
    4528         9266 :             return TARGET_VECT8_RETURNS || !TARGET_MMX;
    4529              : 
    4530              :           /* SSE values are returned in XMM0 if available.  */
    4531       258789 :           if (size == 16)
    4532       108939 :             return !TARGET_SSE;
    4533              : 
    4534              :           /* AVX values are returned in YMM0 if available.  */
    4535       149850 :           if (size == 32)
    4536        72090 :             return !TARGET_AVX;
    4537              : 
    4538              :           /* AVX512F values are returned in ZMM0 if available.  */
    4539        77760 :           if (size == 64)
    4540        77760 :             return !TARGET_AVX512F;
    4541              :         }
    4542              : 
    4543      4273761 :       if (mode == XFmode)
    4544              :         return false;
    4545              : 
    4546      4262038 :       if (size > 12)
    4547              :         return true;
    4548              : 
    4549              :       /* OImode shouldn't be used directly.  */
    4550      3280377 :       gcc_assert (mode != OImode);
    4551              : 
    4552              :       return false;
    4553              :     }
    4554              : }
    4555              : 
    4556              : /* Implement TARGET_PUSH_ARGUMENT.  */
    4557              : 
    4558              : static bool
    4559      9348065 : ix86_push_argument (unsigned int npush)
    4560              : {
    4561              :   /* If SSE2 is available, use a vector move to put a large argument onto
    4562              :      the stack.  NB:  In 32-bit mode, use an 8-byte vector move.  */
    4563     11773075 :   return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
    4564      9082932 :           && TARGET_PUSH_ARGS
    4565     18430899 :           && !ACCUMULATE_OUTGOING_ARGS);
    4566              : }
    4567              : 
    4568              : 
    4569              : /* Create the va_list data type: a one-element array of a record named "__va_list_tag" with the fields gp_offset, fp_offset, overflow_arg_area and reg_save_area.  The record carries the "sysv_abi va_list" attribute.  */
    4570              : 
    4571              : static tree
    4572       281307 : ix86_build_builtin_va_list_64 (void)
    4573              : {
    4574       281307 :   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
    4575              : 
    4576       281307 :   record = lang_hooks.types.make_type (RECORD_TYPE);
    4577       281307 :   type_decl = build_decl (BUILTINS_LOCATION,
    4578              :                           TYPE_DECL, get_identifier ("__va_list_tag"), record);
    4579              :   /* Build the four field declarations: two unsigned offsets and two pointers.  */
    4580       281307 :   f_gpr = build_decl (BUILTINS_LOCATION,
    4581              :                       FIELD_DECL, get_identifier ("gp_offset"),
    4582              :                       unsigned_type_node);
    4583       281307 :   f_fpr = build_decl (BUILTINS_LOCATION,
    4584              :                       FIELD_DECL, get_identifier ("fp_offset"),
    4585              :                       unsigned_type_node);
    4586       281307 :   f_ovf = build_decl (BUILTINS_LOCATION,
    4587              :                       FIELD_DECL, get_identifier ("overflow_arg_area"),
    4588              :                       ptr_type_node);
    4589       281307 :   f_sav = build_decl (BUILTINS_LOCATION,
    4590              :                       FIELD_DECL, get_identifier ("reg_save_area"),
    4591              :                       ptr_type_node);
    4592              :   /* Remember the two offset fields in va_list_gpr_counter_field and va_list_fpr_counter_field.  */
    4593       281307 :   va_list_gpr_counter_field = f_gpr;
    4594       281307 :   va_list_fpr_counter_field = f_fpr;
    4595              :   /* Make RECORD the context of every field.  */
    4596       281307 :   DECL_FIELD_CONTEXT (f_gpr) = record;
    4597       281307 :   DECL_FIELD_CONTEXT (f_fpr) = record;
    4598       281307 :   DECL_FIELD_CONTEXT (f_ovf) = record;
    4599       281307 :   DECL_FIELD_CONTEXT (f_sav) = record;
    4600              :   /* Attach the type declaration and chain the fields in the order gpr, fpr, ovf, sav.  */
    4601       281307 :   TYPE_STUB_DECL (record) = type_decl;
    4602       281307 :   TYPE_NAME (record) = type_decl;
    4603       281307 :   TYPE_FIELDS (record) = f_gpr;
    4604       281307 :   DECL_CHAIN (f_gpr) = f_fpr;
    4605       281307 :   DECL_CHAIN (f_fpr) = f_ovf;
    4606       281307 :   DECL_CHAIN (f_ovf) = f_sav;
    4607       281307 :   TREE_PUBLIC (type_decl) = 1;
    4608              : 
    4609       281307 :   layout_type (record);
    4610              :   /* Prepend the identifying attribute to the record's attribute list.  */
    4611       281307 :   TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
    4612       281307 :                                         NULL_TREE, TYPE_ATTRIBUTES (record));
    4613              : 
    4614              :   /* The correct type is an array type of one element.  */
    4615       281307 :   return build_array_type (record, build_index_type (size_zero_node));
    4616              : }
    4617              : 
    4618              : /* Set up the builtin va_list data type and, for 64-bit, the additional
    4619              :    calling convention specific va_list data types.  Returns ms_va_list_type_node or sysv_va_list_type_node according to ix86_abi for 64-bit, and a plain pointer to char_type_node otherwise.  */
    4620              : 
    4621              : static tree
    4622       288463 : ix86_build_builtin_va_list (void)
    4623              : {
    4624       288463 :   if (TARGET_64BIT)
    4625              :     {
    4626              :       /* Initialize ABI specific va_list builtin types.
    4627              : 
    4628              :          In lto1, we can encounter two va_list types:
    4629              :          - one as a result of the type-merge across TUs, and
    4630              :          - the one constructed here.
    4631              :          These two types will not have the same TYPE_MAIN_VARIANT, and therefore
    4632              :          a type identity check in canonical_va_list_type based on
    4633              :          TYPE_MAIN_VARIANT (which we used to have) will not work.
    4634              :          Instead, we tag each va_list_type_node with its unique attribute, and
    4635              :          look for the attribute in the type identity check in
    4636              :          canonical_va_list_type.
    4637              : 
    4638              :          Tagging sysv_va_list_type_node directly with the attribute is
    4639              :          problematic since it's an array of one record, which will degrade into a
    4640              :          pointer to record when used as parameter (see build_va_arg comments for
    4641              :          an example), dropping the attribute in the process.  So we tag the
    4642              :          record instead.  */
    4643              : 
    4644              :       /* For SYSV_ABI we use an array of one record.  */
    4645       281307 :       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
    4646              : 
    4647              :       /* For MS_ABI we use plain pointer to argument area, tagged with the "ms_abi va_list" attribute.  */
    4648       281307 :       tree char_ptr_type = build_pointer_type (char_type_node);
    4649       281307 :       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
    4650       281307 :                              TYPE_ATTRIBUTES (char_ptr_type));
    4651       281307 :       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
    4652              :       /* Both nodes are now initialized; return the one selected by ix86_abi.  */
    4653       281307 :       return ((ix86_abi == MS_ABI)
    4654       281307 :               ? ms_va_list_type_node
    4655       281307 :               : sysv_va_list_type_node);
    4656              :     }
    4657              :   else
    4658              :     {
    4659              :       /* For i386 we use plain pointer to argument area.  */
    4660         7156 :       return build_pointer_type (char_type_node);
    4661              :     }
    4662              : }
    4663              : 
    4664              : /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Sets ix86_varargs_gpr_size and ix86_varargs_fpr_size, then emits stores of the integer registers from CUM->regno and the SSE registers from CUM->sse_regno into the save area based at frame_pointer_rtx.  */
    4665              : 
    4666              : static void
    4667        15683 : setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
    4668              : {
    4669        15683 :   rtx save_area, mem;
    4670        15683 :   alias_set_type set;
    4671        15683 :   int i, max;
    4672              : 
    4673              :   /* GPR size of varargs save area.  */
    4674        15683 :   if (cfun->va_list_gpr_size)
    4675        15233 :     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
    4676              :   else
    4677          450 :     ix86_varargs_gpr_size = 0;
    4678              : 
    4679              :   /* FPR size of varargs save area.  We don't need it if we don't pass
    4680              :      anything in SSE registers.  */
    4681        15683 :   if (TARGET_SSE && cfun->va_list_fpr_size)
    4682        14646 :     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
    4683              :   else
    4684         1037 :     ix86_varargs_fpr_size = 0;
    4685              : 
    4686        15683 :   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    4687              :     return;  /* Both sizes are zero, so there is nothing to store.  */
    4688              : 
    4689        15402 :   save_area = frame_pointer_rtx;
    4690        15402 :   set = get_varargs_alias_set ();
    4691              :   /* Compute the exclusive upper bound of integer registers to save, clamped to X86_64_REGPARM_MAX.  */
    4692        15402 :   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
    4693        15402 :   if (max > X86_64_REGPARM_MAX)
    4694              :     max = X86_64_REGPARM_MAX;
    4695              :   /* Pick the integer parameter register table: the preserve_none one if CUM asks for it.  */
    4696        15402 :   const int *parm_regs;
    4697        15402 :   if (cum->preserve_none_abi)
    4698              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    4699              :   else
    4700        15401 :     parm_regs = x86_64_int_parameter_registers;
    4701              :   /* Store integer register I at save_area + I * UNITS_PER_WORD.  */
    4702        85531 :   for (i = cum->regno; i < max; i++)
    4703              :     {
    4704        70129 :       mem = gen_rtx_MEM (word_mode,
    4705        70129 :                          plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
    4706        70129 :       MEM_NOTRAP_P (mem) = 1;
    4707        70129 :       set_mem_alias_set (mem, set);
    4708        70129 :       emit_move_insn (mem,
    4709        70129 :                       gen_rtx_REG (word_mode, parm_regs[i]));
    4710              :     }
    4711              : 
    4712        15402 :   if (ix86_varargs_fpr_size)
    4713              :     {
    4714        14646 :       machine_mode smode;
    4715        14646 :       rtx_code_label *label;
    4716        14646 :       rtx test;
    4717              : 
    4718              :       /* Now emit code to save SSE registers.  The AX parameter contains number
    4719              :          of SSE parameter registers used to call this function, though all we
    4720              :          actually check here is the zero/non-zero status.  */
    4721              : 
    4722        14646 :       label = gen_label_rtx ();
    4723        14646 :       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
    4724        14646 :       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
    4725              :                                       label));
    4726              : 
    4727              :       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
    4728              :          we used movdqa (i.e. TImode) instead?  Perhaps even better would
    4729              :          be if we could determine the real mode of the data, via a hook
    4730              :          into pass_stdarg.  Ignore all that for now.  */
    4731        14646 :       smode = V4SFmode;
    4732        14646 :       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
    4733         4149 :         crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
    4734              :       /* Compute the exclusive upper bound of SSE registers to save, clamped to X86_64_SSE_REGPARM_MAX.  */
    4735        14646 :       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
    4736        14646 :       if (max > X86_64_SSE_REGPARM_MAX)
    4737              :         max = X86_64_SSE_REGPARM_MAX;
    4738              :       /* Store SSE register I at save_area + I * 16 + ix86_varargs_gpr_size.  */
    4739       130198 :       for (i = cum->sse_regno; i < max; ++i)
    4740              :         {
    4741       115552 :           mem = plus_constant (Pmode, save_area,
    4742       115552 :                                i * 16 + ix86_varargs_gpr_size);
    4743       115552 :           mem = gen_rtx_MEM (smode, mem);
    4744       115552 :           MEM_NOTRAP_P (mem) = 1;
    4745       115552 :           set_mem_alias_set (mem, set);
    4746       115552 :           set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
    4747              : 
    4748       115552 :           emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
    4749              :         }
    4750              :       /* Target of the branch emitted above: reached directly when the AX check finds zero, skipping the SSE stores.  */
    4751        14646 :       emit_label (label);
    4752              :     }
    4753              : }
    4754              : /* Varargs setup used when the call ABI is MS_ABI: store the integer parameter registers from CUM->regno up to X86_64_MS_REGPARM_MAX at virtual_incoming_args_rtx + I * UNITS_PER_WORD.  */
    4755              : static void
    4756         5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
    4757              : {
    4758         5652 :   alias_set_type set = get_varargs_alias_set ();
    4759         5652 :   int i;
    4760              : 
    4761              :   /* Reset to zero, as there might be a sysv vaarg used
    4762              :      before.  */
    4763         5652 :   ix86_varargs_gpr_size = 0;
    4764         5652 :   ix86_varargs_fpr_size = 0;
    4765              :   /* Emit one Pmode store per remaining integer parameter register.  */
    4766        14154 :   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    4767              :     {
    4768         8502 :       rtx reg, mem;
    4769              : 
    4770         8502 :       mem = gen_rtx_MEM (Pmode,
    4771         8502 :                          plus_constant (Pmode, virtual_incoming_args_rtx,
    4772         8502 :                                         i * UNITS_PER_WORD));
    4773         8502 :       MEM_NOTRAP_P (mem) = 1;
    4774         8502 :       set_mem_alias_set (mem, set);
    4775              : 
    4776         8502 :       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
    4777         8502 :       emit_move_insn (mem, reg);
    4778              :     }
    4779         5652 : }
    4780              : /* Set up incoming varargs.  Does nothing for 32-bit.  For 64-bit, works on a copy of CUM (advanced past ARG in the stdarg case below) and dispatches on cum->call_abi to the MS or the non-MS worker.  NO_RTL must be zero.  */
    4781              : static void
    4782        21489 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
    4783              :                              const function_arg_info &arg,
    4784              :                              int *, int no_rtl)
    4785              : {
    4786        21489 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    4787        21489 :   CUMULATIVE_ARGS next_cum;
    4788        21489 :   tree fntype;
    4789              : 
    4790              :   /* This argument doesn't appear to be used anymore.  Which is good,
    4791              :      because the old code here didn't suppress rtl generation.  */
    4792        21489 :   gcc_assert (!no_rtl);
    4793              : 
    4794        21489 :   if (!TARGET_64BIT)
    4795          154 :     return;
    4796              : 
    4797        21335 :   fntype = TREE_TYPE (current_function_decl);
    4798              : 
    4799              :   /* For varargs, we do not want to skip the dummy va_dcl argument.
    4800              :      For stdargs, we do want to skip the last named argument.  */
    4801        21335 :   next_cum = *cum;
    4802        21335 :   if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
    4803          122 :        || arg.type != NULL_TREE)
    4804        21360 :       && stdarg_p (fntype))
    4805        21238 :     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
    4806              :   /* The workers receive the local copy, so *CUM itself is not modified here.  */
    4807        21335 :   if (cum->call_abi == MS_ABI)
    4808         5652 :     setup_incoming_varargs_ms_64 (&next_cum);
    4809              :   else
    4810        15683 :     setup_incoming_varargs_64 (&next_cum);
    4811              : }
    4812              : 
    4813              : /* Checks if TYPE is of kind va_list char *.  Always true for 32-bit; for 64-bit, true when the canonical va_list type of TYPE is ms_va_list_type_node, or is va_list_type_node while ix86_abi is MS_ABI.  */
    4814              : 
    4815              : static bool
    4816        73066 : is_va_list_char_pointer (tree type)
    4817              : {
    4818        73066 :   tree canonic;
    4819              : 
    4820              :   /* For 32-bit it is always true.  */
    4821        73066 :   if (!TARGET_64BIT)
    4822              :     return true;
    4823        72904 :   canonic = ix86_canonical_va_list_type (type);
    4824        72904 :   return (canonic == ms_va_list_type_node
    4825        72904 :           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
    4826              : }
    4827              : 
    4828              : /* Implement va_start.  VALIST is the va_list expression to initialize; NEXTARG is only forwarded to std_expand_builtin_va_start on the char-pointer path.  Otherwise the gpr, fpr, overflow-area and save-area fields of the record are filled in.  */
    4829              : 
    4830              : static void
    4831        20978 : ix86_va_start (tree valist, rtx nextarg)
    4832              : {
    4833        20978 :   HOST_WIDE_INT words, n_gpr, n_fpr;
    4834        20978 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4835        20978 :   tree gpr, fpr, ovf, sav, t;
    4836        20978 :   tree type;
    4837        20978 :   rtx ovf_rtx;
    4838              : 
    4839        20978 :   if (flag_split_stack
    4840           12 :       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4841              :     {
    4842           12 :       unsigned int scratch_regno;
    4843              : 
    4844              :       /* When we are splitting the stack, we can't refer to the stack
    4845              :          arguments using internal_arg_pointer, because they may be on
    4846              :          the old stack.  The split stack prologue will arrange to
    4847              :          leave a pointer to the old stack arguments in a scratch
    4848              :          register, which we here copy to a pseudo-register.  The split
    4849              :          stack prologue can't set the pseudo-register directly because
    4850              :          it (the prologue) runs before any registers have been saved.  */
    4851              : 
    4852           12 :       scratch_regno = split_stack_prologue_scratch_regno ();
    4853           12 :       if (scratch_regno != INVALID_REGNUM)
    4854              :         {
    4855           12 :           rtx reg;
    4856           12 :           rtx_insn *seq;
    4857              : 
    4858           16 :           reg = gen_reg_rtx (Pmode);
    4859           12 :           cfun->machine->split_stack_varargs_pointer = reg;
    4860              :           /* Build the copy in its own sequence, then insert it after the function entry.  */
    4861           12 :           start_sequence ();
    4862           16 :           emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
    4863           12 :           seq = end_sequence ();
    4864              : 
    4865           12 :           push_topmost_sequence ();
    4866           12 :           emit_insn_after (seq, entry_of_function ());
    4867           12 :           pop_topmost_sequence ();
    4868              :         }
    4869              :     }
    4870              : 
    4871              :   /* Only 64bit target needs something special.  */
    4872        20978 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4873              :     {
    4874         5656 :       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4875         5652 :         std_expand_builtin_va_start (valist, nextarg);
    4876              :       else
    4877              :         {
    4878            4 :           rtx va_r, next;
    4879              :           /* Set VALIST to split_stack_varargs_pointer + crtl->args.arg_offset_rtx.  */
    4880            4 :           va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
    4881            8 :           next = expand_binop (ptr_mode, add_optab,
    4882            4 :                                cfun->machine->split_stack_varargs_pointer,
    4883              :                                crtl->args.arg_offset_rtx,
    4884              :                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
    4885            4 :           convert_move (va_r, next, 0);
    4886              :         }
    4887         5656 :       return;
    4888              :     }
    4889              :   /* Fetch the four fields of the sysv va_list record by walking its field chain.  */
    4890        15322 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4891        15322 :   f_fpr = DECL_CHAIN (f_gpr);
    4892        15322 :   f_ovf = DECL_CHAIN (f_fpr);
    4893        15322 :   f_sav = DECL_CHAIN (f_ovf);
    4894              : 
    4895        15322 :   valist = build_simple_mem_ref (valist);
    4896        15322 :   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
    4897              :   /* The following should be folded into the MEM_REF offset.  */
    4898        15322 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
    4899              :                 f_gpr, NULL_TREE);
    4900        15322 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
    4901              :                 f_fpr, NULL_TREE);
    4902        15322 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
    4903              :                 f_ovf, NULL_TREE);
    4904        15322 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
    4905              :                 f_sav, NULL_TREE);
    4906              : 
    4907              :   /* Count number of gp and fp argument registers used.  */
    4908        15322 :   words = crtl->args.info.words;
    4909        15322 :   n_gpr = crtl->args.info.regno;
    4910        15322 :   n_fpr = crtl->args.info.sse_regno;
    4911              :   /* Initialize the gpr field to n_gpr * 8.  */
    4912        15322 :   if (cfun->va_list_gpr_size)
    4913              :     {
    4914        15088 :       type = TREE_TYPE (gpr);
    4915        15088 :       t = build2 (MODIFY_EXPR, type,
    4916        15088 :                   gpr, build_int_cst (type, n_gpr * 8));
    4917        15088 :       TREE_SIDE_EFFECTS (t) = 1;
    4918        15088 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4919              :     }
    4920              :   /* Initialize the fpr field to n_fpr * 16 + 8 * X86_64_REGPARM_MAX.  */
    4921        15322 :   if (TARGET_SSE && cfun->va_list_fpr_size)
    4922              :     {
    4923        14489 :       type = TREE_TYPE (fpr);
    4924        14489 :       t = build2 (MODIFY_EXPR, type, fpr,
    4925        14489 :                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
    4926        14489 :       TREE_SIDE_EFFECTS (t) = 1;
    4927        14489 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4928              :     }
    4929              : 
    4930              :   /* Find the overflow area.  It is based on the internal argument pointer, or on the split-stack varargs pointer when one is set, plus WORDS * UNITS_PER_WORD if WORDS is nonzero.  */
    4931        15322 :   type = TREE_TYPE (ovf);
    4932        15322 :   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4933        15314 :     ovf_rtx = crtl->args.internal_arg_pointer;
    4934              :   else
    4935              :     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
    4936        15322 :   t = make_tree (type, ovf_rtx);
    4937        15322 :   if (words != 0)
    4938          488 :     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
    4939              :   /* Store the computed address into the overflow-area field.  */
    4940        15322 :   t = build2 (MODIFY_EXPR, type, ovf, t);
    4941        15322 :   TREE_SIDE_EFFECTS (t) = 1;
    4942        15322 :   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4943              : 
    4944        15322 :   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    4945              :     {
    4946              :       /* Find the register save area.
    4947              :          The function prologue saves it right above the stack frame.  */
    4948        15257 :       type = TREE_TYPE (sav);
    4949        15257 :       t = make_tree (type, frame_pointer_rtx);
    4950        15257 :       if (!ix86_varargs_gpr_size)
    4951          169 :         t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);  /* No GPR area: offset the pointer by -8 * X86_64_REGPARM_MAX.  */
    4952              :       /* Store the computed address into the save-area field.  */
    4953        15257 :       t = build2 (MODIFY_EXPR, type, sav, t);
    4954        15257 :       TREE_SIDE_EFFECTS (t) = 1;
    4955        15257 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4956              :     }
    4957              : }
    4958              : 
    4959              : /* Implement va_arg.  */
    4960              : 
    4961              : static tree
    4962        52088 : ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
    4963              :                       gimple_seq *post_p)
    4964              : {
    4965        52088 :   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
    4966        52088 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4967        52088 :   tree gpr, fpr, ovf, sav, t;
    4968        52088 :   int size, rsize;
    4969        52088 :   tree lab_false, lab_over = NULL_TREE;
    4970        52088 :   tree addr, t2;
    4971        52088 :   rtx container;
    4972        52088 :   int indirect_p = 0;
    4973        52088 :   tree ptrtype;
    4974        52088 :   machine_mode nat_mode;
    4975        52088 :   unsigned int arg_boundary;
    4976        52088 :   unsigned int type_align;
    4977              : 
    4978              :   /* Only 64bit target needs something special.  */
    4979        52088 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4980          260 :     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    4981              : 
    4982        51828 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4983        51828 :   f_fpr = DECL_CHAIN (f_gpr);
    4984        51828 :   f_ovf = DECL_CHAIN (f_fpr);
    4985        51828 :   f_sav = DECL_CHAIN (f_ovf);
    4986              : 
    4987        51828 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
    4988              :                 valist, f_gpr, NULL_TREE);
    4989              : 
    4990        51828 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
    4991        51828 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
    4992        51828 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
    4993              : 
    4994        51828 :   indirect_p = pass_va_arg_by_reference (type);
    4995        51828 :   if (indirect_p)
    4996          103 :     type = build_pointer_type (type);
    4997        51828 :   size = arg_int_size_in_bytes (type);
    4998        51828 :   rsize = CEIL (size, UNITS_PER_WORD);
    4999              : 
    5000        51828 :   nat_mode = type_natural_mode (type, NULL, false);
    5001        51828 :   switch (nat_mode)
    5002              :     {
    5003           28 :     case E_V16HFmode:
    5004           28 :     case E_V16BFmode:
    5005           28 :     case E_V8SFmode:
    5006           28 :     case E_V8SImode:
    5007           28 :     case E_V32QImode:
    5008           28 :     case E_V16HImode:
    5009           28 :     case E_V4DFmode:
    5010           28 :     case E_V4DImode:
    5011           28 :     case E_V32HFmode:
    5012           28 :     case E_V32BFmode:
    5013           28 :     case E_V16SFmode:
    5014           28 :     case E_V16SImode:
    5015           28 :     case E_V64QImode:
    5016           28 :     case E_V32HImode:
    5017           28 :     case E_V8DFmode:
    5018           28 :     case E_V8DImode:
    5019              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    5020           28 :       if (!TARGET_64BIT_MS_ABI)
    5021              :         {
    5022              :           container = NULL;
    5023              :           break;
    5024              :         }
    5025              :       /* FALLTHRU */
    5026              : 
    5027        51800 :     default:
    5028        51800 :       container = construct_container (nat_mode, TYPE_MODE (type),
    5029              :                                        type, false, X86_64_REGPARM_MAX,
    5030              :                                        X86_64_SSE_REGPARM_MAX, intreg, 0);
    5031        51800 :       break;
    5032              :     }
    5033              : 
    5034              :   /* Pull the value out of the saved registers.  */
    5035              : 
    5036        51828 :   addr = create_tmp_var (ptr_type_node, "addr");
    5037        51828 :   type_align = TYPE_ALIGN (type);
    5038              : 
    5039        51828 :   if (container)
    5040              :     {
    5041        28735 :       int needed_intregs, needed_sseregs;
    5042        28735 :       bool need_temp;
    5043        28735 :       tree int_addr, sse_addr;
    5044              : 
    5045        28735 :       lab_false = create_artificial_label (UNKNOWN_LOCATION);
    5046        28735 :       lab_over = create_artificial_label (UNKNOWN_LOCATION);
    5047              : 
    5048        28735 :       examine_argument (nat_mode, type, false,
    5049              :                         &needed_intregs, &needed_sseregs);
    5050              : 
    5051        28735 :       bool container_in_reg = false;
    5052        28735 :       if (REG_P (container))
    5053              :         container_in_reg = true;
    5054         1641 :       else if (GET_CODE (container) == PARALLEL
    5055         1641 :                && GET_MODE (container) == BLKmode
    5056          580 :                && XVECLEN (container, 0) == 1)
    5057              :         {
    5058              :           /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
    5059              :              expression in a TImode register.  In this case, temp isn't
    5060              :              needed.  Otherwise, the TImode variable will be put in the
    5061              :              GPR save area which guarantees only 8-byte alignment.   */
    5062          509 :           rtx x = XVECEXP (container, 0, 0);
    5063          509 :           if (GET_CODE (x) == EXPR_LIST
    5064          509 :               && REG_P (XEXP (x, 0))
    5065          509 :               && XEXP (x, 1) == const0_rtx)
    5066              :             container_in_reg = true;
    5067              :         }
    5068              : 
    5069          680 :       need_temp = (!container_in_reg
    5070         1150 :                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
    5071          680 :                        || TYPE_ALIGN (type) > 128));
    5072              : 
    5073              :       /* In case we are passing a structure, verify that it is a consecutive block
    5074              :          in the register save area.  If not, we need to do moves.  */
    5075          680 :       if (!need_temp && !container_in_reg)
    5076              :         {
    5077              :           /* Verify that all registers are strictly consecutive.  */
    5078          966 :           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
    5079              :             {
    5080              :               int i;
    5081              : 
    5082          815 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5083              :                 {
    5084          529 :                   rtx slot = XVECEXP (container, 0, i);
    5085          529 :                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
    5086          529 :                       || INTVAL (XEXP (slot, 1)) != i * 16)
    5087              :                     need_temp = true;
    5088              :                 }
    5089              :             }
    5090              :           else
    5091              :             {
    5092              :               int i;
    5093              : 
    5094         1120 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5095              :                 {
    5096          726 :                   rtx slot = XVECEXP (container, 0, i);
    5097          726 :                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
    5098          726 :                       || INTVAL (XEXP (slot, 1)) != i * 8)
    5099              :                     need_temp = true;
    5100              :                 }
    5101              :             }
    5102              :         }
    5103        28735 :       if (!need_temp)
    5104              :         {
    5105              :           int_addr = addr;
    5106              :           sse_addr = addr;
    5107              :         }
    5108              :       else
    5109              :         {
    5110          877 :           int_addr = create_tmp_var (ptr_type_node, "int_addr");
    5111          877 :           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
    5112              :         }
    5113              : 
    5114              :       /* First ensure that we fit completely in registers.  */
    5115        28735 :       if (needed_intregs)
    5116              :         {
    5117        18014 :           t = build_int_cst (TREE_TYPE (gpr),
    5118        18014 :                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
    5119        18014 :           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
    5120        18014 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5121        18014 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5122        18014 :           gimplify_and_add (t, pre_p);
    5123              :         }
    5124        28735 :       if (needed_sseregs)
    5125              :         {
    5126        11113 :           t = build_int_cst (TREE_TYPE (fpr),
    5127              :                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
    5128        11113 :                              + X86_64_REGPARM_MAX * 8);
    5129        11113 :           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
    5130        11113 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5131        11113 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5132        11113 :           gimplify_and_add (t, pre_p);
    5133              :         }
    5134              : 
    5135              :       /* Compute index to start of area used for integer regs.  */
    5136        28735 :       if (needed_intregs)
    5137              :         {
    5138              :           /* int_addr = gpr + sav; */
    5139        18014 :           t = fold_build_pointer_plus (sav, gpr);
    5140        18014 :           gimplify_assign (int_addr, t, pre_p);
    5141              :         }
    5142        28735 :       if (needed_sseregs)
    5143              :         {
    5144              :           /* sse_addr = fpr + sav; */
    5145        11113 :           t = fold_build_pointer_plus (sav, fpr);
    5146        11113 :           gimplify_assign (sse_addr, t, pre_p);
    5147              :         }
    5148        28735 :       if (need_temp)
    5149              :         {
    5150          877 :           int i, prev_size = 0;
    5151          877 :           tree temp = create_tmp_var (type, "va_arg_tmp");
    5152          877 :           TREE_ADDRESSABLE (temp) = 1;
    5153              : 
    5154              :           /* addr = &temp; */
    5155          877 :           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
    5156          877 :           gimplify_assign (addr, t, pre_p);
    5157              : 
    5158         2241 :           for (i = 0; i < XVECLEN (container, 0); i++)
    5159              :             {
    5160         1364 :               rtx slot = XVECEXP (container, 0, i);
    5161         1364 :               rtx reg = XEXP (slot, 0);
    5162         1364 :               machine_mode mode = GET_MODE (reg);
    5163         1364 :               tree piece_type;
    5164         1364 :               tree addr_type;
    5165         1364 :               tree daddr_type;
    5166         1364 :               tree src_addr, src;
    5167         1364 :               int src_offset;
    5168         1364 :               tree dest_addr, dest;
    5169         1364 :               int cur_size = GET_MODE_SIZE (mode);
    5170              : 
    5171         1364 :               gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
    5172         1364 :               prev_size = INTVAL (XEXP (slot, 1));
    5173         1364 :               if (prev_size + cur_size > size)
    5174              :                 {
    5175           30 :                   cur_size = size - prev_size;
    5176           30 :                   unsigned int nbits = cur_size * BITS_PER_UNIT;
    5177           30 :                   if (!int_mode_for_size (nbits, 1).exists (&mode))
    5178           10 :                     mode = QImode;
    5179              :                 }
    5180         1364 :               piece_type = lang_hooks.types.type_for_mode (mode, 1);
    5181         1364 :               if (mode == GET_MODE (reg))
    5182         1334 :                 addr_type = build_pointer_type (piece_type);
    5183              :               else
    5184           30 :                 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5185              :                                                          true);
    5186         1364 :               daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5187              :                                                         true);
    5188              : 
    5189         1364 :               if (SSE_REGNO_P (REGNO (reg)))
    5190              :                 {
    5191          534 :                   src_addr = sse_addr;
    5192          534 :                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
    5193              :                 }
    5194              :               else
    5195              :                 {
    5196          830 :                   src_addr = int_addr;
    5197          830 :                   src_offset = REGNO (reg) * 8;
    5198              :                 }
    5199         1364 :               src_addr = fold_convert (addr_type, src_addr);
    5200         1364 :               src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
    5201              : 
    5202         1364 :               dest_addr = fold_convert (daddr_type, addr);
    5203         1364 :               dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
    5204         2728 :               if (cur_size == GET_MODE_SIZE (mode))
    5205              :                 {
    5206         1354 :                   src = build_va_arg_indirect_ref (src_addr);
    5207         1354 :                   dest = build_va_arg_indirect_ref (dest_addr);
    5208              : 
    5209         1354 :                   gimplify_assign (dest, src, pre_p);
    5210              :                 }
    5211              :               else
    5212              :                 {
    5213           10 :                   tree copy
    5214           20 :                     = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
    5215              :                                        3, dest_addr, src_addr,
    5216           10 :                                        size_int (cur_size));
    5217           10 :                   gimplify_and_add (copy, pre_p);
    5218              :                 }
    5219         1364 :               prev_size += cur_size;
    5220              :             }
    5221              :         }
    5222              : 
    5223        28735 :       if (needed_intregs)
    5224              :         {
    5225        18014 :           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
    5226        18014 :                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
    5227        18014 :           gimplify_assign (gpr, t, pre_p);
    5228              :           /* The GPR save area guarantees only 8-byte alignment.  */
    5229        18014 :           if (!need_temp)
    5230        17210 :             type_align = MIN (type_align, 64);
    5231              :         }
    5232              : 
    5233        28735 :       if (needed_sseregs)
    5234              :         {
    5235        11113 :           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
    5236        11113 :                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
    5237        11113 :           gimplify_assign (unshare_expr (fpr), t, pre_p);
    5238              :         }
    5239              : 
    5240        28735 :       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
    5241              : 
    5242        28735 :       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    5243              :     }
    5244              : 
    5245              :   /* ... otherwise out of the overflow area.  */
    5246              : 
    5247              :   /* When we align parameter on stack for caller, if the parameter
    5248              :      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
    5249              :      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
    5250              :      here with caller.  */
    5251        51828 :   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
    5252        51828 :   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    5253              :     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
    5254              : 
    5255              :   /* Care for on-stack alignment if needed.  */
    5256        51828 :   if (arg_boundary <= 64 || size == 0)
    5257        34789 :     t = ovf;
    5258              :  else
    5259              :     {
    5260        17039 :       HOST_WIDE_INT align = arg_boundary / 8;
    5261        17039 :       t = fold_build_pointer_plus_hwi (ovf, align - 1);
    5262        17039 :       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
    5263        17039 :                   build_int_cst (TREE_TYPE (t), -align));
    5264              :     }
    5265              : 
    5266        51828 :   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
    5267        51828 :   gimplify_assign (addr, t, pre_p);
    5268              : 
    5269        51828 :   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
    5270        51828 :   gimplify_assign (unshare_expr (ovf), t, pre_p);
    5271              : 
    5272        51828 :   if (container)
    5273        28735 :     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
    5274              : 
    5275        51828 :   type = build_aligned_type (type, type_align);
    5276        51828 :   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
    5277        51828 :   addr = fold_convert (ptrtype, addr);
    5278              : 
    5279        51828 :   if (indirect_p)
    5280          103 :     addr = build_va_arg_indirect_ref (addr);
    5281        51828 :   return build_va_arg_indirect_ref (addr);
    5282              : }
    5283              : 
    5284              : /* Return true if OPNUM's MEM should be matched in movabs* patterns,
    5285              :    i.e. when volatile_ok is set or that MEM is not volatile.  */
    5286              : 
    5287              : bool
    5288          480 : ix86_check_movabs (rtx insn, int opnum)
    5289              : {
    5290          480 :   rtx set, mem;
    5291              : 
    5292          480 :   set = PATTERN (insn);
    5293          480 :   if (GET_CODE (set) == PARALLEL)
    5294            0 :     set = XVECEXP (set, 0, 0);  /* Examine the PARALLEL's first element.  */
    5295          480 :   gcc_assert (GET_CODE (set) == SET);
    5296          480 :   mem = XEXP (set, opnum);
    5297          480 :   while (SUBREG_P (mem))
    5298            0 :     mem = SUBREG_REG (mem);  /* Look through SUBREGs to the inner rtx.  */
    5299          480 :   gcc_assert (MEM_P (mem));
    5300          480 :   return volatile_ok || !MEM_VOLATILE_P (mem);
    5301              : }
    5302              : 
    5303              : /* Return true if XVECEXP idx of INSN (a MEM-to-MEM SET) satisfies MOVS arguments.  */
    5304              : bool
    5305       222383 : ix86_check_movs (rtx insn, int idx)
    5306              : {
    5307       222383 :   rtx pat = PATTERN (insn);
    5308       222383 :   gcc_assert (GET_CODE (pat) == PARALLEL);
    5309              : 
    5310       222383 :   rtx set = XVECEXP (pat, 0, idx);
    5311       222383 :   gcc_assert (GET_CODE (set) == SET);
    5312              : 
    5313       222383 :   rtx dst = SET_DEST (set);
    5314       222383 :   gcc_assert (MEM_P (dst));
    5315              : 
    5316       222383 :   rtx src = SET_SRC (set);
    5317       222383 :   gcc_assert (MEM_P (src));
    5318              : 
    5319       222383 :   return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))  /* Destination must be generic.  */
    5320       444766 :           && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
    5321            0 :               || Pmode == word_mode));  /* Non-generic source only if Pmode == word_mode.  */
    5322              : }
    5323              : 
    5324              : /* Return false if INSN's pattern contains a MEM with a non-default address space, true otherwise.  */
    5325              : bool
    5326        65420 : ix86_check_no_addr_space (rtx insn)
    5327              : {
    5328        65420 :   subrtx_var_iterator::array_type array;
    5329      1439692 :   FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)  /* Walk the sub-rtxes of the pattern.  */
    5330              :     {
    5331      1374272 :       rtx x = *iter;
    5332      1505112 :       if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
    5333            0 :         return false;
    5334              :     }
    5335        65420 :   return true;
    5336        65420 : }
    5337              : 
    5338              : /* Initialize the table of extra 80387 mathematical constants
    5339              :    (ext_80387_constants_table) and set ext_80387_constants_init.  */
    5340              : static void
    5341         2345 : init_ext_80387_constants (void)
    5342              : {
    5343         2345 :   static const char * cst[5] =
    5344              :   {
    5345              :     "0.3010299956639811952256464283594894482",  /* 0: fldlg2, log10 (2)  */
    5346              :     "0.6931471805599453094286904741849753009",  /* 1: fldln2, ln (2)  */
    5347              :     "1.4426950408889634073876517827983434472",  /* 2: fldl2e, log2 (e)  */
    5348              :     "3.3219280948873623478083405569094566090",  /* 3: fldl2t, log2 (10)  */
    5349              :     "3.1415926535897932385128089594061862044",  /* 4: fldpi, pi  */
    5350              :   };
    5351         2345 :   int i;
    5352              : 
    5353        14070 :   for (i = 0; i < 5; i++)
    5354              :     {
    5355        11725 :       real_from_string (&ext_80387_constants_table[i], cst[i]);
    5356              :       /* Ensure each constant is rounded to XFmode precision.  */
    5357        11725 :       real_convert (&ext_80387_constants_table[i],
    5358        23450 :                     XFmode, &ext_80387_constants_table[i]);
    5359              :     }
    5360              : 
    5361         2345 :   ext_80387_constants_init = 1;  /* Callers test this flag before using the table.  */
    5362         2345 : }
    5363              : 
    5364              : /* Classify X for x87 constant loads.  Return -1 if X is not a CONST_DOUBLE
    5365              :    in an x87 float mode, 0 if no special instruction applies, 1 for 0.0, 2 for
    5366              :    1.0, 3..7 for an ext_80387_constants_table entry, 8 for -0.0, 9 for -1.0.  */
    5367              : int
    5368      5034801 : standard_80387_constant_p (rtx x)
    5369              : {
    5370      5034801 :   machine_mode mode = GET_MODE (x);
    5371              : 
    5372      5034801 :   const REAL_VALUE_TYPE *r;
    5373              : 
    5374      5034801 :   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    5375              :     return -1;
    5376              : 
    5377      4574524 :   if (x == CONST0_RTX (mode))
    5378              :     return 1;
    5379      2113290 :   if (x == CONST1_RTX (mode))
    5380              :     return 2;
    5381              : 
    5382      1229868 :   r = CONST_DOUBLE_REAL_VALUE (x);
    5383              : 
    5384              :   /* For XFmode constants, try to find a special 80387 instruction when
    5385              :      optimizing for size or on those CPUs that benefit from them.  */
    5386      1229868 :   if (mode == XFmode
    5387       796436 :       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
    5388      2026304 :       && !flag_rounding_math)
    5389              :     {
    5390       788386 :       int i;
    5391              : 
    5392       788386 :       if (! ext_80387_constants_init)
    5393         2338 :         init_ext_80387_constants ();
    5394              : 
    5395      4719794 :       for (i = 0; i < 5; i++)
    5396      3940249 :         if (real_identical (r, &ext_80387_constants_table[i]))
    5397         8841 :           return i + 3;  /* Table index I maps to class I + 3.  */
    5398              :     }
    5399              : 
    5400              :   /* Load of the constant -0.0 or -1.0 will be split as
    5401              :      fldz;fchs or fld1;fchs sequence.  */
    5402      1221027 :   if (real_isnegzero (r))
    5403              :     return 8;
    5404      1204537 :   if (real_identical (r, &dconstm1))
    5405       301825 :     return 9;
    5406              : 
    5407              :   return 0;
    5408              : }
    5409              : 
    5410              : /* Return the opcode of the special instruction to be used to load
    5411              :    the constant X, chosen by standard_80387_constant_p (X).  Classes 8 and 9
    5412              :    (-0.0 and -1.0) yield "#"; any other unlisted class is unreachable.  */
    5413              : const char *
    5414        54446 : standard_80387_constant_opcode (rtx x)
    5415              : {
    5416        54446 :   switch (standard_80387_constant_p (x))
    5417              :     {
    5418              :     case 1:
    5419              :       return "fldz";
    5420        33979 :     case 2:
    5421        33979 :       return "fld1";
    5422            1 :     case 3:
    5423            1 :       return "fldlg2";
    5424           10 :     case 4:
    5425           10 :       return "fldln2";
    5426           12 :     case 5:
    5427           12 :       return "fldl2e";
    5428            2 :     case 6:
    5429            2 :       return "fldl2t";
    5430          192 :     case 7:
    5431          192 :       return "fldpi";
    5432            0 :     case 8:
    5433            0 :     case 9:
    5434            0 :       return "#";
    5435            0 :     default:
    5436            0 :       gcc_unreachable ();
    5437              :     }
    5438              : }
    5439              : 
    5440              : /* Return the CONST_DOUBLE representing the 80387 constant that is
    5441              :    loaded by the specified special instruction.  The argument IDX
    5442              :    matches the return value from standard_80387_constant_p and must be
    5443              :    in the range 3..7 (an extended-table constant); the result is in XFmode.  */
    5444              : rtx
    5445           24 : standard_80387_constant_rtx (int idx)
    5446              : {
    5447           24 :   int i;
    5448              : 
    5449           24 :   if (! ext_80387_constants_init)
    5450            7 :     init_ext_80387_constants ();  /* Fill the table on first use.  */
    5451              : 
    5452           24 :   switch (idx)
    5453              :     {
    5454           24 :     case 3:
    5455           24 :     case 4:
    5456           24 :     case 5:
    5457           24 :     case 6:
    5458           24 :     case 7:
    5459           24 :       i = idx - 3;  /* Convert the class back to a table index.  */
    5460           24 :       break;
    5461              : 
    5462            0 :     default:
    5463            0 :       gcc_unreachable ();
    5464              :     }
    5465              : 
    5466           24 :   return const_double_from_real_value (ext_80387_constants_table[i],
    5467           24 :                                        XFmode);
    5468              : }
    5469              : 
    5470              : /* Return 1 if X is all bits 0, 2 if X is all bits 1
    5471              :    and 3 if X is all bits 1 with zero extend
    5472              :    in supported SSE/AVX vector mode.  Return 0 otherwise, including when
    5473              :    !TARGET_SSE.  PRED_MODE supplies the mode when X has VOIDmode.  */
    5474              : int
    5475     55055238 : standard_sse_constant_p (rtx x, machine_mode pred_mode)
    5476              : {
    5477     55055238 :   machine_mode mode;
    5478              : 
    5479     55055238 :   if (!TARGET_SSE)
    5480              :     return 0;
    5481              : 
    5482     54886207 :   mode = GET_MODE (x);
    5483              : 
    5484     54886207 :   if (x == const0_rtx || const0_operand (x, mode))
    5485     13105980 :     return 1;
    5486              : 
    5487     41780227 :   if (x == constm1_rtx
    5488     41640800 :       || vector_all_ones_operand (x, mode)
    5489     82853722 :       || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
    5490     34446074 :            || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
    5491      6628368 :           && float_vector_all_ones_operand (x, mode)))
    5492              :     {
    5493              :       /* VOIDmode integer constant, get mode from the predicate.  */
    5494       708823 :       if (mode == VOIDmode)
    5495       139427 :         mode = pred_mode;
    5496              : 
    5497      1417646 :       switch (GET_MODE_SIZE (mode))  /* All-ones is accepted only for sizes the target supports.  */
    5498              :         {
    5499        30534 :         case 64:
    5500        30534 :           if (TARGET_AVX512F)
    5501              :             return 2;
    5502              :           break;
    5503        39939 :         case 32:
    5504        39939 :           if (TARGET_AVX2)
    5505              :             return 2;
    5506              :           break;
    5507       625957 :         case 16:
    5508       625957 :           if (TARGET_SSE2)
    5509              :             return 2;
    5510              :           break;
    5511            0 :         case 0:
    5512              :           /* VOIDmode */
    5513            0 :           gcc_unreachable ();
    5514              :         default:
    5515              :           break;
    5516              :         }
    5517              :     }
    5518              : 
    5519     41084728 :   if (vector_all_ones_zero_extend_half_operand (x, mode)
    5520     41084728 :       || vector_all_ones_zero_extend_quarter_operand (x, mode))
    5521          706 :     return 3;
    5522              : 
    5523              :   return 0;
    5524              : }
    5525              : 
    5526              : /* Return the opcode of the special instruction to be used to load
    5527              :    the constant operands[1] into operands[0].  For all-zeros and all-ones the
    5528              :    template is chosen from INSN's mode attribute; any other constant must be zero-extended all-ones.  */
    5529              : const char *
    5530       464169 : standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
    5531              : {
    5532       464169 :   machine_mode mode;
    5533       464169 :   rtx x = operands[1];
    5534              : 
    5535       464169 :   gcc_assert (TARGET_SSE);
    5536              : 
    5537       464169 :   mode = GET_MODE (x);
    5538              : 
    5539       464169 :   if (x == const0_rtx || const0_operand (x, mode))  /* All bits zero.  */
    5540              :     {
    5541       452524 :       switch (get_attr_mode (insn))
    5542              :         {
    5543       434865 :         case MODE_TI:
    5544       434865 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5545              :             return "%vpxor\t%0, %d0";
    5546              :           /* FALLTHRU */
    5547         6168 :         case MODE_XI:
    5548         6168 :         case MODE_OI:
    5549         6168 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5550              :             {
    5551           67 :               if (TARGET_AVX512VL)
    5552              :                 return "vpxord\t%x0, %x0, %x0";
    5553              :               else
    5554           28 :                 return "vpxord\t%g0, %g0, %g0";
    5555              :             }
    5556              :           return "vpxor\t%x0, %x0, %x0";
    5557              : 
    5558         2098 :         case MODE_V2DF:
    5559         2098 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5560              :             return "%vxorpd\t%0, %d0";
    5561              :           /* FALLTHRU */
    5562          829 :         case MODE_V8DF:
    5563          829 :         case MODE_V4DF:
    5564          829 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5565              :             {
    5566            4 :               if (TARGET_AVX512DQ)
    5567              :                 {
    5568            0 :                   if (TARGET_AVX512VL)
    5569              :                     return "vxorpd\t%x0, %x0, %x0";
    5570              :                   else
    5571            0 :                     return "vxorpd\t%g0, %g0, %g0";
    5572              :                 }
    5573              :               else
    5574              :                 {
    5575            4 :                   if (TARGET_AVX512VL)
    5576              :                     return "vpxorq\t%x0, %x0, %x0";
    5577              :                   else
    5578            4 :                     return "vpxorq\t%g0, %g0, %g0";
    5579              :                 }
    5580              :             }
    5581              :           return "vxorpd\t%x0, %x0, %x0";
    5582              : 
    5583         6617 :         case MODE_V4SF:
    5584         6617 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5585              :             return "%vxorps\t%0, %d0";
    5586              :           /* FALLTHRU */
    5587         2011 :         case MODE_V16SF:
    5588         2011 :         case MODE_V8SF:
    5589         2011 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5590              :             {
    5591           68 :               if (TARGET_AVX512DQ)
    5592              :                 {
    5593           26 :                   if (TARGET_AVX512VL)
    5594              :                     return "vxorps\t%x0, %x0, %x0";
    5595              :                   else
    5596            0 :                     return "vxorps\t%g0, %g0, %g0";
    5597              :                 }
    5598              :               else
    5599              :                 {
    5600           42 :                   if (TARGET_AVX512VL)
    5601              :                     return "vpxord\t%x0, %x0, %x0";
    5602              :                   else
    5603           40 :                     return "vpxord\t%g0, %g0, %g0";
    5604              :                 }
    5605              :             }
    5606              :           return "vxorps\t%x0, %x0, %x0";
    5607              : 
    5608            0 :         default:
    5609            0 :           gcc_unreachable ();
    5610              :         }
    5611              :     }
    5612        11645 :   else if (x == constm1_rtx
    5613        11634 :            || vector_all_ones_operand (x, mode)
    5614        11712 :            || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
    5615           45 :                && float_vector_all_ones_operand (x, mode)))  /* All bits one.  */
    5616              :     {
    5617        11623 :       enum attr_mode insn_mode = get_attr_mode (insn);
    5618              : 
    5619        11623 :       switch (insn_mode)
    5620              :         {
    5621            3 :         case MODE_XI:
    5622            3 :         case MODE_V8DF:
    5623            3 :         case MODE_V16SF:
    5624            3 :           gcc_assert (TARGET_AVX512F);
    5625              :           return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
    5626              : 
    5627          959 :         case MODE_OI:
    5628          959 :         case MODE_V4DF:
    5629          959 :         case MODE_V8SF:
    5630          959 :           gcc_assert (TARGET_AVX2);
    5631              :           /* FALLTHRU */
    5632        11620 :         case MODE_TI:
    5633        11620 :         case MODE_V2DF:
    5634        11620 :         case MODE_V4SF:
    5635        11620 :           gcc_assert (TARGET_SSE2);
    5636        11620 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5637              :             {
    5638            2 :               if (TARGET_AVX512VL)
    5639              :                 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
    5640              :               else
    5641            0 :                 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
    5642              :             }
    5643        11618 :           return (TARGET_AVX
    5644        11618 :                   ? "vpcmpeqd\t%0, %0, %0"
    5645        11618 :                   : "pcmpeqd\t%0, %0");
    5646              : 
    5647            0 :         default:
    5648            0 :           gcc_unreachable ();
    5649              :         }
    5650              :    }
    5651           22 :   else if (vector_all_ones_zero_extend_half_operand (x, mode))
    5652              :     {
    5653           40 :       if (GET_MODE_SIZE (mode) == 64)
    5654              :         {
    5655            5 :           gcc_assert (TARGET_AVX512F);
    5656              :           return "vpcmpeqd\t%t0, %t0, %t0";
    5657              :         }
    5658           30 :       else if (GET_MODE_SIZE (mode) == 32)
    5659              :         {
    5660           15 :           gcc_assert (TARGET_AVX);
    5661              :           return "vpcmpeqd\t%x0, %x0, %x0";
    5662              :         }
    5663            0 :       gcc_unreachable ();
    5664              :     }
    5665            2 :   else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    5666              :     {
    5667            2 :       gcc_assert (TARGET_AVX512F);
    5668              :       return "vpcmpeqd\t%x0, %x0, %x0";
    5669              :     }
    5670              : 
    5671            0 :   gcc_unreachable ();
    5672              : }
    5673              : 
    5674              : /* Returns true if INSN can be transformed from a memory load
    5675              :    to a supported FP constant load into register DST.  */
    5676              : 
    5677              : bool
    5678      2144820 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
    5679              : {
    5680      2144820 :   rtx src = find_constant_src (insn);
    5681              : 
    5682      2144820 :   gcc_assert (REG_P (dst));
    5683              : 
    5684      2144820 :   if (src == NULL  /* find_constant_src returned nothing.  */
    5685       595991 :       || (SSE_REGNO_P (REGNO (dst))
    5686       463968 :           && standard_sse_constant_p (src, GET_MODE (dst)) != 1)  /* SSE reg: only class 1 (all zeros) qualifies.  */
    5687       165919 :       || (!TARGET_AVX512VL
    5688       165858 :           && EXT_REX_SSE_REGNO_P (REGNO (dst))
    5689            0 :           && standard_sse_constant_p (src, GET_MODE (dst)) == 1)  /* Extended SSE reg without AVX512VL is rejected.  */
    5690      2310739 :       || (STACK_REGNO_P (REGNO (dst))
    5691       132023 :            && standard_80387_constant_p (src) < 1))  /* x87 reg: class must be >= 1.  */
    5692      2068335 :     return false;
    5693              : 
    5694              :   return true;
    5695              : }
    5696              : 
    5697              : /* Predicate for pre-reload splitters with associated instructions,
    5698              :    which can match any time before the split1 pass (usually combine),
    5699              :    then are unconditionally split in that pass and should not be
    5700              :    matched again afterwards.  True while pseudos can still be created
    5701              :    and cfun->curr_properties lacks PROP_rtl_split_insns.  */
    5702              : bool
    5703     17909070 : ix86_pre_reload_split (void)
    5704              : {
    5705     17909070 :   return (can_create_pseudo_p ()
    5706     27315368 :           && !(cfun->curr_properties & PROP_rtl_split_insns));
    5707              : }
    5708              : 
    5709              : /* Return the opcode of the TYPE_SSEMOV instruction.  A move from
    5710              :    or to xmm16-xmm31/ymm16-ymm31 registers requires either
    5711              :    TARGET_AVX512VL or a register-to-register move, which can
    5712              :    be done as a zmm register move.  */
    5713              : 
    5714              : static const char *
    5715      4188696 : ix86_get_ssemov (rtx *operands, unsigned size,
    5716              :                  enum attr_mode insn_mode, machine_mode mode)
    5717              : {
    5718      4188696 :   char buf[128];
    5719      4188696 :   bool misaligned_p = (misaligned_operand (operands[0], mode)
    5720      4188696 :                        || misaligned_operand (operands[1], mode));
    5721      4188696 :   bool evex_reg_p = (size == 64
    5722      4102318 :                      || EXT_REX_SSE_REG_P (operands[0])
    5723      8290273 :                      || EXT_REX_SSE_REG_P (operands[1]));
    5724              : 
    5725      4188696 :   bool egpr_p = (TARGET_APX_EGPR
    5726      4188696 :                  && (x86_extended_rex2reg_mentioned_p (operands[0])
    5727          183 :                      || x86_extended_rex2reg_mentioned_p (operands[1])));
    5728          196 :   bool egpr_vl = egpr_p && TARGET_AVX512VL;
    5729              : 
    5730      4188696 :   machine_mode scalar_mode;
    5731              : 
    5732      4188696 :   const char *opcode = NULL;
    5733      4188696 :   enum
    5734              :     {
    5735              :       opcode_int,
    5736              :       opcode_float,
    5737              :       opcode_double
    5738      4188696 :     } type = opcode_int;
    5739              : 
    5740      4188696 :   switch (insn_mode)
    5741              :     {
    5742              :     case MODE_V16SF:
    5743              :     case MODE_V8SF:
    5744              :     case MODE_V4SF:
    5745              :       scalar_mode = E_SFmode;
    5746              :       type = opcode_float;
    5747              :       break;
    5748       209257 :     case MODE_V8DF:
    5749       209257 :     case MODE_V4DF:
    5750       209257 :     case MODE_V2DF:
    5751       209257 :       scalar_mode = E_DFmode;
    5752       209257 :       type = opcode_double;
    5753       209257 :       break;
    5754      1520081 :     case MODE_XI:
    5755      1520081 :     case MODE_OI:
    5756      1520081 :     case MODE_TI:
    5757      1520081 :       scalar_mode = GET_MODE_INNER (mode);
    5758              :       break;
    5759            0 :     default:
    5760            0 :       gcc_unreachable ();
    5761              :     }
    5762              : 
    5763              :   /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
    5764              :      we can only use zmm register move without memory operand.  */
    5765      4188696 :   if (evex_reg_p
    5766        88427 :       && !TARGET_AVX512VL
    5767      4238817 :       && GET_MODE_SIZE (mode) < 64)
    5768              :     {
    5769              :       /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
    5770              :          xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
    5771              :          AVX512VL is disabled, LRA can still generate reg to
    5772              :          reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
    5773              :          modes.  */
    5774            0 :       if (memory_operand (operands[0], mode)
    5775            0 :           || memory_operand (operands[1], mode))
    5776            0 :         gcc_unreachable ();
    5777            0 :       size = 64;
    5778            0 :       switch (type)
    5779              :         {
    5780            0 :         case opcode_int:
    5781            0 :           if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
    5782            0 :             opcode = (misaligned_p
    5783            0 :                       ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
    5784              :                       : "vmovdqa64");
    5785              :           else
    5786            0 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5787              :           break;
    5788            0 :         case opcode_float:
    5789            0 :           opcode = misaligned_p ? "vmovups" : "vmovaps";
    5790              :           break;
    5791            0 :         case opcode_double:
    5792            0 :           opcode = misaligned_p ? "vmovupd" : "vmovapd";
    5793              :           break;
    5794              :         }
    5795              :     }
    5796      4188696 :   else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    5797              :     {
    5798      2844557 :       switch (scalar_mode)
    5799              :         {
    5800        36750 :         case E_HFmode:
    5801        36750 :         case E_BFmode:
    5802        36750 :           if (evex_reg_p || egpr_vl)
    5803        11597 :             opcode = (misaligned_p
    5804          173 :                       ? (TARGET_AVX512BW
    5805              :                          ? "vmovdqu16"
    5806              :                          : "vmovdqu64")
    5807              :                       : "vmovdqa64");
    5808        25153 :           else if (egpr_p)
    5809       803435 :             opcode = (misaligned_p
    5810            0 :                       ? (TARGET_AVX512BW
    5811            0 :                          ? "vmovdqu16"
    5812              :                          : "%vmovups")
    5813              :                       : "%vmovaps");
    5814              :           else
    5815       425679 :             opcode = (misaligned_p
    5816        25153 :                       ? (TARGET_AVX512BW && evex_reg_p
    5817              :                          ? "vmovdqu16"
    5818              :                          : "%vmovdqu")
    5819              :                       : "%vmovdqa");
    5820              :           break;
    5821      2459358 :         case E_SFmode:
    5822      2459358 :           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5823              :           break;
    5824       209257 :         case E_DFmode:
    5825       209257 :           opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
    5826              :           break;
    5827       139192 :         case E_TFmode:
    5828       139192 :           if (evex_reg_p || egpr_vl)
    5829           14 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5830       139178 :           else if (egpr_p)
    5831            0 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5832              :           else
    5833       139178 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5834              :           break;
    5835            0 :         default:
    5836            0 :           gcc_unreachable ();
    5837              :         }
    5838              :     }
    5839      1344139 :   else if (SCALAR_INT_MODE_P (scalar_mode))
    5840              :     {
    5841      1344139 :       switch (scalar_mode)
    5842              :         {
    5843       106732 :         case E_QImode:
    5844       106732 :           if (evex_reg_p || egpr_vl)
    5845      4198907 :             opcode = (misaligned_p
    5846        10211 :                       ? (TARGET_AVX512BW
    5847         5071 :                          ? "vmovdqu8"
    5848              :                          : "vmovdqu64")
    5849              :                       : "vmovdqa64");
    5850        96521 :           else if (egpr_p)
    5851           30 :             opcode = (misaligned_p
    5852            0 :                       ? (TARGET_AVX512BW
    5853              :                          ? "vmovdqu8"
    5854              :                          : "%vmovups")
    5855              :                       : "%vmovaps");
    5856              :           else
    5857        96491 :             opcode = (misaligned_p
    5858              :                       ? (TARGET_AVX512BW && evex_reg_p
    5859              :                          ? "vmovdqu8"
    5860              :                          : "%vmovdqu")
    5861              :                       : "%vmovdqa");
    5862              :           break;
    5863        42594 :         case E_HImode:
    5864        42594 :           if (evex_reg_p || egpr_vl)
    5865         3716 :             opcode = (misaligned_p
    5866          300 :                       ? (TARGET_AVX512BW
    5867              :                          ? "vmovdqu16"
    5868              :                          : "vmovdqu64")
    5869              :                       : "vmovdqa64");
    5870        38878 :           else if (egpr_p)
    5871       803435 :             opcode = (misaligned_p
    5872           27 :                       ? (TARGET_AVX512BW
    5873            0 :                          ? "vmovdqu16"
    5874              :                          : "%vmovups")
    5875              :                       : "%vmovaps");
    5876              :           else
    5877       400526 :             opcode = (misaligned_p
    5878        38851 :                       ? (TARGET_AVX512BW && evex_reg_p
    5879              :                          ? "vmovdqu16"
    5880              :                          : "%vmovdqu")
    5881              :                       : "%vmovdqa");
    5882              :           break;
    5883       182029 :         case E_SImode:
    5884       182029 :           if (evex_reg_p || egpr_vl)
    5885         8200 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5886       173829 :           else if (egpr_p)
    5887           14 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5888              :           else
    5889       173815 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5890              :           break;
    5891      1000976 :         case E_DImode:
    5892      1000976 :         case E_TImode:
    5893      1000976 :         case E_OImode:
    5894      1000976 :           if (evex_reg_p || egpr_vl)
    5895        18500 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5896       982476 :           else if (egpr_p)
    5897           26 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5898              :           else
    5899       982450 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5900              :           break;
    5901        11808 :         case E_XImode:
    5902        49491 :           opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5903              :           break;
    5904            0 :         default:
    5905            0 :           gcc_unreachable ();
    5906              :         }
    5907              :     }
    5908              :   else
    5909            0 :     gcc_unreachable ();
    5910              : 
    5911      4188696 :   switch (size)
    5912              :     {
    5913        86378 :     case 64:
    5914        86378 :       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
    5915              :                 opcode);
    5916        86378 :       break;
    5917        91643 :     case 32:
    5918        91643 :       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
    5919              :                 opcode);
    5920        91643 :       break;
    5921      4010675 :     case 16:
    5922      4010675 :       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
    5923              :                 opcode);
    5924      4010675 :       break;
    5925            0 :     default:
    5926            0 :       gcc_unreachable ();
    5927              :     }
    5928      4188696 :   output_asm_insn (buf, operands);
    5929      4188696 :   return "";
    5930              : }
    5931              : 
    5932              : /* Return the output template for TYPE_SSEMOV insn INSN, which moves
    5933              :    operands[1] into operands[0]; both must have the same machine mode.  */
    5934              : 
    5935              : const char *
    5936      6565236 : ix86_output_ssemov (rtx_insn *insn, rtx *operands)
    5937              : {
    5938      6565236 :   machine_mode mode = GET_MODE (operands[0]);
    5939      6565236 :   if (get_attr_type (insn) != TYPE_SSEMOV
    5940      6565236 :       || mode != GET_MODE (operands[1]))
    5941            0 :     gcc_unreachable ();
    5942              : 
    5943      6565236 :   enum attr_mode insn_mode = get_attr_mode (insn);
    5944              : 
    5945      6565236 :   switch (insn_mode)
    5946              :     {
    5947        86378 :     case MODE_XI:  /* Vector modes: ix86_get_ssemov outputs the insn itself.  */
    5948        86378 :     case MODE_V8DF:
    5949        86378 :     case MODE_V16SF:
    5950        86378 :       return ix86_get_ssemov (operands, 64, insn_mode, mode);
    5951              : 
    5952        91643 :     case MODE_OI:
    5953        91643 :     case MODE_V4DF:
    5954        91643 :     case MODE_V8SF:
    5955        91643 :       return ix86_get_ssemov (operands, 32, insn_mode, mode);
    5956              : 
    5957      4010675 :     case MODE_TI:
    5958      4010675 :     case MODE_V2DF:
    5959      4010675 :     case MODE_V4SF:
    5960      4010675 :       return ix86_get_ssemov (operands, 16, insn_mode, mode);
    5961              : 
    5962       662429 :     case MODE_DI:
    5963              :       /* Handle broken assemblers that require movd instead of movq. */
    5964       662429 :       if (GENERAL_REG_P (operands[0]))
    5965              :         {
    5966              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5967              :             return "%vmovq\t{%1, %q0|%q0, %1}";
    5968              :           else
    5969              :             return "%vmovd\t{%1, %q0|%q0, %1}";
    5970              :         }
    5971       586224 :       else if (GENERAL_REG_P (operands[1]))
    5972              :         {
    5973              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5974              :             return "%vmovq\t{%q1, %0|%0, %q1}";
    5975              :           else
    5976              :             return "%vmovd\t{%q1, %0|%0, %q1}";
    5977              :         }
    5978              :       else
    5979       420082 :         return "%vmovq\t{%1, %0|%0, %1}";
    5980              : 
    5981       201352 :     case MODE_SI:
    5982       201352 :       if (GENERAL_REG_P (operands[0]))
    5983              :         return "%vmovd\t{%1, %k0|%k0, %1}";
    5984       145341 :       else if (GENERAL_REG_P (operands[1]))
    5985              :         return "%vmovd\t{%k1, %0|%0, %k1}";
    5986              :       else
    5987        60662 :         return "%vmovd\t{%1, %0|%0, %1}";
    5988              : 
    5989        54085 :     case MODE_HI:
    5990        54085 :       if (GENERAL_REG_P (operands[0]))
    5991              :         return "vmovw\t{%1, %k0|%k0, %1}";
    5992        53922 :       else if (GENERAL_REG_P (operands[1]))
    5993              :         return "vmovw\t{%k1, %0|%0, %k1}";
    5994              :       else
    5995        53688 :         return "vmovw\t{%1, %0|%0, %1}";
    5996              : 
    5997       780993 :     case MODE_DF:
    5998       780993 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))  /* AVX reg-to-reg gets its own template.  */
    5999              :         return "vmovsd\t{%d1, %0|%0, %d1}";  /* NOTE(review): %d1 presumably prints operand 1 twice -- confirm.  */
    6000              :       else
    6001       780170 :         return "%vmovsd\t{%1, %0|%0, %1}";
    6002              : 
    6003       673614 :     case MODE_SF:
    6004       673614 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))  /* Same split as MODE_DF.  */
    6005              :         return "vmovss\t{%d1, %0|%0, %d1}";
    6006              :       else
    6007       673046 :         return "%vmovss\t{%1, %0|%0, %1}";
    6008              : 
    6009           96 :     case MODE_HF:
    6010           96 :     case MODE_BF:
    6011           96 :       if (REG_P (operands[0]) && REG_P (operands[1]))  /* No TARGET_AVX test here, unlike MODE_DF/SF.  */
    6012              :         return "vmovsh\t{%d1, %0|%0, %d1}";
    6013              :       else
    6014            0 :         return "vmovsh\t{%1, %0|%0, %1}";
    6015              : 
    6016           36 :     case MODE_V1DF:
    6017           36 :       gcc_assert (!TARGET_AVX);  /* Only a non-VEX movlpd template exists for this mode.  */
    6018              :       return "movlpd\t{%1, %0|%0, %1}";
    6019              : 
    6020         3935 :     case MODE_V2SF:
    6021         3935 :       if (TARGET_AVX && REG_P (operands[0]))
    6022              :         return "vmovlps\t{%1, %d0|%d0, %1}";
    6023              :       else
    6024         3862 :         return "%vmovlps\t{%1, %0|%0, %1}";
    6025              : 
    6026            0 :     default:
    6027            0 :       gcc_unreachable ();
    6028              :     }
    6029              : }
    6030              : 
    6031              : /* Return true if OP is, or recursively contains, a SYMBOL_REF or LABEL_REF.  */
    6032              : 
    6033              : bool
    6034    583867066 : symbolic_reference_mentioned_p (const_rtx op)
    6035              : {
    6036    583867066 :   const char *fmt;
    6037    583867066 :   int i;
    6038              : 
    6039    583867066 :   if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
    6040              :     return true;
    6041              : 
    6042    441335511 :   fmt = GET_RTX_FORMAT (GET_CODE (op));
    6043    748818725 :   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)  /* Visit operands last to first.  */
    6044              :     {
    6045    597460605 :       if (fmt[i] == 'E')  /* Operand I is a vector; check each element.  */
    6046              :         {
    6047      2020273 :           int j;
    6048              : 
    6049      4041105 :           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
    6050      3327260 :             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
    6051              :               return true;
    6052              :         }
    6053              : 
    6054    595440332 :       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))  /* Single sub-expression.  */
    6055              :         return true;
    6056              :     }
    6057              : 
    6058              :   return false;
    6059              : }
    6060              : 
    6061              : /* Return true if it is appropriate to emit `ret' instructions in the
    6062              :    body of a function.  Do this only if the epilogue is simple, needing a
    6063              :    couple of insns.  Prior to reloading, we can't tell how many registers
    6064              :    must be saved, so return false then.  Return false if there is no frame
    6065              :    marker to de-allocate.  */
    6066              : 
    6067              : bool
    6068            0 : ix86_can_use_return_insn_p (void)
    6069              : {
    6070            0 :   if (ix86_function_ms_hook_prologue (current_function_decl))
    6071              :     return false;
    6072              : 
    6073            0 :   if (ix86_function_naked (current_function_decl))
    6074              :     return false;
    6075              : 
    6076              :   /* Don't use `ret' before reload, with a frame pointer, or in a function that is not TYPE_NORMAL (e.g. an interrupt handler).  */
    6077            0 :   if (! reload_completed
    6078            0 :       || frame_pointer_needed
    6079            0 :       || cfun->machine->func_type != TYPE_NORMAL)
    6080              :     return 0;
    6081              : 
    6082              :   /* Don't allow more than 32k pop, since that's all we can do
    6083              :      with one instruction.  */
    6084            0 :   if (crtl->args.pops_args && crtl->args.size >= 32768)
    6085              :     return 0;
    6086              : 
    6087            0 :   struct ix86_frame &frame = cfun->machine->frame;
    6088            0 :   return (frame.stack_pointer_offset == UNITS_PER_WORD  /* NOTE(review): presumably only the return address is on the stack -- confirm.  */
    6089            0 :           && (frame.nregs + frame.nsseregs) == 0);  /* No integer or SSE register saves.  */
    6090              : }
    6091              : 
    6092              : /* Return stack frame size, or 0 if stack_frame_required is false.
    6093              :    get_frame_size () returns used stack slots during compilation,
    6094              :    which may be optimized out later.  */
    6095              : 
    6096              : static HOST_WIDE_INT
    6097      8243772 : ix86_get_frame_size (void)
    6098              : {
    6099      8243772 :   if (cfun->machine->stack_frame_required)
    6100      8174415 :     return get_frame_size ();
    6101              :   else
    6102              :     return 0;  /* Ignore whatever get_frame_size () would report.  */
    6103              : }
    6104              : 
    6105              : /* Return true if the current function must have a frame pointer.
    6106              :    False means the frame pointer need not be set up (and parms may
    6107              :    be accessed via the stack pointer) in functions that seem suitable.  */
    6108              : 
    6109              : static bool
    6110      1227961 : ix86_frame_pointer_required (void)
    6111              : {
    6112              :   /* If we accessed previous frames, then the generated code expects
    6113              :      to be able to access the saved ebp value in our frame.  */
    6114      1227961 :   if (cfun->machine->accesses_prev_frame)
    6115              :     return true;
    6116              : 
    6117              :   /* Several x86 os'es need a frame pointer for other reasons,
    6118              :      usually pertaining to setjmp.  */
    6119      1227928 :   if (SUBTARGET_FRAME_POINTER_REQUIRED)
    6120              :     return true;
    6121              : 
    6122              :   /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
    6123      1227928 :   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    6124              :     return true;
    6125              : 
    6126              :   /* Win64 SEH, very large frames need a frame-pointer as maximum stack
    6127              :      allocation is 4GB.  */
    6128      1227928 :   if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    6129              :     return true;
    6130              : 
    6131              :   /* SSE saves require frame-pointer when stack is misaligned.  */
    6132      1227928 :   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    6133              :     return true;
    6134              : 
    6135              :   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
    6136              :      turns off the frame pointer by default.  Turn it back on now if
    6137              :      we've not got a leaf function.  */
    6138      1227927 :   if (TARGET_OMIT_LEAF_FRAME_POINTER
    6139      1227927 :       && (!crtl->is_leaf
    6140            0 :           || ix86_current_function_calls_tls_descriptor))  /* A TLS descriptor call also disqualifies the function.  */
    6141            0 :     return true;
    6142              : 
    6143              :   /* Several versions of mcount for the x86 assumes that there is a
    6144              :      frame, so we cannot allow profiling without a frame pointer.  */
    6145      1227927 :   if (crtl->profile && !flag_fentry)
    6146              :     return true;
    6147              : 
    6148              :   return false;
    6149              : }
    6150              : 
    6151              : /* Record that the current function accesses previous call frames.  */
    6152              : 
    6153              : void
    6154          966 : ix86_setup_frame_addresses (void)
    6155              : {
    6156          966 :   cfun->machine->accesses_prev_frame = 1;  /* Tested by ix86_frame_pointer_required.  */
    6157          966 : }
    6158              : 
    6159              : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)  /* Picks how indirect_thunk_name names thunks.  */
    6160              : # define USE_HIDDEN_LINKONCE 1  /* Thunks get __x86_*_thunk* names.  */
    6161              : #else
    6162              : # define USE_HIDDEN_LINKONCE 0  /* Thunks get internal labels instead.  */
    6163              : #endif
    6164              : 
    6165              : /* Label count for call and return thunks.  It is used to make unique
    6166              :    labels in call and return thunks.  */
    6167              : static int indirectlabelno;  /* Post-incremented once per generated label.  */
    6168              : 
    6169              : /* True if call thunk function is needed.  */
    6170              : static bool indirect_thunk_needed = false;
    6171              : 
    6172              : /* Bit masks of integer registers, which contain branch target, used
    6173              :    by call thunk functions.  */
    6174              : static HARD_REG_SET indirect_thunks_used;
    6175              : 
    6176              : /* True if return thunk function is needed.  */
    6177              : static bool indirect_return_needed = false;
    6178              : 
    6179              : /* True if return thunk function via CX is needed.  */
    6180              : static bool indirect_return_via_cx;
    6181              : 
    6182              : #ifndef INDIRECT_LABEL
    6183              : # define INDIRECT_LABEL "LIND"  /* Prefix passed to ASM_GENERATE_INTERNAL_LABEL for thunk labels.  */
    6184              : #endif
    6185              : 
    6186              : /* Indicate what prefix is needed for an indirect branch.  */
    6187              : enum indirect_thunk_prefix
    6188              : {
    6189              :   indirect_thunk_prefix_none,  /* No prefix.  */
    6190              :   indirect_thunk_prefix_nt  /* NOTRACK prefix; adds "_nt" to register thunk names.  */
    6191              : };
    6192              : 
    6193              : /* Return the prefix needed for an indirect branch INSN: _nt only for a notrack-prefixed INSN with extern thunks, else _none.  */
    6194              : 
    6195              : enum indirect_thunk_prefix
    6196           67 : indirect_thunk_need_prefix (rtx_insn *insn)
    6197              : {
    6198           67 :   enum indirect_thunk_prefix need_prefix;
    6199           67 :   if ((cfun->machine->indirect_branch_type
    6200           67 :             == indirect_branch_thunk_extern)
    6201           67 :            && ix86_notrack_prefixed_insn_p (insn))
    6202              :     {
    6203              :       /* NOTRACK prefix is only used with external thunk so that it
    6204              :          can be properly updated to support CET at run-time.  */
    6205              :       need_prefix = indirect_thunk_prefix_nt;
    6206              :     }
    6207              :   else
    6208              :     need_prefix = indirect_thunk_prefix_none;
    6209           67 :   return need_prefix;
    6210              : }
    6211              : 
    6212              : /* Fill NAME with the label of the indirect (or, if RET_P, return) thunk for REGNO and NEED_PREFIX.  */
    6213              : 
    6214              : static void
    6215           73 : indirect_thunk_name (char name[32], unsigned int regno,
    6216              :                      enum indirect_thunk_prefix need_prefix,
    6217              :                      bool ret_p)
    6218              : {
    6219           73 :   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)  /* Return thunks take no register or CX_REG only.  */
    6220            0 :     gcc_unreachable ();
    6221              : 
    6222           73 :   if (USE_HIDDEN_LINKONCE)
    6223              :     {
    6224           73 :       const char *prefix;
    6225              : 
    6226           73 :       if (need_prefix == indirect_thunk_prefix_nt
    6227           73 :           && regno != INVALID_REGNUM)
    6228              :         {
    6229              :           /* NOTRACK prefix is only used with external thunk via
    6230              :              register so that NOTRACK prefix can be added to indirect
    6231              :              branch via register to support CET at run-time.  */
    6232              :           prefix = "_nt";
    6233              :         }
    6234              :       else
    6235           71 :         prefix = "";
    6236              : 
    6237           73 :       const char *ret = ret_p ? "return" : "indirect";
    6238              : 
    6239           73 :       if (regno != INVALID_REGNUM)
    6240              :         {
    6241           55 :           const char *reg_prefix;
    6242           55 :           if (LEGACY_INT_REGNO_P (regno))
    6243           53 :             reg_prefix = TARGET_64BIT ? "r" : "e";
    6244              :           else
    6245              :             reg_prefix = "";
    6246           55 :           sprintf (name, "__x86_%s_thunk%s_%s%s",  /* NOTE(review): unbounded sprintf into NAME[32]; confirm the longest name fits.  */
    6247              :                    ret, prefix, reg_prefix, reg_names[regno]);
    6248              :         }
    6249              :       else
    6250           18 :         sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    6251              :     }
    6252              :   else
    6253              :     {
    6254              :       if (regno != INVALID_REGNUM)  /* NEED_PREFIX is not consulted for internal labels.  */
    6255              :         ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
    6256              :       else
    6257              :         {
    6258              :           if (ret_p)
    6259              :             ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
    6260              :           else
    6261           73 :             ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
    6262              :         }
    6263              :     }
    6264           73 : }
    6265              : 
    6266              : /* Output a call and return thunk for indirect branch.  If REGNO != INVALID_REGNUM,
    6267              :    the function address is in REGNO and the call and return thunk looks like:
    6268              : 
    6269              :         call    L2
    6270              :    L1:
    6271              :         pause
    6272              :         lfence
    6273              :         jmp     L1
    6274              :    L2:
    6275              :         mov     %REG, (%sp)
    6276              :         ret
    6277              : 
    6278              :    Otherwise, the function address is on the top of stack and the
    6279              :    call and return thunk looks like:
    6280              : 
    6281              :         call L2
    6282              :   L1:
    6283              :         pause
    6284              :         lfence
    6285              :         jmp L1
    6286              :   L2:
    6287              :         lea WORD_SIZE(%sp), %sp
    6288              :         ret
    6289              :  */
    6290              : 
    6291              : static void
    6292           38 : output_indirect_thunk (unsigned int regno)
    6293              : {
    6294           38 :   char indirectlabel1[32];  /* L1 in the comment above.  */
    6295           38 :   char indirectlabel2[32];  /* L2 in the comment above.  */
    6296              : 
    6297           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
    6298              :                                indirectlabelno++);
    6299           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
    6300              :                                indirectlabelno++);
    6301              : 
    6302              :   /* Call */
    6303           38 :   fputs ("\tcall\t", asm_out_file);
    6304           38 :   assemble_name_raw (asm_out_file, indirectlabel2);
    6305           38 :   fputc ('\n', asm_out_file);
    6306              : 
    6307           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
    6308              : 
    6309              :   /* AMD and Intel CPUs each prefer a different instruction as loop filler.
    6310              :      Using both pause + lfence is a compromise solution.  */
    6311           38 :   fprintf (asm_out_file, "\tpause\n\tlfence\n");
    6312              : 
    6313              :   /* Jump.  */
    6314           38 :   fputs ("\tjmp\t", asm_out_file);
    6315           38 :   assemble_name_raw (asm_out_file, indirectlabel1);
    6316           38 :   fputc ('\n', asm_out_file);
    6317              : 
    6318           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
    6319              : 
    6320              :   /* The above call insn pushed a word to stack.  Adjust CFI info.  */
    6321           38 :   if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    6322              :     {
    6323           38 :       if (! dwarf2out_do_cfi_asm ())  /* Without CFI asm, first record an advance to label 2.  */
    6324              :         {
    6325            0 :           dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6326            0 :           xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
    6327            0 :           xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
    6328            0 :           vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6329              :         }
    6330           38 :       dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6331           38 :       xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
    6332           38 :       xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;  /* NOTE(review): presumably return address plus the word just pushed -- confirm.  */
    6333           38 :       vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6334           38 :       dwarf2out_emit_cfi (xcfi);
    6335              :     }
    6336              : 
    6337           38 :   if (regno != INVALID_REGNUM)
    6338              :     {
    6339              :       /* MOV.  */
    6340           27 :       rtx xops[2];
    6341           27 :       xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);  /* Word at the top of the stack.  */
    6342           27 :       xops[1] = gen_rtx_REG (word_mode, regno);
    6343           27 :       output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    6344              :     }
    6345              :   else
    6346              :     {
    6347              :       /* LEA.  */
    6348           11 :       rtx xops[2];
    6349           11 :       xops[0] = stack_pointer_rtx;
    6350           11 :       xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);  /* Stack pointer plus one word.  */
    6351           11 :       output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    6352              :     }
    6353              : 
    6354           38 :   fputs ("\tret\n", asm_out_file);
    6355           38 :   if ((ix86_harden_sls & harden_sls_return))  /* Follow the ret with int3 when return hardening is on.  */
    6356            1 :     fputs ("\tint3\n", asm_out_file);
    6357           38 : }
    6358              : 
    6359              : /* Output a function with a call and return thunk for indirect branch.
    6360              :    If REGNO != INVALID_REGNUM, the function address is in REGNO.
    6361              :    Otherwise, the function address is on the top of stack.  Thunk is
    6362              :    used for function return if RET_P is true.  */
    6363              : 
    6364              : static void
    6365           22 : output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
    6366              :                                 unsigned int regno, bool ret_p)
    6367              : {
    6368           22 :   char name[32];
    6369           22 :   tree decl;
    6370              : 
    6371              :   /* Create __x86_indirect_thunk.  */
    6372           22 :   indirect_thunk_name (name, regno, need_prefix, ret_p);
    6373           22 :   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
    6374              :                      get_identifier (name),
    6375              :                      build_function_type_list (void_type_node, NULL_TREE));
    6376           22 :   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
    6377              :                                    NULL_TREE, void_type_node);
    6378           22 :   TREE_PUBLIC (decl) = 1;
    6379           22 :   TREE_STATIC (decl) = 1;
    6380           22 :   DECL_IGNORED_P (decl) = 1;
    6381              : 
    6382              : #if TARGET_MACHO
    6383              :   if (TARGET_MACHO)
    6384              :     {
    6385              :       switch_to_section (darwin_sections[picbase_thunk_section]);
    6386              :       fputs ("\t.weak_definition\t", asm_out_file);
    6387              :       assemble_name (asm_out_file, name);
    6388              :       fputs ("\n\t.private_extern\t", asm_out_file);
    6389              :       assemble_name (asm_out_file, name);
    6390              :       putc ('\n', asm_out_file);
    6391              :       ASM_OUTPUT_LABEL (asm_out_file, name);
    6392              :       DECL_WEAK (decl) = 1;
    6393              :     }
    6394              :   else
    6395              : #endif
    6396           22 :     if (USE_HIDDEN_LINKONCE)
    6397              :       {
    6398           22 :         cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
    6399              : 
    6400           22 :         targetm.asm_out.unique_section (decl, 0);
    6401           22 :         switch_to_section (get_named_section (decl, NULL, 0));
    6402              : 
    6403           22 :         targetm.asm_out.globalize_label (asm_out_file, name);
    6404           22 :         fputs ("\t.hidden\t", asm_out_file);
    6405           22 :         assemble_name (asm_out_file, name);
    6406           22 :         putc ('\n', asm_out_file);
    6407           22 :         ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    6408              :       }
    6409              :     else
    6410              :       {
    6411              :         switch_to_section (text_section);
    6412           22 :         ASM_OUTPUT_LABEL (asm_out_file, name);
    6413              :       }
    6414              : 
    6415           22 :   DECL_INITIAL (decl) = make_node (BLOCK);
    6416           22 :   current_function_decl = decl;
    6417           22 :   allocate_struct_function (decl, false);
    6418           22 :   init_function_start (decl);
    6419              :   /* We're about to hide the function body from callees of final_* by
    6420              :      emitting it directly; tell them we're a thunk, if they care.  */
    6421           22 :   cfun->is_thunk = true;
    6422           22 :   first_function_block_is_cold = false;
    6423              :   /* Make sure unwind info is emitted for the thunk if needed.  */
    6424           22 :   final_start_function (emit_barrier (), asm_out_file, 1);
    6425              : 
    6426           22 :   output_indirect_thunk (regno);
    6427              : 
    6428           22 :   final_end_function ();
    6429           22 :   init_insn_lengths ();
    6430           22 :   free_after_compilation (cfun);
    6431           22 :   set_cfun (NULL);
    6432           22 :   current_function_decl = NULL;
    6433           22 : }
    6434              : 
    6435              : static int pic_labels_used;
    6436              : 
    6437              : /* Fills in the label name that should be used for a pc thunk for
    6438              :    the given register.  */
    6439              : 
    6440              : static void
    6441        37452 : get_pc_thunk_name (char name[32], unsigned int regno)
    6442              : {
    6443        37452 :   gcc_assert (!TARGET_64BIT);
    6444              : 
    6445        37452 :   if (USE_HIDDEN_LINKONCE)
    6446        37452 :     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
    6447              :   else
    6448        37452 :     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
    6449        37452 : }
    6450              : 
    6451              : 
    6452              : /* Output the indirect-branch thunks and, for -fpic, the PC thunks that load
    6453              :    a register with the caller's return address and then return.  */
    6454              : 
    6455              : static void
    6456       232630 : ix86_code_end (void)
    6457              : {
    6458       232630 :   rtx xops[2];
    6459       232630 :   unsigned int regno;
    6460              : 
    6461       232630 :   if (indirect_return_needed)
    6462            6 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6463              :                                     INVALID_REGNUM, true);
    6464       232630 :   if (indirect_return_via_cx)
    6465            0 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6466              :                                     CX_REG, true);
    6467       232630 :   if (indirect_thunk_needed)
    6468            0 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6469              :                                     INVALID_REGNUM, false);
    6470              : 
    6471      2093670 :   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    6472              :     {
    6473      1861040 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6474            0 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6475              :                                         regno, false);
    6476              :     }
    6477              : 
    6478      3954710 :   for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    6479              :     {
    6480      3722080 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6481            0 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6482              :                                         regno, false);
    6483              :     }
    6484              : 
    6485      2093670 :   for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    6486              :     {
    6487      1861040 :       char name[32];
    6488      1861040 :       tree decl;
    6489              : 
    6490      1861040 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6491           16 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6492              :                                         regno, false);
    6493              : 
    6494      1861040 :       if (!(pic_labels_used & (1 << regno)))
    6495      1857456 :         continue;
    6496              : 
    6497         3584 :       get_pc_thunk_name (name, regno);
    6498              : 
    6499         3584 :       decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
    6500              :                          get_identifier (name),
    6501              :                          build_function_type_list (void_type_node, NULL_TREE));
    6502         3584 :       DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
    6503              :                                        NULL_TREE, void_type_node);
    6504         3584 :       TREE_PUBLIC (decl) = 1;
    6505         3584 :       TREE_STATIC (decl) = 1;
    6506         3584 :       DECL_IGNORED_P (decl) = 1;
    6507              : 
    6508              : #if TARGET_MACHO
    6509              :       if (TARGET_MACHO)
    6510              :         {
    6511              :           switch_to_section (darwin_sections[picbase_thunk_section]);
    6512              :           fputs ("\t.weak_definition\t", asm_out_file);
    6513              :           assemble_name (asm_out_file, name);
    6514              :           fputs ("\n\t.private_extern\t", asm_out_file);
    6515              :           assemble_name (asm_out_file, name);
    6516              :           putc ('\n', asm_out_file);
    6517              :           ASM_OUTPUT_LABEL (asm_out_file, name);
    6518              :           DECL_WEAK (decl) = 1;
    6519              :         }
    6520              :       else
    6521              : #endif
    6522         3584 :       if (USE_HIDDEN_LINKONCE)
    6523              :         {
    6524         3584 :           cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
    6525              : 
    6526         3584 :           targetm.asm_out.unique_section (decl, 0);
    6527         3584 :           switch_to_section (get_named_section (decl, NULL, 0));
    6528              : 
    6529         3584 :           targetm.asm_out.globalize_label (asm_out_file, name);
    6530         3584 :           fputs ("\t.hidden\t", asm_out_file);
    6531         3584 :           assemble_name (asm_out_file, name);
    6532         3584 :           putc ('\n', asm_out_file);
    6533         3584 :           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    6534              :         }
    6535              :       else
    6536              :         {
    6537              :           switch_to_section (text_section);
    6538         3584 :           ASM_OUTPUT_LABEL (asm_out_file, name);
    6539              :         }
    6540              : 
    6541         3584 :       DECL_INITIAL (decl) = make_node (BLOCK);
    6542         3584 :       current_function_decl = decl;
    6543         3584 :       allocate_struct_function (decl, false);
    6544         3584 :       init_function_start (decl);
    6545              :       /* We're about to hide the function body from callees of final_* by
    6546              :          emitting it directly; tell them we're a thunk, if they care.  */
    6547         3584 :       cfun->is_thunk = true;
    6548         3584 :       first_function_block_is_cold = false;
    6549              :       /* Make sure unwind info is emitted for the thunk if needed.  */
    6550         3584 :       final_start_function (emit_barrier (), asm_out_file, 1);
    6551              : 
    6552              :       /* Pad stack IP move with 4 instructions (two NOPs count
    6553              :          as one instruction).  */
    6554         3584 :       if (TARGET_PAD_SHORT_FUNCTION)
    6555              :         {
    6556              :           int i = 8;
    6557              : 
    6558            0 :           while (i--)
    6559            0 :             fputs ("\tnop\n", asm_out_file);
    6560              :         }
    6561              : 
    6562         7168 :       xops[0] = gen_rtx_REG (Pmode, regno);
    6563         7168 :       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
    6564         3584 :       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
    6565         3584 :       fputs ("\tret\n", asm_out_file);
    6566         3584 :       final_end_function ();
    6567         3584 :       init_insn_lengths ();
    6568         3584 :       free_after_compilation (cfun);
    6569         3584 :       set_cfun (NULL);
    6570         3584 :       current_function_decl = NULL;
    6571              :     }
    6572              : 
    6573       232630 :   if (flag_split_stack)
    6574         4710 :     file_end_indicate_split_stack ();
    6575       232630 : }
    6576              : 
    6577              : /* Emit code for the SET_GOT patterns.  */
    6578              : 
    6579              : const char *
    6580        33868 : output_set_got (rtx dest, rtx label)
    6581              : {
    6582        33868 :   rtx xops[3];
    6583              : 
    6584        33868 :   xops[0] = dest;
    6585              : 
    6586        33868 :   if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    6587              :     {
    6588              :       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
    6589              :       xops[2] = gen_rtx_MEM (Pmode,
    6590              :                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
    6591              :       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
    6592              : 
    6593              :       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
    6594              :          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
    6595              :          an unadorned address.  */
    6596              :       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
    6597              :       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
    6598              :       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
    6599              :       return "";
    6600              :     }
    6601              : 
    6602        67736 :   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
    6603              : 
    6604        33868 :   if (flag_pic)
    6605              :     {
    6606        33868 :       char name[32];
    6607        33868 :       get_pc_thunk_name (name, REGNO (dest));
    6608        33868 :       pic_labels_used |= 1 << REGNO (dest);
    6609              : 
    6610        67736 :       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
    6611        33868 :       xops[2] = gen_rtx_MEM (QImode, xops[2]);
    6612        33868 :       output_asm_insn ("%!call\t%X2", xops);
    6613              : 
    6614              : #if TARGET_MACHO
    6615              :       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
    6616              :          This is what will be referenced by the Mach-O PIC subsystem.  */
    6617              :       if (machopic_should_output_picbase_label () || !label)
    6618              :         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
    6619              : 
    6620              :       /* When we are restoring the pic base at the site of a nonlocal label,
    6621              :          and we decided to emit the pic base above, we will still output a
    6622              :          local label used for calculating the correction offset (even though
    6623              :          the offset will be 0 in that case).  */
    6624              :       if (label)
    6625              :         targetm.asm_out.internal_label (asm_out_file, "L",
    6626              :                                            CODE_LABEL_NUMBER (label));
    6627              : #endif
    6628              :     }
    6629              :   else
    6630              :     {
    6631            0 :       if (TARGET_MACHO)
    6632              :         /* We don't need a pic base, we're not producing pic.  */
    6633              :         gcc_unreachable ();
    6634              : 
    6635            0 :       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
    6636            0 :       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
    6637            0 :       targetm.asm_out.internal_label (asm_out_file, "L",
    6638            0 :                                       CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    6639              :     }
    6640              : 
    6641        33868 :   if (!TARGET_MACHO)
    6642        33868 :     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
    6643              : 
    6644        33868 :   return "";
    6645              : }
    6646              : 
    6647              : /* Generate a "push" pattern for input ARG.  */
    6648              : 
    6649              : rtx
    6650      1885996 : gen_push (rtx arg, bool ppx_p)
    6651              : {
    6652      1885996 :   struct machine_function *m = cfun->machine;
    6653              : 
    6654      1885996 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6655      1610498 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6656      1885996 :   m->fs.sp_offset += UNITS_PER_WORD;
    6657              : 
    6658      1885996 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6659           31 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6660              : 
    6661      1885996 :   rtx stack = gen_rtx_MEM (word_mode,
    6662      1885996 :                            gen_rtx_PRE_DEC (Pmode,
    6663              :                                             stack_pointer_rtx));
    6664      3771904 :   return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
    6665              : }
    6666              : 
    6667              : rtx
    6668           23 : gen_pushfl (void)
    6669              : {
    6670           23 :   struct machine_function *m = cfun->machine;
    6671           23 :   rtx flags, mem;
    6672              : 
    6673           23 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6674            0 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6675           23 :   m->fs.sp_offset += UNITS_PER_WORD;
    6676              : 
    6677           23 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6678              : 
    6679           23 :   mem = gen_rtx_MEM (word_mode,
    6680           23 :                      gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
    6681              : 
    6682           23 :   return gen_pushfl2 (word_mode, mem, flags);
    6683              : }
    6684              : 
    6685              : /* Generate a "pop" pattern for input ARG.  */
    6686              : 
    6687              : rtx
    6688      1468989 : gen_pop (rtx arg, bool ppx_p)
    6689              : {
    6690      1468989 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6691           27 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6692              : 
    6693      1468989 :   rtx stack = gen_rtx_MEM (word_mode,
    6694      1468989 :                            gen_rtx_POST_INC (Pmode,
    6695              :                                              stack_pointer_rtx));
    6696              : 
    6697      2937890 :   return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
    6698              : }
    6699              : 
    6700              : rtx
    6701           21 : gen_popfl (void)
    6702              : {
    6703           21 :   rtx flags, mem;
    6704              : 
    6705           21 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6706              : 
    6707           21 :   mem = gen_rtx_MEM (word_mode,
    6708           21 :                      gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
    6709              : 
    6710           21 :   return gen_popfl1 (word_mode, flags, mem);
    6711              : }
    6712              : 
    6713              : /* Generate a "push2" pattern for inputs MEM, REG1 and REG2.  */
    6714              : rtx
    6715           19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
    6716              : {
    6717           19 :   struct machine_function *m = cfun->machine;
    6718           19 :   const int offset = UNITS_PER_WORD * 2;
    6719              : 
    6720           19 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6721           14 :     m->fs.cfa_offset += offset;
    6722           19 :   m->fs.sp_offset += offset;
    6723              : 
    6724           19 :   if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
    6725            0 :     reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
    6726              : 
    6727           19 :   if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
    6728            0 :     reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
    6729              : 
    6730           19 :   return ppx_p ? gen_push2p_di (mem, reg1, reg2)
    6731            4 :                : gen_push2_di (mem, reg1, reg2);
    6732              : }
    6733              : 
    6734              : /* Return an unused call-clobbered register that is available for the
    6735              :    entire function, or INVALID_REGNUM if there is none.  */
    6736              : 
    6737              : static unsigned int
    6738            0 : ix86_select_alt_pic_regnum (void)
    6739              : {
    6740            0 :   if (ix86_use_pseudo_pic_reg ())
    6741              :     return INVALID_REGNUM;
    6742              : 
    6743            0 :   if (crtl->is_leaf
    6744            0 :       && !crtl->profile
    6745            0 :       && !ix86_current_function_calls_tls_descriptor)
    6746              :     {
    6747            0 :       int i, drap;
    6748              :       /* Can't use the same register for both PIC and DRAP.  */
    6749            0 :       if (crtl->drap_reg)
    6750            0 :         drap = REGNO (crtl->drap_reg);
    6751              :       else
    6752              :         drap = -1;
    6753            0 :       for (i = 2; i >= 0; --i)
    6754            0 :         if (i != drap && !df_regs_ever_live_p (i))
    6755              :           return i;
    6756              :     }
    6757              : 
    6758              :   return INVALID_REGNUM;
    6759              : }
    6760              : 
    6761              : /* Return true if REGNO is used by the epilogue.  */
    6762              : 
    6763              : bool
    6764   1665528998 : ix86_epilogue_uses (int regno)
    6765              : {
    6766              :   /* If there are no caller-saved registers, we preserve all registers,
    6767              :      except for MMX and x87 registers which aren't supported when saving
    6768              :      and restoring registers.  Don't explicitly save SP register since
    6769              :      it is always preserved.  */
    6770   1665528998 :   return (epilogue_completed
    6771    263492814 :           && (cfun->machine->call_saved_registers
    6772    263492814 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    6773        27140 :           && !fixed_regs[regno]
    6774         4857 :           && !STACK_REGNO_P (regno)
    6775   1665533855 :           && !MMX_REGNO_P (regno));
    6776              : }
    6777              : 
    6778              : /* Return nonzero if register REGNO can be used as a scratch register
    6779              :    in peephole2.  */
    6780              : 
    6781              : static bool
    6782      1224157 : ix86_hard_regno_scratch_ok (unsigned int regno)
    6783              : {
    6784              :   /* If there are no caller-saved registers, we can't use any register
    6785              :      as a scratch register after epilogue and use REGNO as scratch
    6786              :      register only if it has been used before to avoid saving and
    6787              :      restoring it.  */
    6788      1224157 :   return ((cfun->machine->call_saved_registers
    6789      1224157 :            != TYPE_NO_CALLER_SAVED_REGISTERS)
    6790      1224157 :           || (!epilogue_completed
    6791            0 :               && df_regs_ever_live_p (regno)));
    6792              : }
    6793              : 
    6794              : /* Return TRUE if we need to save REGNO.  */
    6795              : 
    6796              : bool
    6797    353851150 : ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
    6798              : {
    6799    353851150 :   rtx reg;
    6800              : 
    6801              :   /* Save and restore DRAP register between prologue and epilogue so
    6802              :      that stack pointer can be restored.  */
    6803    353851150 :   if (crtl->drap_reg
    6804      2287026 :       && regno == REGNO (crtl->drap_reg)
    6805    353906813 :       && !cfun->machine->no_drap_save_restore)
    6806              :     return true;
    6807              : 
    6808    353795487 :   switch (cfun->machine->call_saved_registers)
    6809              :     {
    6810              :     case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
    6811              :       break;
    6812              : 
    6813        57152 :     case TYPE_NO_CALLER_SAVED_REGISTERS:
    6814              :       /* If there are no caller-saved registers, we preserve all
    6815              :          registers, except for MMX and x87 registers which aren't
    6816              :          supported when saving and restoring registers.  Don't
    6817              :          explicitly save SP register since it is always preserved.
    6818              : 
    6819              :          Don't preserve registers used for function return value.  */
    6820        57152 :       reg = crtl->return_rtx;
    6821        57152 :       if (reg)
    6822              :         {
    6823          768 :           unsigned int i = REGNO (reg);
    6824          768 :           unsigned int nregs = REG_NREGS (reg);
    6825         1522 :           while (nregs-- > 0)
    6826          768 :             if ((i + nregs) == regno)
    6827              :               return false;
    6828              :         }
    6829              : 
    6830        57138 :       return (df_regs_ever_live_p (regno)
    6831         6930 :               && !fixed_regs[regno]
    6832         5962 :               && !STACK_REGNO_P (regno)
    6833         5962 :               && !MMX_REGNO_P (regno)
    6834        63100 :               && (regno != HARD_FRAME_POINTER_REGNUM
    6835          249 :                   || !frame_pointer_needed));
    6836              : 
    6837        18192 :     case TYPE_NO_CALLEE_SAVED_REGISTERS:
    6838        18192 :     case TYPE_PRESERVE_NONE:
    6839        18192 :       if (regno != HARD_FRAME_POINTER_REGNUM)
    6840              :         return false;
    6841              :       break;
    6842              :     }
    6843              : 
    6844    387694417 :   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
    6845     10762684 :       && pic_offset_table_rtx)
    6846              :     {
    6847       385422 :       if (ix86_use_pseudo_pic_reg ())
    6848              :         {
    6849              :           /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
    6850              :           _mcount in prologue.  */
    6851       385422 :           if (!TARGET_64BIT && flag_pic && crtl->profile)
    6852              :             return true;
    6853              :         }
    6854            0 :       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
    6855            0 :                || crtl->profile
    6856            0 :                || crtl->calls_eh_return
    6857            0 :                || crtl->uses_const_pool
    6858            0 :                || cfun->has_nonlocal_label)
    6859            0 :         return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    6860              :     }
    6861              : 
    6862    353720712 :   if (crtl->calls_eh_return && maybe_eh_return)
    6863              :     {
    6864              :       unsigned i;
    6865        13237 :       for (i = 0; ; i++)
    6866              :         {
    6867        20181 :           unsigned test = EH_RETURN_DATA_REGNO (i);
    6868        13671 :           if (test == INVALID_REGNUM)
    6869              :             break;
    6870        13671 :           if (test == regno)
    6871              :             return true;
    6872        13237 :         }
    6873              :     }
    6874              : 
    6875    353720278 :   if (ignore_outlined && cfun->machine->call_ms2sysv)
    6876              :     {
    6877      2641728 :       unsigned count = cfun->machine->call_ms2sysv_extra_regs
    6878              :                        + xlogue_layout::MIN_REGS;
    6879      2641728 :       if (xlogue_layout::is_stub_managed_reg (regno, count))
    6880              :         return false;
    6881              :     }
    6882              : 
    6883    353220409 :   return (df_regs_ever_live_p (regno)
    6884    372634325 :           && !call_used_or_fixed_reg_p (regno)
    6885    372003584 :           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
    6886              : }
    6887              : 
    6888              : /* Return number of saved general purpose registers.  */
    6889              : 
    6890              : static int
    6891      8168022 : ix86_nsaved_regs (void)
    6892              : {
    6893      8168022 :   int nregs = 0;
    6894      8168022 :   int regno;
    6895              : 
    6896    759626046 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6897    751458024 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6898      8221039 :       nregs ++;
    6899      8168022 :   return nregs;
    6900              : }
    6901              : 
    6902              : /* Return number of saved SSE registers.  */
    6903              : 
    6904              : static int
    6905      8202986 : ix86_nsaved_sseregs (void)
    6906              : {
    6907      8202986 :   int nregs = 0;
    6908      8202986 :   int regno;
    6909              : 
    6910      7400915 :   if (!TARGET_64BIT_MS_ABI
    6911      8202986 :       && (cfun->machine->call_saved_registers
    6912      7977385 :           != TYPE_NO_CALLER_SAVED_REGISTERS))
    6913              :     return 0;
    6914     21049434 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6915     20823096 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6916      1897045 :       nregs ++;
    6917              :   return nregs;
    6918              : }
    6919              : 
    6920              : /* Given FROM and TO register numbers, say whether this elimination is
    6921              :    allowed.  If stack alignment is needed, we can only replace argument
    6922              :    pointer with hard frame pointer, or replace frame pointer with stack
    6923              :    pointer.  Otherwise, frame pointer elimination is automatically
    6924              :    handled and all other eliminations are valid.  */
    6925              : 
    6926              : static bool
    6927     48372279 : ix86_can_eliminate (const int from, const int to)
    6928              : {
    6929     48372279 :   if (stack_realign_fp)
    6930      1704480 :     return ((from == ARG_POINTER_REGNUM
    6931      1704480 :              && to == HARD_FRAME_POINTER_REGNUM)
    6932      1704480 :             || (from == FRAME_POINTER_REGNUM
    6933      1704480 :                 && to == STACK_POINTER_REGNUM));
    6934              :   else
    6935     86831734 :     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
    6936              : }
    6937              : 
    6938              : /* Return the offset between two registers, one to be eliminated, and the other
    6939              :    its replacement, at the start of a routine.  */
    6940              : 
    6941              : HOST_WIDE_INT
    6942    141280883 : ix86_initial_elimination_offset (int from, int to)
    6943              : {
    6944    141280883 :   struct ix86_frame &frame = cfun->machine->frame;
    6945              : 
    6946    141280883 :   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    6947     10418894 :     return frame.hard_frame_pointer_offset;
    6948    130861989 :   else if (from == FRAME_POINTER_REGNUM
    6949    130861989 :            && to == HARD_FRAME_POINTER_REGNUM)
    6950      8132732 :     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
    6951              :   else
    6952              :     {
    6953    122729257 :       gcc_assert (to == STACK_POINTER_REGNUM);
    6954              : 
    6955    122729257 :       if (from == ARG_POINTER_REGNUM)
    6956    114596525 :         return frame.stack_pointer_offset;
    6957              : 
    6958      8132732 :       gcc_assert (from == FRAME_POINTER_REGNUM);
    6959      8132732 :       return frame.stack_pointer_offset - frame.frame_pointer_offset;
    6960              :     }
    6961              : }
    6962              : 
    6963              : /* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
    6964              : void
    6965            0 : warn_once_call_ms2sysv_xlogues (const char *feature)
    6966              : {
    6967            0 :   static bool warned_once = false;
    6968            0 :   if (!warned_once)
    6969              :     {
    6970            0 :       warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
    6971              :                feature);
    6972            0 :       warned_once = true;
    6973              :     }
    6974            0 : }
    6975              : 
    6976              : /* Return the probing interval for -fstack-clash-protection.  */
    6977              : 
    6978              : static HOST_WIDE_INT
    6979          494 : get_probe_interval (void)
    6980              : {
    6981          341 :   if (flag_stack_clash_protection)
    6982          412 :     return (HOST_WIDE_INT_1U
    6983          412 :             << param_stack_clash_protection_probe_interval);
    6984              :   else
    6985              :     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
    6986              : }
    6987              : 
    6988              : /* When using -fsplit-stack, the allocation routines set a field in
    6989              :    the TCB to the bottom of the stack plus this much space, measured
    6990              :    in bytes.  */
    6991              : 
    6992              : #define SPLIT_STACK_AVAILABLE 256
    6993              : 
    6994              : /* Return true if push2/pop2 can be generated.  */
    6995              : 
    6996              : static bool
    6997      8168675 : ix86_can_use_push2pop2 (void)
    6998              : {
    6999              :   /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
    7000      8168675 :   unsigned int incoming_stack_boundary
    7001      8168675 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    7002      8168675 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    7003      8168675 :   return incoming_stack_boundary % 128 == 0;
    7004              : }
    7005              : 
    7006              : /* Helper function to determine whether push2/pop2 can be used in prologue or
    7007              :    epilogue for register save/restore.  */
    7008              : static bool
    7009      8168022 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
    7010              : {
    7011      8168022 :   if (!ix86_can_use_push2pop2 ())
    7012              :     return false;
    7013      8132111 :   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
    7014      8132111 :   return TARGET_APX_PUSH2POP2
    7015         2839 :          && !cfun->machine->frame.save_regs_using_mov
    7016         2827 :          && cfun->machine->func_type == TYPE_NORMAL
    7017      8134930 :          && (nregs + aligned) >= 3;
    7018              : }
    7019              : 
    7020              : /* Check if push/pop should be used to save/restore registers.  */
    7021              : static bool
    7022      8893921 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
    7023              : {
    7024      3196071 :   return ((!to_allocate && cfun->machine->frame.nregs <= 1)
    7025      5946382 :           || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
    7026              :           /* If static stack checking is enabled and done with probes,
    7027              :              the registers need to be saved before allocating the frame.  */
    7028      5945721 :           || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
    7029              :           /* If stack clash probing needs a loop, then it needs a
    7030              :              scratch register.  But the returned register is only guaranteed
    7031              :              to be safe to use after register saves are complete.  So if
    7032              :              stack clash protections are enabled and the allocated frame is
    7033              :              larger than the probe interval, then use pushes to save
    7034              :              callee saved registers.  */
    7035     14839568 :           || (flag_stack_clash_protection
    7036          341 :               && !ix86_target_stack_probe ()
    7037          341 :               && to_allocate > get_probe_interval ()));
    7038              : }
    7039              : 
    7040              : /* Fill structure ix86_frame about frame of currently computed function.  */
    7041              : 
    7042              : static void
    7043      8168022 : ix86_compute_frame_layout (void)
    7044              : {
    7045      8168022 :   struct ix86_frame *frame = &cfun->machine->frame;
    7046      8168022 :   struct machine_function *m = cfun->machine;
    7047      8168022 :   unsigned HOST_WIDE_INT stack_alignment_needed;
    7048      8168022 :   HOST_WIDE_INT offset;
    7049      8168022 :   unsigned HOST_WIDE_INT preferred_alignment;
    7050      8168022 :   HOST_WIDE_INT size = ix86_get_frame_size ();
    7051      8168022 :   HOST_WIDE_INT to_allocate;
    7052              : 
    7053              :   /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
    7054              :    * ms_abi functions that call a sysv function.  We now need to prune away
    7055              :    * cases where it should be disabled.  */
    7056      8168022 :   if (TARGET_64BIT && m->call_ms2sysv)
    7057              :     {
    7058        35225 :       gcc_assert (TARGET_64BIT_MS_ABI);
    7059        35225 :       gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
    7060        35225 :       gcc_assert (!TARGET_SEH);
    7061        35225 :       gcc_assert (TARGET_SSE);
    7062        35225 :       gcc_assert (!ix86_using_red_zone ());
    7063              : 
    7064        35225 :       if (crtl->calls_eh_return)
    7065              :         {
    7066            0 :           gcc_assert (!reload_completed);
    7067            0 :           m->call_ms2sysv = false;
    7068            0 :           warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
    7069              :         }
    7070              : 
    7071        35225 :       else if (ix86_static_chain_on_stack)
    7072              :         {
    7073            0 :           gcc_assert (!reload_completed);
    7074            0 :           m->call_ms2sysv = false;
    7075            0 :           warn_once_call_ms2sysv_xlogues ("static call chains");
    7076              :         }
    7077              : 
    7078              :       /* Finally, compute which registers the stub will manage.  */
    7079              :       else
    7080              :         {
    7081        35225 :           unsigned count = xlogue_layout::count_stub_managed_regs ();
    7082        35225 :           m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
    7083        35225 :           m->call_ms2sysv_pad_in = 0;
    7084              :         }
    7085              :     }
    7086              : 
    7087      8168022 :   frame->nregs = ix86_nsaved_regs ();
    7088      8168022 :   frame->nsseregs = ix86_nsaved_sseregs ();
    7089              : 
    7090              :   /* 64-bit MS ABI seems to require stack alignment to be always 16,
    7091              :      except for function prologues, leaf functions and when the default
    7092              :      incoming stack boundary is overridden at command line or via
    7093              :      force_align_arg_pointer attribute.
    7094              : 
    7095              :      Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
    7096              :      at call sites, including profile function calls.
    7097              : 
    7098              :      For APX push2/pop2, the stack also requires 128b alignment.  */
    7099      8168022 :   if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
    7100           67 :        && crtl->preferred_stack_boundary < 128)
    7101      8168087 :       || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
    7102       225599 :            && crtl->preferred_stack_boundary < 128)
    7103            0 :           && (!crtl->is_leaf || cfun->calls_alloca != 0
    7104            0 :               || ix86_current_function_calls_tls_descriptor
    7105            0 :               || (TARGET_MACHO && crtl->profile)
    7106            0 :               || ix86_incoming_stack_boundary < 128)))
    7107              :     {
    7108            2 :       crtl->preferred_stack_boundary = 128;
    7109            2 :       if (crtl->stack_alignment_needed < 128)
    7110            1 :         crtl->stack_alignment_needed = 128;
    7111              :     }
    7112              : 
    7113      8168022 :   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
    7114      8168022 :   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
    7115              : 
    7116      8168022 :   gcc_assert (!size || stack_alignment_needed);
    7117      8970064 :   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
    7118      8168022 :   gcc_assert (preferred_alignment <= stack_alignment_needed);
    7119              : 
    7120              :   /* The only ABI saving SSE regs should be 64-bit ms_abi or with
    7121              :      no_caller_saved_registers attribute.  */
    7122      8168022 :   gcc_assert (TARGET_64BIT
    7123              :               || (cfun->machine->call_saved_registers
    7124              :                   == TYPE_NO_CALLER_SAVED_REGISTERS)
    7125              :               || !frame->nsseregs);
    7126      8168022 :   if (TARGET_64BIT && m->call_ms2sysv)
    7127              :     {
    7128        35225 :       gcc_assert (stack_alignment_needed >= 16);
    7129        35225 :       gcc_assert ((cfun->machine->call_saved_registers
    7130              :                    == TYPE_NO_CALLER_SAVED_REGISTERS)
    7131              :                   || !frame->nsseregs);
    7132              :     }
    7133              : 
    7134              :   /* For SEH we have to limit the amount of code movement into the prologue.
    7135              :      At present we do this via a BLOCKAGE, at which point there's very little
    7136              :      scheduling that can be done, which means that there's very little point
    7137              :      in doing anything except PUSHs.  */
    7138      8168022 :   if (TARGET_SEH)
    7139              :     m->use_fast_prologue_epilogue = false;
    7140      8168022 :   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    7141              :     {
    7142      7832372 :       int count = frame->nregs;
    7143      7832372 :       struct cgraph_node *node = cgraph_node::get (current_function_decl);
    7144              : 
    7145              :       /* The fast prologue uses move instead of push to save registers.  This
    7146              :          is significantly longer, but also executes faster as modern hardware
    7147              :          can execute the moves in parallel, but can't do that for push/pop.
    7148              : 
    7149              :          Be careful about choosing what prologue to emit:  When the function
    7150              :          takes many instructions to execute we may use the slow version, and
    7151              :          likewise when the function is known to be outside a hot spot (this is
    7152              :          known with feedback only).  Weight the size of the function by the
    7153              :          number of registers to save, as it is cheap to use one or two push
    7154              :          instructions but very slow to use many of them.
    7155              : 
    7156              :          Calling this hook multiple times with the same frame requirements
    7157              :          must produce the same layout, since the RA might otherwise be
    7158              :          unable to reach a fixed point or might fail its final sanity checks.
    7159              :          This means that once we've assumed that a function does or doesn't
    7160              :          have a particular size, we have to stick to that assumption
    7161              :          regardless of how the function has changed since.  */
    7162      7832372 :       if (count)
    7163      2612226 :         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
    7164      7832372 :       if (node->frequency < NODE_FREQUENCY_NORMAL
    7165      7137991 :           || (flag_branch_probabilities
    7166         1031 :               && node->frequency < NODE_FREQUENCY_HOT))
    7167       694691 :         m->use_fast_prologue_epilogue = false;
    7168              :       else
    7169              :         {
    7170      7137681 :           if (count != frame->expensive_count)
    7171              :             {
    7172       286452 :               frame->expensive_count = count;
    7173       286452 :               frame->expensive_p = expensive_function_p (count);
    7174              :             }
    7175      7137681 :           m->use_fast_prologue_epilogue = !frame->expensive_p;
    7176              :         }
    7177              :     }
    7178              : 
    7179      8168022 :   frame->save_regs_using_mov
    7180      8168022 :     = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
    7181              : 
    7182              :   /* Skip return address and error code in exception handler.  */
    7183      8168022 :   offset = INCOMING_FRAME_SP_OFFSET;
    7184              : 
    7185              :   /* Skip pushed static chain.  */
    7186      8168022 :   if (ix86_static_chain_on_stack)
    7187            0 :     offset += UNITS_PER_WORD;
    7188              : 
    7189              :   /* Skip saved base pointer.  */
    7190      8168022 :   if (frame_pointer_needed)
    7191      2769745 :     offset += UNITS_PER_WORD;
    7192      8168022 :   frame->hfp_save_offset = offset;
    7193              : 
    7194              :   /* The traditional frame pointer location is at the top of the frame.  */
    7195      8168022 :   frame->hard_frame_pointer_offset = offset;
    7196              : 
    7197              :   /* Register save area */
    7198      8168022 :   offset += frame->nregs * UNITS_PER_WORD;
    7199      8168022 :   frame->reg_save_offset = offset;
    7200              : 
    7201              :   /* Calculate the size of the va-arg area (not including padding, if any).  */
    7202      8168022 :   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
    7203              : 
    7204              :   /* Also adjust stack_realign_offset for the largest alignment of
    7205              :      stack slot actually used.  */
    7206      8168022 :   if (stack_realign_fp
    7207      7861480 :       || (cfun->machine->max_used_stack_alignment != 0
    7208          138 :           && (offset % cfun->machine->max_used_stack_alignment) != 0))
    7209              :     {
    7210              :       /* We may need a 16-byte aligned stack for the remainder of the
    7211              :          register save area, but the stack frame for the local function
    7212              :          may require a greater alignment if using AVX/2/512.  In order
    7213              :          to avoid wasting space, we first calculate the space needed for
    7214              :          the rest of the register saves, add that to the stack pointer,
    7215              :          and then realign the stack to the boundary of the start of the
    7216              :          frame for the local function.  */
    7217       306611 :       HOST_WIDE_INT space_needed = 0;
    7218       306611 :       HOST_WIDE_INT sse_reg_space_needed = 0;
    7219              : 
    7220       306611 :       if (TARGET_64BIT)
    7221              :         {
    7222       304826 :           if (m->call_ms2sysv)
    7223              :             {
    7224         6415 :               m->call_ms2sysv_pad_in = 0;
    7225         6415 :               space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
    7226              :             }
    7227              : 
    7228       298411 :           else if (frame->nsseregs)
    7229              :             /* The only ABI that has saved SSE registers (Win64) also has a
    7230              :                16-byte aligned default stack.  However, many programs violate
    7231              :                the ABI, and Wine64 forces stack realignment to compensate.  */
    7232         6447 :             space_needed = frame->nsseregs * 16;
    7233              : 
    7234       304826 :           sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
    7235              : 
    7236              :           /* 64-bit frame->va_arg_size should always be a multiple of 16, but
    7237              :              rounding to be pedantic.  */
    7238       304826 :           space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
    7239              :         }
    7240              :       else
    7241         1785 :         space_needed = frame->va_arg_size;
    7242              : 
    7243              :       /* Record the allocation size required prior to the realignment AND.  */
    7244       306611 :       frame->stack_realign_allocate = space_needed;
    7245              : 
    7246              :       /* The re-aligned stack starts at frame->stack_realign_offset.  Values
    7247              :          before this point are not directly comparable with values below
    7248              :          this point.  Use sp_valid_at to determine if the stack pointer is
    7249              :          valid for a given offset, fp_valid_at for the frame pointer, or
    7250              :          choose_baseaddr to have a base register chosen for you.
    7251              : 
    7252              :          Note that the result of (frame->stack_realign_offset
    7253              :          & (stack_alignment_needed - 1)) may not equal zero.  */
    7254       306611 :       offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
    7255       306611 :       frame->stack_realign_offset = offset - space_needed;
    7256       306611 :       frame->sse_reg_save_offset = frame->stack_realign_offset
    7257       306611 :                                                         + sse_reg_space_needed;
    7258       306611 :     }
    7259              :   else
    7260              :     {
    7261      7861411 :       frame->stack_realign_offset = offset;
    7262              : 
    7263      7861411 :       if (TARGET_64BIT && m->call_ms2sysv)
    7264              :         {
    7265        28810 :           m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
    7266        28810 :           offset += xlogue_layout::get_instance ().get_stack_space_used ();
    7267              :         }
    7268              : 
    7269              :       /* Align and set SSE register save area.  */
    7270      7832601 :       else if (frame->nsseregs)
    7271              :         {
    7272              :           /* If the incoming stack boundary is at least 16 bytes, or DRAP is
    7273              :              required and the DRAP re-alignment boundary is at least 16 bytes,
    7274              :              then we want the SSE register save area properly aligned.  */
    7275       183247 :           if (ix86_incoming_stack_boundary >= 128
    7276         6400 :                   || (stack_realign_drap && stack_alignment_needed >= 16))
    7277       183247 :             offset = ROUND_UP (offset, 16);
    7278       183247 :           offset += frame->nsseregs * 16;
    7279              :         }
    7280      7861411 :       frame->sse_reg_save_offset = offset;
    7281      7861411 :       offset += frame->va_arg_size;
    7282              :     }
    7283              : 
    7284              :   /* Align start of frame for local function.  When a function call
    7285              :      is removed, the function may become a leaf function.  But if
    7286              :      arguments may be passed on the stack, we need to align the stack
    7287              :      when there is no tail call.  */
    7288      8168022 :   if (m->call_ms2sysv
    7289      8132797 :       || frame->va_arg_size != 0
    7290      8053775 :       || size != 0
    7291      4389342 :       || !crtl->is_leaf
    7292      2048772 :       || (!crtl->tail_call_emit
    7293      1729163 :           && cfun->machine->outgoing_args_on_stack)
    7294      2048722 :       || cfun->calls_alloca
    7295     10215035 :       || ix86_current_function_calls_tls_descriptor)
    7296      6121421 :     offset = ROUND_UP (offset, stack_alignment_needed);
    7297              : 
    7298              :   /* Frame pointer points here.  */
    7299      8168022 :   frame->frame_pointer_offset = offset;
    7300              : 
    7301      8168022 :   offset += size;
    7302              : 
    7303              :   /* Add outgoing arguments area.  Can be skipped if we eliminated
    7304              :      all the function calls as dead code.
    7305              :      Skipping is however impossible when function calls alloca.  Alloca
    7306              :      expander assumes that last crtl->outgoing_args_size
    7307              :      of stack frame are unused.  */
    7308      8168022 :   if (ACCUMULATE_OUTGOING_ARGS
    7309      8786138 :       && (!crtl->is_leaf || cfun->calls_alloca
    7310       391912 :           || ix86_current_function_calls_tls_descriptor))
    7311              :     {
    7312       226204 :       offset += crtl->outgoing_args_size;
    7313       226204 :       frame->outgoing_arguments_size = crtl->outgoing_args_size;
    7314              :     }
    7315              :   else
    7316      7941818 :     frame->outgoing_arguments_size = 0;
    7317              : 
    7318              :   /* Align stack boundary.  Only needed if we're calling another function
    7319              :      or using alloca.  */
    7320      2760961 :   if (!crtl->is_leaf || cfun->calls_alloca
    7321     10925575 :       || ix86_current_function_calls_tls_descriptor)
    7322      5412265 :     offset = ROUND_UP (offset, preferred_alignment);
    7323              : 
    7324              :   /* We've reached end of stack frame.  */
    7325      8168022 :   frame->stack_pointer_offset = offset;
    7326              : 
    7327              :   /* Size prologue needs to allocate.  */
    7328      8168022 :   to_allocate = offset - frame->sse_reg_save_offset;
    7329              : 
    7330      8168022 :   if (save_regs_using_push_pop (to_allocate))
    7331      2574647 :     frame->save_regs_using_mov = false;
    7332              : 
    7333      8168022 :   if (ix86_using_red_zone ()
    7334      7140190 :       && crtl->sp_is_unchanging
    7335      6497358 :       && crtl->is_leaf
    7336      2661536 :       && !cfun->machine->asm_redzone_clobber_seen
    7337      2661523 :       && !ix86_pc_thunk_call_expanded
    7338     10829545 :       && !ix86_current_function_calls_tls_descriptor)
    7339              :     {
    7340      2661508 :       frame->red_zone_size = to_allocate;
    7341      2661508 :       if (frame->save_regs_using_mov)
    7342       139945 :         frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
    7343      2661508 :       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
    7344       102371 :         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    7345              :     }
    7346              :   else
    7347      5506514 :     frame->red_zone_size = 0;
    7348      8168022 :   frame->stack_pointer_offset -= frame->red_zone_size;
    7349              : 
    7350              :   /* The SEH frame pointer location is near the bottom of the frame.
    7351              :      This is enforced by the fact that the difference between the
    7352              :      stack pointer and the frame pointer is limited to 240 bytes in
    7353              :      the unwind data structure.  */
    7354      8168022 :   if (TARGET_SEH)
    7355              :     {
    7356              :       /* Force the frame pointer to point at or below the lowest register save
    7357              :          area, see the SEH code in config/i386/winnt.cc for the rationale.  */
    7358              :       frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
    7359              : 
    7360              :       /* If we can leave the frame pointer where it is, do so; however return
    7361              :          the establisher frame for __builtin_frame_address (0) or else if the
    7362              :          frame overflows the SEH maximum frame size.
    7363              : 
    7364              :          Note that the value returned by __builtin_frame_address (0) is quite
    7365              :          constrained, because setjmp is piggybacked on the SEH machinery with
    7366              :          recent versions of MinGW:
    7367              : 
    7368              :           #    elif defined(__SEH__)
    7369              :           #     if defined(__aarch64__) || defined(_ARM64_)
    7370              :           #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
    7371              :           #     elif (__MINGW_GCC_VERSION < 40702)
    7372              :           #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
    7373              :           #     else
    7374              :           #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
    7375              :           #     endif
    7376              : 
    7377              :          and the second argument passed to _setjmp, if not null, is forwarded
    7378              :          to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
    7379              :          built an ExceptionRecord on the fly describing the setjmp buffer).  */
    7380              :       const HOST_WIDE_INT diff
    7381              :         = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
    7382              :       if (diff <= 255 && !crtl->accesses_prior_frames)
    7383              :         {
    7384              :           /* The resulting diff will be a multiple of 16 lower than 255,
    7385              :              i.e. at most 240 as required by the unwind data structure.  */
    7386              :           frame->hard_frame_pointer_offset += (diff & 15);
    7387              :         }
    7388              :       else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
    7389              :         {
    7390              :           /* Ideally we'd determine what portion of the local stack frame
    7391              :              (within the constraint of the lowest 240) is most heavily used.
    7392              :              But without that complication, simply bias the frame pointer
    7393              :              by 128 bytes so as to maximize the amount of the local stack
    7394              :              frame that is addressable with 8-bit offsets.  */
    7395              :           frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
    7396              :         }
    7397              :       else
    7398              :         frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    7399              :     }
    7400      8168022 : }
    7401              : 
    7402              : /* This is semi-inlined memory_address_length, but simplified
    7403              :    since we know that we're always dealing with reg+offset, and
    7404              :    to avoid having to create and discard all that rtl.  */
    7405              : 
    7406              : static inline int
    7407      1011764 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
    7408              : {
    7409      1011764 :   int len = 4;
    7410              : 
    7411            0 :   if (offset == 0)
    7412              :     {
    7413              :       /* EBP and R13 cannot be encoded without an offset.  */
    7414            0 :       len = (regno == BP_REG || regno == R13_REG);
    7415              :     }
    7416      1003563 :   else if (IN_RANGE (offset, -128, 127))
    7417       635913 :     len = 1;
    7418              : 
    7419              :   /* ESP and R12 must be encoded with a SIB byte.  */
    7420            0 :   if (regno == SP_REG || regno == R12_REG)
    7421            0 :     len++;
    7422              : 
    7423      1011764 :   return len;
    7424              : }
    7425              : 
    7426              : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
    7427              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7428              : 
    7429              : static bool
    7430      3496623 : sp_valid_at (HOST_WIDE_INT cfa_offset)
    7431              : {
    7432      3496623 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7433      3496623 :   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    7434              :     {
    7435              :       /* Validate that the cfa_offset isn't in a "no-man's land", i.e. it is <= sp_realigned_fp_last.  */
    7436        46356 :       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
    7437              :       return false;
    7438              :     }
    7439      3450267 :   return fs.sp_valid;
    7440              : }
    7441              : 
    7442              : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
    7443              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7444              : 
    7445              : static inline bool
    7446      1365359 : fp_valid_at (HOST_WIDE_INT cfa_offset)
    7447              : {
    7448      1365359 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7449      1365359 :   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    7450              :     {
    7451              :       /* Validate that the cfa_offset isn't in a "no-man's land", i.e. it is >= sp_realigned_offset.  */
    7452        28328 :       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
    7453              :       return false;
    7454              :     }
    7455      1337031 :   return fs.fp_valid;
    7456              : }
    7457              : 
    7458              : /* Choose a base register based upon alignment requested, speed and/or
    7459              :    size.  BASE_REG and BASE_OFFSET are left unchanged if no register qualifies.  */
    7460              : 
    7461              : static void
    7462      1365359 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
    7463              :                 HOST_WIDE_INT &base_offset,
    7464              :                 unsigned int align_reqested, unsigned int *align)
    7465              : {
    7466      1365359 :   const struct machine_function *m = cfun->machine;
    7467      1365359 :   unsigned int hfp_align;
    7468      1365359 :   unsigned int drap_align;
    7469      1365359 :   unsigned int sp_align;
    7470      1365359 :   bool hfp_ok  = fp_valid_at (cfa_offset);
    7471      1365359 :   bool drap_ok = m->fs.drap_valid;
    7472      1365359 :   bool sp_ok   = sp_valid_at (cfa_offset);
    7473              : 
    7474      1365359 :   hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
    7475              : 
    7476              :   /* Filter out any registers that don't meet the requested alignment
    7477              :      criteria.  */
    7478      1365359 :   if (align_reqested)
    7479              :     {
    7480       967435 :       if (m->fs.realigned)
    7481        28160 :         hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
    7482              :       /* SEH unwind code does not currently support REG_CFA_EXPRESSION
    7483              :          notes (which we would need to use a realigned stack pointer),
    7484              :          so disable on SEH targets.  */
    7485       939275 :       else if (m->fs.sp_realigned)
    7486        28328 :         sp_align = crtl->stack_alignment_needed;
    7487              : 
    7488       967435 :       hfp_ok = hfp_ok && hfp_align >= align_reqested;
    7489       967435 :       drap_ok = drap_ok && drap_align >= align_reqested;
    7490       967435 :       sp_ok = sp_ok && sp_align >= align_reqested;
    7491              :     }
    7492              : 
    7493      1365359 :   if (m->use_fast_prologue_epilogue)
    7494              :     {
    7495              :       /* Choose the base register most likely to allow the most scheduling
    7496              :          opportunities.  Generally FP is valid throughout the function,
    7497              :          while DRAP must be reloaded within the epilogue.  But choose either
    7498              :          over the SP due to increased encoding size.  */
    7499              : 
    7500       648977 :       if (hfp_ok)
    7501              :         {
    7502       117842 :           base_reg = hard_frame_pointer_rtx;
    7503       117842 :           base_offset = m->fs.fp_offset - cfa_offset;
    7504              :         }
    7505       531135 :       else if (drap_ok)
    7506              :         {
    7507            0 :           base_reg = crtl->drap_reg;
    7508            0 :           base_offset = 0 - cfa_offset;
    7509              :         }
    7510       531135 :       else if (sp_ok)
    7511              :         {
    7512       531135 :           base_reg = stack_pointer_rtx;
    7513       531135 :           base_offset = m->fs.sp_offset - cfa_offset;
    7514              :         }
    7515              :     }
    7516              :   else
    7517              :     {
    7518       716382 :       HOST_WIDE_INT toffset;
    7519       716382 :       int len = 16, tlen;
    7520              : 
    7521              :       /* Choose the base register with the smallest address encoding.
    7522              :          With a tie, choose FP > DRAP > SP.  */
    7523       716382 :       if (sp_ok)
    7524              :         {
    7525       699064 :           base_reg = stack_pointer_rtx;
    7526       699064 :           base_offset = m->fs.sp_offset - cfa_offset;
    7527      1389927 :           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
    7528              :         }
    7529       716382 :       if (drap_ok)
    7530              :         {
    7531            0 :           toffset = 0 - cfa_offset;
    7532            0 :           tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
    7533            0 :           if (tlen <= len)
    7534              :             {
    7535            0 :               base_reg = crtl->drap_reg;
    7536            0 :               base_offset = toffset;
    7537            0 :               len = tlen;
    7538              :             }
    7539              :         }
    7540       716382 :       if (hfp_ok)
    7541              :         {
    7542       312700 :           toffset = m->fs.fp_offset - cfa_offset;
    7543       312700 :           tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
    7544       312700 :           if (tlen <= len)
    7545              :             {
    7546       222030 :               base_reg = hard_frame_pointer_rtx;
    7547       222030 :               base_offset = toffset;
    7548              :             }
    7549              :         }
    7550              :     }
    7551              : 
    7552              :     /* Set the align return value to the alignment of the register chosen.  */
    7553      1365359 :     if (align)
    7554              :       {
    7555       967435 :         if (base_reg == stack_pointer_rtx)
    7556       685730 :           *align = sp_align;
    7557       281705 :         else if (base_reg == crtl->drap_reg)
    7558            0 :           *align = drap_align;
    7559       281705 :         else if (base_reg == hard_frame_pointer_rtx)
    7560       281705 :           *align = hfp_align;
    7561              :       }
    7562      1365359 : }
    7563              : 
    7564              : /* Return an RTX that points to CFA_OFFSET within the stack frame and
    7565              :    the alignment of address.  If ALIGN is non-null, it should point to
    7566              :    an alignment value (in bits) that is preferred or zero and will
    7567              :    receive the alignment of the base register that was selected,
    7568              :    irrespective of whether or not CFA_OFFSET is a multiple of that
    7569              :    alignment value.  If it is possible for the base register offset to be
    7570              :    non-immediate then SCRATCH_REGNO should specify a scratch register to
    7571              :    use.
    7572              : 
    7573              :    The valid base registers are taken from CFUN->MACHINE->FS.  */
    7574              : 
    7575              : static rtx
    7576      1365359 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
    7577              :                  unsigned int scratch_regno = INVALID_REGNUM)
    7578              : {
    7579      1365359 :   rtx base_reg = NULL;
    7580      1365359 :   HOST_WIDE_INT base_offset = 0;
    7581              : 
    7582              :   /* If a specific alignment is requested, try to get a base register
    7583              :      with that alignment first.  */
    7584      1365359 :   if (align && *align)
    7585       967435 :     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
    7586              : 
    7587      1365359 :   if (!base_reg)
    7588       397924 :     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
    7589              : 
    7590      1365359 :   gcc_assert (base_reg != NULL);
    7591              : 
    7592      1365359 :   rtx base_offset_rtx = GEN_INT (base_offset);
    7593              : 
    7594      1416751 :   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    7595              :     {
    7596            1 :       gcc_assert (scratch_regno != INVALID_REGNUM);
    7597              : 
    7598            1 :       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
    7599            1 :       emit_move_insn (scratch_reg, base_offset_rtx);
    7600              : 
    7601            1 :       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    7602              :     }
    7603              : 
    7604      1416750 :   return plus_constant (Pmode, base_reg, base_offset);
    7605              : }
    7606              : 
    7607              : /* Emit code to save registers in the prologue, using PUSH or APX PUSH2 insns.  */
    7608              : 
    7609              : static void
    7610       429611 : ix86_emit_save_regs (void)
    7611              : {
    7612       429611 :   int regno;
    7613       429611 :   rtx_insn *insn;
    7614       429611 :   bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
    7615              : 
    7616       429611 :   if (!TARGET_APX_PUSH2POP2
    7617           90 :       || !ix86_can_use_push2pop2 ()
    7618       429699 :       || cfun->machine->func_type != TYPE_NORMAL)
    7619              :     {
    7620     39945732 :       for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    7621     39516208 :         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7622              :           {
    7623      1202179 :             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
    7624              :                                         use_ppx));
    7625      1202179 :             RTX_FRAME_RELATED_P (insn) = 1;
    7626              :           }
    7627              :     }
    7628              :   else
    7629              :     {
    7630           87 :       int regno_list[2];
    7631           87 :       regno_list[0] = regno_list[1] = -1;
    7632           87 :       int loaded_regnum = 0;
    7633           87 :       bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
    7634              : 
    7635         8091 :       for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    7636         8004 :         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7637              :           {
    7638          127 :             if (aligned)
    7639              :               {
    7640           45 :                 regno_list[loaded_regnum++] = regno;
    7641           45 :                 if (loaded_regnum == 2)
    7642              :                   {
    7643           19 :                     gcc_assert (regno_list[0] != -1
    7644              :                                 && regno_list[1] != -1
    7645              :                                 && regno_list[0] != regno_list[1]);
    7646           19 :                     const int offset = UNITS_PER_WORD * 2;
    7647           19 :                     rtx mem = gen_rtx_MEM (TImode,
    7648           19 :                                            gen_rtx_PRE_DEC (Pmode,
    7649              :                                                             stack_pointer_rtx));
    7650           19 :                     insn = emit_insn (gen_push2 (mem,
    7651              :                                                  gen_rtx_REG (word_mode,
    7652              :                                                               regno_list[0]),
    7653              :                                                  gen_rtx_REG (word_mode,
    7654              :                                                               regno_list[1]),
    7655              :                                                  use_ppx));
    7656           19 :                     RTX_FRAME_RELATED_P (insn) = 1;
    7657           19 :                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
    7658              : 
    7659           57 :                     for (int i = 0; i < 2; i++)
    7660              :                       {
    7661           76 :                         rtx dwarf_reg = gen_rtx_REG (word_mode,
    7662           38 :                                                      regno_list[i]);
    7663           38 :                         rtx sp_offset = plus_constant (Pmode,
    7664              :                                                        stack_pointer_rtx,
    7665           38 :                                                        + UNITS_PER_WORD
    7666           38 :                                                          * (1 - i));
    7667           38 :                         rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
    7668              :                                                               sp_offset),
    7669              :                                                dwarf_reg);
    7670           38 :                         RTX_FRAME_RELATED_P (tmp) = 1;
    7671           38 :                         XVECEXP (dwarf, 0, i + 1) = tmp;
    7672              :                       }
    7673           19 :                     rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
    7674              :                                               plus_constant (Pmode,
    7675              :                                                              stack_pointer_rtx,
    7676              :                                                              -offset));
    7677           19 :                     RTX_FRAME_RELATED_P (sp_tmp) = 1;
    7678           19 :                     XVECEXP (dwarf, 0, 0) = sp_tmp;
    7679           19 :                     add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    7680              : 
    7681           19 :                     loaded_regnum = 0;
    7682           19 :                     regno_list[0] = regno_list[1] = -1;
    7683              :                   }
    7684              :               }
    7685              :             else
    7686              :               {
    7687           82 :                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
    7688              :                                             use_ppx));
    7689           82 :                 RTX_FRAME_RELATED_P (insn) = 1;
    7690           82 :                 aligned = true;
    7691              :               }
    7692              :           }
    7693           87 :       if (loaded_regnum == 1)  /* One register left unpaired; push it alone.  */
    7694              :         {
    7695            7 :           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
    7696            7 :                                                    regno_list[0]),
    7697              :                                       use_ppx));
    7698            7 :           RTX_FRAME_RELATED_P (insn) = 1;
    7699              :         }
    7700              :     }
    7701       429611 : }
    7702              : 
    7703              : /* Emit a single register save at CFA - CFA_OFFSET.  */
    7704              : 
    7705              : static void
    7706       613352 : ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
    7707              :                               HOST_WIDE_INT cfa_offset)
    7708              : {
    7709       613352 :   struct machine_function *m = cfun->machine;
    7710       613352 :   rtx reg = gen_rtx_REG (mode, regno);
    7711       613352 :   rtx mem, addr, base, insn;
    7712       613352 :   unsigned int align = GET_MODE_ALIGNMENT (mode);
    7713              : 
    7714       613352 :   addr = choose_baseaddr (cfa_offset, &align);
    7715       613352 :   mem = gen_frame_mem (mode, addr);
    7716              : 
    7717              :   /* The location alignment depends upon the base register.  */
    7718       613352 :   align = MIN (GET_MODE_ALIGNMENT (mode), align);
    7719       613352 :   gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
    7720       613352 :   set_mem_align (mem, align);
    7721              : 
    7722       613352 :   insn = emit_insn (gen_rtx_SET (mem, reg));
    7723       613352 :   RTX_FRAME_RELATED_P (insn) = 1;
    7724              : 
    7725       613352 :   base = addr;
    7726       613352 :   if (GET_CODE (base) == PLUS)
    7727       601467 :     base = XEXP (base, 0);
    7728       613352 :   gcc_checking_assert (REG_P (base));
    7729              : 
    7730              :   /* When saving registers into a re-aligned local stack frame, avoid
    7731              :      any tricky guessing by dwarf2out.  */
    7732       613352 :   if (m->fs.realigned)
    7733              :     {
    7734        12800 :       gcc_checking_assert (stack_realign_drap);
    7735              : 
    7736        12800 :       if (regno == REGNO (crtl->drap_reg))
    7737              :         {
    7738              :           /* A bit of a hack.  We force the DRAP register to be saved in
    7739              :              the re-aligned stack frame, which provides us with a copy
    7740              :              of the CFA that will last past the prologue.  Install it.  */
    7741            0 :           gcc_checking_assert (cfun->machine->fs.fp_valid);
    7742            0 :           addr = plus_constant (Pmode, hard_frame_pointer_rtx,
    7743            0 :                                 cfun->machine->fs.fp_offset - cfa_offset);
    7744            0 :           mem = gen_rtx_MEM (mode, addr);
    7745            0 :           add_reg_note (insn, REG_CFA_DEF_CFA, mem);
    7746              :         }
    7747              :       else
    7748              :         {
    7749              :           /* The frame pointer is a stable reference within the
    7750              :              aligned frame.  Use it.  */
    7751        12800 :           gcc_checking_assert (cfun->machine->fs.fp_valid);
    7752        12800 :           addr = plus_constant (Pmode, hard_frame_pointer_rtx,
    7753        12800 :                                 cfun->machine->fs.fp_offset - cfa_offset);
    7754        12800 :           mem = gen_rtx_MEM (mode, addr);
    7755        12800 :           add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    7756              :         }
    7757              :     }
    7758              : 
    7759       600552 :   else if (base == stack_pointer_rtx && m->fs.sp_realigned
    7760        12881 :            && cfa_offset >= m->fs.sp_realigned_offset)
    7761              :     {
    7762        12881 :       gcc_checking_assert (stack_realign_fp);
    7763        12881 :       add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    7764              :     }
    7765              : 
    7766              :   /* The memory may not be relative to the current CFA register,
    7767              :      which means that we may need to generate a new pattern for
    7768              :      use by the unwind info.  */
    7769       587671 :   else if (base != m->fs.cfa_reg)
    7770              :     {
    7771        45097 :       addr = plus_constant (Pmode, m->fs.cfa_reg,
    7772        45097 :                             m->fs.cfa_offset - cfa_offset);
    7773        45097 :       mem = gen_rtx_MEM (mode, addr);
    7774        45097 :       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    7775              :     }
    7776       613352 : }
    7777              : 
    7778              : /* Emit code to save registers using MOV insns.
    7779              :    First register is stored at CFA - CFA_OFFSET.  */
    7780              : static void
    7781        44786 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7782              : {
    7783        44786 :   unsigned int regno;
    7784              : 
    7785      4165098 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7786      4120312 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7787              :       {
    7788              :         /* Skip registers already processed by separate shrink-wrapping; their slot is still stepped over.  */
    7789       191076 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
    7790        85173 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
    7791       205883 :         cfa_offset -= UNITS_PER_WORD;
    7792              :       }
    7793        44786 : }
    7794              : 
    7795              : /* Emit code to save SSE registers using MOV insns.
    7796              :    First register is stored at CFA - CFA_OFFSET; each save uses V4SFmode.  */
    7797              : static void
    7798        33363 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7799              : {
    7800        33363 :   unsigned int regno;
    7801              : 
    7802      3102759 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7803      3069396 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7804              :       {
    7805       333657 :         ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
    7806       333657 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);
    7807              :       }
    7808        33363 : }
    7809              : 
    7810              : static GTY(()) rtx queued_cfa_restores;
    7811              : 
    7812              : /* Add a REG_CFA_RESTORE REG note to INSN or, if INSN is null, queue it until
    7813              :    the next stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
    7814              :    Don't add the note if the previously saved value will be left untouched
    7815              :    within stack red-zone till return, as unwinders can find the same value
    7816              :    in the register and on the stack.  */
    7817              : 
    7818              : static void
    7819      2290280 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
    7820              : {
    7821      2290280 :   if (!crtl->shrink_wrapped
    7822      2271276 :       && cfa_offset <= cfun->machine->fs.red_zone_offset)
    7823              :     return;
    7824              : 
    7825       771533 :   if (insn)
    7826              :     {
    7827       360729 :       add_reg_note (insn, REG_CFA_RESTORE, reg);
    7828       360729 :       RTX_FRAME_RELATED_P (insn) = 1;
    7829              :     }
    7830              :   else
    7831       410804 :     queued_cfa_restores
    7832       410804 :       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
    7833              : }
    7834              : 
    7835              : /* Add queued REG_CFA_RESTORE notes if any to INSN, and empty the queue.  */
    7836              : 
    7837              : static void
    7838      2550682 : ix86_add_queued_cfa_restore_notes (rtx insn)
    7839              : {
    7840      2550682 :   rtx last;
    7841      2550682 :   if (!queued_cfa_restores)
    7842              :     return;
    7843       410804 :   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    7844              :     ;  /* Empty body: just walk to the last queued note.  */
    7845        53053 :   XEXP (last, 1) = REG_NOTES (insn);
    7846        53053 :   REG_NOTES (insn) = queued_cfa_restores;
    7847        53053 :   queued_cfa_restores = NULL_RTX;
    7848        53053 :   RTX_FRAME_RELATED_P (insn) = 1;
    7849              : }
    7850              : 
    7851              : /* Expand prologue or epilogue stack adjustment.
    7852              :    The pattern exists to put a dependency on all ebp-based memory accesses.
    7853              :    STYLE should be negative if instructions should be marked as frame related,
    7854              :    zero if %r11 register is live and cannot be freely used and positive
    7855              :    otherwise.  */
    7856              : 
    7857              : static rtx
    7858      1583422 : pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
    7859              :                            int style, bool set_cfa)
    7860              : {
    7861      1583422 :   struct machine_function *m = cfun->machine;
    7862      1583422 :   rtx addend = offset;
    7863      1583422 :   rtx insn;
    7864      1583422 :   bool add_frame_related_expr = false;
    7865              : 
    7866      1801950 :   if (!x86_64_immediate_operand (offset, Pmode))
    7867              :     {
    7868              :       /* r11 is used by indirect sibcall return as well, set before the
    7869              :          epilogue and used after the epilogue.  */
    7870          199 :       if (style)
    7871          174 :         addend = gen_rtx_REG (Pmode, R11_REG);
    7872              :       else
    7873              :         {
    7874           25 :           gcc_assert (src != hard_frame_pointer_rtx
    7875              :                       && dest != hard_frame_pointer_rtx);
    7876              :           addend = hard_frame_pointer_rtx;
    7877              :         }
    7878          199 :       emit_insn (gen_rtx_SET (addend, offset));
    7879          199 :       if (style < 0)
    7880           88 :         add_frame_related_expr = true;
    7881              :     }
    7882              : 
    7883              :   /*  Shrink wrap separate may insert prologue between TEST and JMP.  In order
    7884              :       not to affect EFlags, emit add without reg clobbering.  */
    7885      1583422 :   if (crtl->shrink_wrapped_separate)
    7886        94670 :     insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
    7887        94670 :                       (Pmode, dest, src, addend));
    7888              :   else
    7889      1488752 :     insn = emit_insn (gen_pro_epilogue_adjust_stack_add
    7890      1488752 :                       (Pmode, dest, src, addend));
    7891              : 
    7892      1583422 :   if (style >= 0)
    7893       698301 :     ix86_add_queued_cfa_restore_notes (insn);
    7894              : 
    7895      1583422 :   if (set_cfa)
    7896              :     {
    7897      1221797 :       rtx r;
    7898              : 
    7899      1221797 :       gcc_assert (m->fs.cfa_reg == src);
    7900      1221797 :       m->fs.cfa_offset += INTVAL (offset);
    7901      1221797 :       m->fs.cfa_reg = dest;
    7902              : 
    7903      1417957 :       r = gen_rtx_PLUS (Pmode, src, offset);
    7904      1221797 :       r = gen_rtx_SET (dest, r);
    7905      1221797 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
    7906      1221797 :       RTX_FRAME_RELATED_P (insn) = 1;
    7907              :     }
    7908       361625 :   else if (style < 0)
    7909              :     {
    7910       295048 :       RTX_FRAME_RELATED_P (insn) = 1;
    7911       295048 :       if (add_frame_related_expr)
    7912              :         {
    7913           20 :           rtx r = gen_rtx_PLUS (Pmode, src, offset);
    7914           20 :           r = gen_rtx_SET (dest, r);
    7915           20 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
    7916              :         }
    7917              :     }
    7918              : 
    7919      1583422 :   if (dest == stack_pointer_rtx)
    7920              :     {
    7921      1583422 :       HOST_WIDE_INT ooffset = m->fs.sp_offset;
    7922      1583422 :       bool valid = m->fs.sp_valid;
    7923      1583422 :       bool realigned = m->fs.sp_realigned;
    7924              : 
    7925      1583422 :       if (src == hard_frame_pointer_rtx)
    7926              :         {
    7927        29749 :           valid = m->fs.fp_valid;
    7928        29749 :           realigned = false;
    7929        29749 :           ooffset = m->fs.fp_offset;
    7930              :         }
    7931      1553673 :       else if (src == crtl->drap_reg)
    7932              :         {
    7933            0 :           valid = m->fs.drap_valid;
    7934            0 :           realigned = false;
    7935            0 :           ooffset = 0;
    7936              :         }
    7937              :       else
    7938              :         {
    7939              :           /* Else there are two possibilities: SP itself, which we set
    7940              :              up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
    7941              :              taken care of this by hand along the eh_return path.  */
    7942      1553673 :           gcc_checking_assert (src == stack_pointer_rtx
    7943              :                                || offset == const0_rtx);
    7944              :         }
    7945              : 
    7946      1583422 :       m->fs.sp_offset = ooffset - INTVAL (offset);
    7947      1583422 :       m->fs.sp_valid = valid;
    7948      1583422 :       m->fs.sp_realigned = realigned;
    7949              :     }
    7950      1583422 :   return insn;
    7951              : }
    7952              : 
    7953              : /* Find an available register to be used as dynamic realign argument
    7954              :    pointer register.  Such a register will be written in prologue and
    7955              :    used at the beginning of the body, so it must not be
    7956              :         1. parameter passing register.
    7957              :         2. GOT pointer.
    7958              :    We reuse static-chain register if it is available.  Otherwise, we
    7959              :    use DI for i386 and R13 for x86-64.  We chose R13 since it has
    7960              :    shorter encoding.
    7961              : 
    7962              :    Return: the regno of chosen register.  */
    7963              : 
    7964              : static unsigned int
    7965         7295 : find_drap_reg (void)
    7966              : {
    7967         7295 :   tree decl = cfun->decl;
    7968              : 
    7969              :   /* Always use callee-saved register if there are no caller-saved
    7970              :      registers.  */
    7971         7295 :   if (TARGET_64BIT)
    7972              :     {
    7973              :       /* In preserve_none functions, any register can be used for DRAP,
    7974              :          except AX, R12–R15, DI, SI (argument registers), SP, and BP.  */
    7975         7010 :       if (cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    7976              :         return R11_REG;
    7977              : 
    7978              :       /* Use R13 for nested function or function need static chain.
    7979              :          Since function with tail call may use any caller-saved
    7980              :          registers in epilogue, DRAP must not use caller-saved
    7981              :          register in such case.  */
    7982         7009 :       if (DECL_STATIC_CHAIN (decl)
    7983         6967 :           || (cfun->machine->call_saved_registers
    7984              :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7985        13976 :           || crtl->tail_call_emit)
    7986          191 :         return R13_REG;
    7987              : 
    7988              :       return R10_REG;
    7989              :     }
    7990              :   else
    7991              :     {
    7992              :       /* Use DI for nested function or function need static chain.
    7993              :          Since function with tail call may use any caller-saved
    7994              :          registers in epilogue, DRAP must not use caller-saved
    7995              :          register in such case.  */
    7996          285 :       if (DECL_STATIC_CHAIN (decl)
    7997          285 :           || (cfun->machine->call_saved_registers
    7998          285 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7999          285 :           || crtl->tail_call_emit
    8000          550 :           || crtl->calls_eh_return)
    8001              :         return DI_REG;
    8002              : 
    8003              :       /* Reuse static chain register if it isn't used for parameter
    8004              :          passing.  */
    8005          265 :       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
    8006              :         {
    8007          265 :           unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
    8008          265 :           if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
    8009              :             return CX_REG;
    8010              :         }
    8011            0 :       return DI_REG;
    8012              :     }
    8013              : }
    8014              : 
    8015              : /* Return the minimum incoming stack boundary, in bits.  */
    8016              : 
    8017              : static unsigned int
    8018      1615608 : ix86_minimum_incoming_stack_boundary (bool sibcall)
    8019              : {
    8020      1615608 :   unsigned int incoming_stack_boundary;
    8021              : 
    8022              :   /* Stack of an interrupt handler is aligned to 128 bits in 64-bit mode.  */
    8023      1615608 :   if (cfun->machine->func_type != TYPE_NORMAL)
    8024          120 :     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
    8025              :   /* Prefer the one specified on the command line.  */
    8026      1615488 :   else if (ix86_user_incoming_stack_boundary)
    8027              :     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
    8028              :   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
    8029              :      boundary if -mstackrealign is used, this is not the sibcall check
    8030              :      and the estimated stack alignment is 128 bits.  */
    8031      1615466 :   else if (!sibcall
    8032      1482124 :            && ix86_force_align_arg_pointer
    8033         4572 :            && crtl->stack_alignment_estimated == 128)
    8034          596 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8035              :   else
    8036      1614870 :     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
    8037              : 
    8038              :   /* Incoming stack alignment can be changed on individual functions
    8039              :      via the force_align_arg_pointer attribute.  We use the smallest
    8040              :      incoming stack boundary.  */
    8041      1615608 :   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
    8042      3230610 :       && lookup_attribute ("force_align_arg_pointer",
    8043      1615002 :                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    8044         5708 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8045              : 
    8046              :   /* The incoming stack frame has to be aligned at least at
    8047              :      parm_stack_boundary.  */
    8048      1615608 :   if (incoming_stack_boundary < crtl->parm_stack_boundary)
    8049              :     incoming_stack_boundary = crtl->parm_stack_boundary;
    8050              : 
    8051              :   /* The stack at the entrance of main is aligned by the runtime.  We
    8052              :      use the smallest incoming stack boundary.  */
    8053      1615608 :   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
    8054       140777 :       && DECL_NAME (current_function_decl)
    8055       140777 :       && MAIN_NAME_P (DECL_NAME (current_function_decl))
    8056      1618082 :       && DECL_FILE_SCOPE_P (current_function_decl))
    8057         2474 :     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
    8058              : 
    8059      1615608 :   return incoming_stack_boundary;
    8060              : }
    8061              : 
    8062              : /* Update incoming stack boundary and estimated/preferred stack alignment.  */
    8063              : 
    8064              : static void
    8065      1482261 : ix86_update_stack_boundary (void)
    8066              : {
    8067      1482261 :   ix86_incoming_stack_boundary
    8068      1482261 :     = ix86_minimum_incoming_stack_boundary (false);
    8069              : 
    8070              :   /* x86-64 varargs need 16-byte stack alignment for register save area.  */
    8071      1482261 :   if (TARGET_64BIT
    8072      1355726 :       && cfun->stdarg
    8073        21336 :       && crtl->stack_alignment_estimated < 128)
    8074        10152 :     crtl->stack_alignment_estimated = 128;
    8075              : 
    8076              :   /* __tls_get_addr needs to be called with a 16-byte aligned stack.  */
    8077      1482261 :   if (ix86_tls_descriptor_calls_expanded_in_cfun
    8078         1073 :       && crtl->preferred_stack_boundary < 128)
    8079          745 :     crtl->preferred_stack_boundary = 128;
    8080              : 
    8081              :   /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
    8082              :      are 32 bits, but if force_align_arg_pointer is specified, it should
    8083              :      prefer 128 bits for a backward-compatibility reason, which is also
    8084              :      what the doc suggests.  */
    8085      1482261 :   if (lookup_attribute ("force_align_arg_pointer",
    8086      1482261 :                         TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
    8087      1482261 :       && crtl->preferred_stack_boundary < 128)
    8088            4 :     crtl->preferred_stack_boundary = 128;
    8089      1482261 : }
    8090              : 
    8091              : /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
    8092              :    needed or an rtx for DRAP otherwise.  */
    8093              : 
    8094              : static rtx
    8095      1586119 : ix86_get_drap_rtx (void)
    8096              : {
    8097              :   /* We must use DRAP if it is forced, or if there are outgoing
    8098              :      arguments on stack or the stack pointer register is clobbered by
    8099              :      an asm statement while ACCUMULATE_OUTGOING_ARGS is false.  */
    8100      1586119 :   if (ix86_force_drap
    8101      1586119 :       || ((cfun->machine->outgoing_args_on_stack
    8102      1254088 :            || crtl->sp_is_clobbered_by_asm)
    8103       330086 :           && !ACCUMULATE_OUTGOING_ARGS))
    8104       309891 :     crtl->need_drap = true;
    8105              : 
    8106      1586119 :   if (stack_realign_drap)
    8107              :     {
    8108              :       /* Copy DRAP into vDRAP and return vDRAP.  */
    8109         7295 :       unsigned int regno = find_drap_reg ();
    8110         7295 :       rtx drap_vreg;
    8111         7295 :       rtx arg_ptr;
    8112         7295 :       rtx_insn *seq, *insn;
    8113              : 
    8114         7580 :       arg_ptr = gen_rtx_REG (Pmode, regno);
    8115         7295 :       crtl->drap_reg = arg_ptr;
    8116              : 
    8117         7295 :       start_sequence ();
    8118         7295 :       drap_vreg = copy_to_reg (arg_ptr);
    8119         7295 :       seq = end_sequence ();
    8120              : 
    8121         7295 :       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
    8122         7295 :       if (!optimize)
    8123              :         {
    8124         1896 :           add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
    8125         1896 :           RTX_FRAME_RELATED_P (insn) = 1;
    8126              :         }
    8127         7295 :       return drap_vreg;
    8128              :     }
    8129              :   else
    8130              :     return NULL;
    8131              : }
    8132              : 
    8133              : /* Handle the TARGET_INTERNAL_ARG_POINTER hook: virtual_incoming_args_rtx.  */
    8134              : 
    8135              : static rtx
    8136      1482262 : ix86_internal_arg_pointer (void)
    8137              : {
    8138      1482262 :   return virtual_incoming_args_rtx;
    8139              : }
    8140              : 
    8141              : struct scratch_reg {
    8142              :   rtx reg;	/* The register handed out as scratch.  */
    8143              :   bool saved;	/* True if REG had to be pushed on the stack first.  */
    8144              : };
    8145              : 
    8146              : /* Return in SR a short-lived scratch register for use on function entry.
    8147              :    In 32-bit mode, it is valid only after the registers are saved
    8148              :    in the prologue.  This register must be released by means of
    8149              :    release_scratch_register_on_entry once it is dead.  */
    8150              : 
    8151              : static void
    8152           25 : get_scratch_register_on_entry (struct scratch_reg *sr)
    8153              : {
    8154           25 :   int regno;
    8155              : 
    8156           25 :   sr->saved = false;
    8157              : 
    8158           25 :   if (TARGET_64BIT)
    8159              :     {
    8160              :       /* We always use R11 in 64-bit mode.  */
    8161              :       regno = R11_REG;
    8162              :     }
    8163              :   else
    8164              :     {
    8165            0 :       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
    8166            0 :       bool fastcall_p
    8167            0 :         = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
    8168            0 :       bool thiscall_p
    8169            0 :         = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
    8170            0 :       bool static_chain_p = DECL_STATIC_CHAIN (decl);
    8171            0 :       int regparm = ix86_function_regparm (fntype, decl);
    8172            0 :       int drap_regno
    8173            0 :         = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
    8174              : 
    8175              :       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
    8176              :           for the static chain register.  */
    8177            0 :       if ((regparm < 1 || (fastcall_p && !static_chain_p))
    8178            0 :           && drap_regno != AX_REG)
    8179              :         regno = AX_REG;
    8180              :       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
    8181              :           for the static chain register.  */
    8182            0 :       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
    8183              :         regno = AX_REG;
    8184            0 :       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
    8185              :         regno = DX_REG;
    8186              :       /* ecx is the static chain register.  */
    8187            0 :       else if (regparm < 3 && !fastcall_p && !thiscall_p
    8188            0 :                && !static_chain_p
    8189            0 :                && drap_regno != CX_REG)
    8190              :         regno = CX_REG;
    8191            0 :       else if (ix86_save_reg (BX_REG, true, false))
    8192              :         regno = BX_REG;
    8193              :       /* esi is the static chain register.  */
    8194            0 :       else if (!(regparm == 3 && static_chain_p)
    8195            0 :                && ix86_save_reg (SI_REG, true, false))
    8196              :         regno = SI_REG;
    8197            0 :       else if (ix86_save_reg (DI_REG, true, false))
    8198              :         regno = DI_REG;
    8199              :       else  /* Nothing is free: spill AX (or DX if AX is DRAP).  */
    8200              :         {
    8201            0 :           regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
    8202            0 :           sr->saved = true;
    8203              :         }
    8204              :     }
    8205              : 
    8206           25 :   sr->reg = gen_rtx_REG (Pmode, regno);
    8207           25 :   if (sr->saved)
    8208              :     {
    8209            0 :       rtx_insn *insn = emit_insn (gen_push (sr->reg));
    8210            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    8211              :     }
    8212           25 : }
    8213              : 
    8214              : /* Release scratch register SR obtained from get_scratch_register_on_entry.
    8215              : 
    8216              :    If RELEASE_VIA_POP is true, we just pop the register off the stack
    8217              :    to release it.  This is what non-Linux systems use with -fstack-check.
    8218              : 
    8219              :    Otherwise we use OFFSET to locate the saved register and the
    8220              :    allocated stack space becomes part of the local frame and is
    8221              :    deallocated by the epilogue.  */
    8222              : 
    8223              : static void
    8224           25 : release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
    8225              :                                    bool release_via_pop)
    8226              : {
    8227           25 :   if (sr->saved)
    8228              :     {
    8229            0 :       if (release_via_pop)
    8230              :         {
    8231            0 :           struct machine_function *m = cfun->machine;
    8232            0 :           rtx x, insn = emit_insn (gen_pop (sr->reg));
    8233              : 
    8234              :           /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
    8235            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    8236            0 :           x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    8237            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8238            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    8239            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
    8240              :         }
    8241              :       else
    8242              :         {
    8243            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
    8244            0 :           x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
    8245            0 :           emit_insn (x);
    8246              :         }
    8247              :     }
    8248           25 : }
    8249              : 
    8250              : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
    8251              : 
    8252              :    If INT_REGISTERS_SAVED is true, then integer registers have already been
    8253              :    pushed on the stack.
    8254              : 
    8255              :    If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
    8256              :    beyond SIZE bytes.
    8257              : 
    8258              :    This assumes no knowledge of the current probing state, i.e. it is never
    8259              :    allowed to allocate more than PROBE_INTERVAL bytes of stack space without
    8260              :    a suitable probe.  */
    8261              : 
    8262              : static void
    8263          126 : ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
    8264              :                              const bool int_registers_saved,
    8265              :                              const bool protection_area)
    8266              : {
    8267          126 :   struct machine_function *m = cfun->machine;
    8268              : 
    8269              :   /* If this function does not statically allocate stack space, then
    8270              :      no probes are needed.  */
    8271          126 :   if (!size)
    8272              :     {
    8273              :       /* However, the allocation of space via pushes for register
    8274              :          saves could be viewed as allocating space, but without the
    8275              :          need to probe.  */
    8276           43 :       if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
    8277           23 :         dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
    8278              :       else
    8279           20 :         dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
    8280           43 :       return;
    8281              :     }
    8282              : 
    8283              :   /* If we are a noreturn function, then we have to consider the
    8284              :      possibility that we're called via a jump rather than a call.
    8285              : 
    8286              :      Thus we don't have the implicit probe generated by saving the
    8287              :      return address into the stack at the call.  Thus, the stack
    8288              :      pointer could be anywhere in the guard page.  The safe thing
    8289              :      to do is emit a probe now.
    8290              : 
    8291              :      The probe can be avoided if we have already emitted any callee
    8292              :      register saves into the stack or have a frame pointer (which will
    8293              :      have been saved as well).  Those saves will function as implicit
    8294              :      probes.
    8295              : 
    8296              :      ?!? This should be revamped to work like aarch64 and s390 where
    8297              :      we track the offset from the most recent probe.  Normally that
    8298              :      offset would be zero.  For a noreturn function we would reset
    8299              :      it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
    8300              :      we just probe when we cross PROBE_INTERVAL.  */
    8301           83 :   if (TREE_THIS_VOLATILE (cfun->decl)
    8302           15 :       && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    8303              :     {
    8304              :       /* We can safely use any register here since we're just going to push
    8305              :          its value and immediately pop it back.  But we do try and avoid
    8306              :          argument passing registers so as not to introduce dependencies in
    8307              :          the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
    8308           15 :       rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
    8309           15 :       rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
    8310           15 :       rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
    8311           15 :       m->fs.sp_offset -= UNITS_PER_WORD;
    8312           15 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8313              :         {
    8314           15 :           m->fs.cfa_offset -= UNITS_PER_WORD;
    8315           15 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    8316           15 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8317           15 :           add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
    8318           15 :           RTX_FRAME_RELATED_P (insn_push) = 1;
    8319           15 :           x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    8320           15 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8321           15 :           add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
    8322           15 :           RTX_FRAME_RELATED_P (insn_pop) = 1;
    8323              :         }
    8324           15 :       emit_insn (gen_blockage ());
    8325              :     }
    8326              : 
    8327           83 :   const HOST_WIDE_INT probe_interval = get_probe_interval ();
    8328           83 :   const int dope = 4 * UNITS_PER_WORD;
    8329              : 
    8330              :   /* If there is a protection area, take it into account in the size.  */
    8331           83 :   if (protection_area)
    8332           24 :     size += probe_interval + dope;
    8333              : 
    8334              :   /* If we allocate less than the size of the guard statically,
    8335              :      then no probing is necessary, but we do need to allocate
    8336              :      the stack.  */
    8337           59 :   else if (size < (1 << param_stack_clash_protection_guard_size))
    8338              :     {
    8339           38 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8340              :                                  GEN_INT (-size), -1,
    8341           38 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    8342           38 :       dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
    8343           38 :       return;
    8344              :     }
    8345              : 
    8346              :   /* We're allocating a large enough stack frame that we need to
    8347              :      emit probes.  Either emit them inline or in a loop depending
    8348              :      on the size.  */
    8349           45 :   if (size <= 4 * probe_interval)
    8350              :     {
    8351              :       HOST_WIDE_INT i;
    8352           47 :       for (i = probe_interval; i <= size; i += probe_interval)
    8353              :         {
    8354              :           /* Allocate PROBE_INTERVAL bytes.  */
    8355           27 :           rtx insn
    8356           27 :             = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8357              :                                          GEN_INT (-probe_interval), -1,
    8358           27 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    8359           27 :           add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
    8360              : 
    8361              :           /* And probe at *sp.  */
    8362           27 :           emit_stack_probe (stack_pointer_rtx);
    8363           27 :           emit_insn (gen_blockage ());
    8364              :         }
    8365              : 
    8366              :       /* We need to allocate space for the residual, but we do not need
    8367              :          to probe the residual...  */
    8368           20 :       HOST_WIDE_INT residual = (i - probe_interval - size);
    8369           20 :       if (residual)
    8370              :         {
    8371           20 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8372              :                                      GEN_INT (residual), -1,
    8373           20 :                                      m->fs.cfa_reg == stack_pointer_rtx);
    8374              : 
    8375              :           /* ...except if there is a protection area to maintain.  */
    8376           20 :           if (protection_area)
    8377           11 :             emit_stack_probe (stack_pointer_rtx);
    8378              :         }
    8379              : 
    8380           20 :       dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    8381              :     }
    8382              :   else
    8383              :     {
    8384              :       /* We expect the GP registers to be saved when probes are used
    8385              :          as the probing sequences might need a scratch register and
    8386              :          the routine to allocate one assumes the integer registers
    8387              :          have already been saved.  */
    8388           25 :       gcc_assert (int_registers_saved);
    8389              : 
    8390           25 :       struct scratch_reg sr;
    8391           25 :       get_scratch_register_on_entry (&sr);
    8392              : 
    8393              :       /* If we needed to save a register, then account for any space
    8394              :          that was pushed (we are not going to pop the register when
    8395              :          we do the restore).  */
    8396           25 :       if (sr.saved)
    8397            0 :         size -= UNITS_PER_WORD;
    8398              : 
    8399              :       /* Step 1: round SIZE down to a multiple of the interval.  */
    8400           25 :       HOST_WIDE_INT rounded_size = size & -probe_interval;
    8401              : 
    8402              :       /* Step 2: compute final value of the loop counter.  Use lea if
    8403              :          possible.  */
    8404           25 :       rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
    8405           25 :       rtx insn;
    8406           25 :       if (address_no_seg_operand (addr, Pmode))
    8407           13 :         insn = emit_insn (gen_rtx_SET (sr.reg, addr));
    8408              :       else
    8409              :         {
    8410           12 :           emit_move_insn (sr.reg, GEN_INT (-rounded_size));
    8411           12 :           insn = emit_insn (gen_rtx_SET (sr.reg,
    8412              :                                          gen_rtx_PLUS (Pmode, sr.reg,
    8413              :                                                        stack_pointer_rtx)));
    8414              :         }
    8415           25 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8416              :         {
    8417           22 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    8418           22 :                         plus_constant (Pmode, sr.reg,
    8419           22 :                                        m->fs.cfa_offset + rounded_size));
    8420           22 :           RTX_FRAME_RELATED_P (insn) = 1;
    8421              :         }
    8422              : 
    8423              :       /* Step 3: the loop.  */
    8424           25 :       rtx size_rtx = GEN_INT (rounded_size);
    8425           25 :       insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
    8426              :                                                     size_rtx));
    8427           25 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8428              :         {
    8429           22 :           m->fs.cfa_offset += rounded_size;
    8430           22 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    8431           22 :                         plus_constant (Pmode, stack_pointer_rtx,
    8432           22 :                                        m->fs.cfa_offset));
    8433           22 :           RTX_FRAME_RELATED_P (insn) = 1;
    8434              :         }
    8435           25 :       m->fs.sp_offset += rounded_size;
    8436           25 :       emit_insn (gen_blockage ());
    8437              : 
    8438              :       /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
    8439              :          is equal to ROUNDED_SIZE.  */
    8440              : 
    8441           25 :       if (size != rounded_size)
    8442              :         {
    8443           25 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8444              :                                      GEN_INT (rounded_size - size), -1,
    8445           25 :                                      m->fs.cfa_reg == stack_pointer_rtx);
    8446              : 
    8447           25 :           if (protection_area)
    8448           13 :             emit_stack_probe (stack_pointer_rtx);
    8449              :         }
    8450              : 
    8451           25 :       dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
    8452              : 
    8453              :       /* This does not deallocate the space reserved for the scratch
    8454              :          register.  That will be deallocated in the epilogue.  */
    8455           25 :       release_scratch_register_on_entry (&sr, size, false);
    8456              :     }
    8457              : 
    8458              :   /* Adjust back to account for the protection area.  */
    8459           45 :   if (protection_area)
    8460           24 :     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8461           24 :                                GEN_INT (probe_interval + dope), -1,
    8462           24 :                                m->fs.cfa_reg == stack_pointer_rtx);
    8463              : 
    8464              :   /* Make sure nothing is scheduled before we are done.  */
    8465           45 :   emit_insn (gen_blockage ());
    8466              : }
    8467              : 
    8468              : /* Adjust the stack pointer up to REG while probing it.  */
    8469              : 
    8470              : const char *
    8471           25 : output_adjust_stack_and_probe (rtx reg)
    8472              : {
    8473           25 :   static int labelno = 0;
    8474           25 :   char loop_lab[32];
    8475           25 :   rtx xops[2];
    8476              : 
    8477           25 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8478              : 
    8479              :   /* Loop.  */
    8480           25 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8481              : 
    8482              :   /* SP = SP - PROBE_INTERVAL.  */
    8483           25 :   xops[0] = stack_pointer_rtx;
    8484           37 :   xops[1] = GEN_INT (get_probe_interval ());
    8485           25 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8486              : 
    8487              :   /* Probe at SP.  */
    8488           25 :   xops[1] = const0_rtx;
    8489           25 :   output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);
    8490              : 
    8491              :   /* Test if SP == LAST_ADDR, which is held in REG.  */
    8492           25 :   xops[0] = stack_pointer_rtx;
    8493           25 :   xops[1] = reg;
    8494           25 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8495              : 
    8496              :   /* Branch back to the loop label until they are equal.  */
    8497           25 :   fputs ("\tjne\t", asm_out_file);
    8498           25 :   assemble_name_raw (asm_out_file, loop_lab);
    8499           25 :   fputc ('\n', asm_out_file);
    8500              : 
    8501           25 :   return "";
    8502              : }
    8503              : 
    8504              : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
    8505              :    inclusive.  These are offsets from the current stack pointer.
    8506              : 
    8507              :    INT_REGISTERS_SAVED is true if integer registers have already been
    8508              :    pushed on the stack; the run-time loop path below asserts it.  */
    8509              : 
    8510              : static void
    8511            0 : ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
    8512              :                              const bool int_registers_saved)
    8513              : {
    8514            0 :   const HOST_WIDE_INT probe_interval = get_probe_interval ();
    8515              : 
    8516              :   /* See if we have a constant small number of probes to generate.  If so,
    8517              :      that's the easy case.  The run-time loop is made up of 6 insns in the
    8518              :      generic case while the compile-time loop is made up of n insns for n #
    8519              :      of intervals.  */
    8520            0 :   if (size <= 6 * probe_interval)
    8521              :     {
    8522              :       HOST_WIDE_INT i;
    8523              : 
    8524              :       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
    8525              :          it exceeds SIZE.  If only one probe is needed, this will not
    8526              :          generate any code.  Then probe at FIRST + SIZE.  */
    8527            0 :       for (i = probe_interval; i < size; i += probe_interval)
    8528            0 :         emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8529            0 :                                          -(first + i)));
    8530              : 
    8531            0 :       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8532            0 :                                        -(first + size)));
    8533              :     }
    8534              : 
    8535              :   /* Otherwise, do the same as above, but in a loop.  Note that we must be
    8536              :      extra careful with variables wrapping around because we might be at
    8537              :      the very top (or the very bottom) of the address space and we have
    8538              :      to be able to handle this case properly; in particular, we use an
    8539              :      equality test for the loop condition.  */
    8540              :   else
    8541              :     {
    8542              :       /* We expect the GP registers to be saved when probes are used
    8543              :          as the probing sequences might need a scratch register and
    8544              :          the routine to allocate one assumes the integer registers
    8545              :          have already been saved.  */
    8546            0 :       gcc_assert (int_registers_saved);
    8547              : 
    8548            0 :       HOST_WIDE_INT rounded_size, last;
    8549            0 :       struct scratch_reg sr;
    8550              : 
    8551            0 :       get_scratch_register_on_entry (&sr);
    8552              : 
    8553              : 
    8554              :       /* Step 1: round SIZE to the previous multiple of the interval.  */
    8555              : 
    8556            0 :       rounded_size = ROUND_DOWN (size, probe_interval);
    8557              : 
    8558              : 
    8559              :       /* Step 2: compute initial and final value of the loop counter.  */
    8560              : 
    8561              :       /* TEST_OFFSET = FIRST, held negated in the scratch register.  */
    8562            0 :       emit_move_insn (sr.reg, GEN_INT (-first));
    8563              : 
    8564              :       /* LAST_OFFSET = FIRST + ROUNDED_SIZE; it is also passed negated below.  */
    8565            0 :       last = first + rounded_size;
    8566              : 
    8567              : 
    8568              :       /* Step 3: the loop
    8569              : 
    8570              :          do
    8571              :            {
    8572              :              TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
    8573              :              probe at TEST_ADDR
    8574              :            }
    8575              :          while (TEST_ADDR != LAST_ADDR)
    8576              : 
    8577              :          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
    8578              :          until N * PROBE_INTERVAL is equal to ROUNDED_SIZE.  */
    8579              : 
    8580            0 :       emit_insn
    8581            0 :         (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
    8582              : 
    8583              : 
    8584              :       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
    8585              :          that SIZE is equal to ROUNDED_SIZE.  */
    8586              : 
    8587            0 :       if (size != rounded_size)
    8588            0 :         emit_stack_probe (plus_constant (Pmode,
    8589            0 :                                          gen_rtx_PLUS (Pmode,
    8590              :                                                        stack_pointer_rtx,
    8591              :                                                        sr.reg),
    8592            0 :                                          rounded_size - size));
    8593              : 
    8594            0 :       release_scratch_register_on_entry (&sr, size, true);
    8595              :     }
    8596              : 
    8597              :   /* Make sure nothing is scheduled before we are done.  */
    8598            0 :   emit_insn (gen_blockage ());
    8599            0 : }
    8600              : 
    8601              : /* Probe a range of stack addresses from REG to END, inclusive.  These are
    8602              :    offsets from the current stack pointer.  Always returns the empty string.  */
    8603              : 
    8604              : const char *
    8605            0 : output_probe_stack_range (rtx reg, rtx end)
    8606              : {
    8607            0 :   static int labelno = 0;
    8608            0 :   char loop_lab[32];
    8609            0 :   rtx xops[3];
    8610              : 
    8611            0 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8612              : 
    8613              :   /* Loop.  */
    8614            0 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8615              : 
    8616              :   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL (a SUB, as REG is added to SP below).  */
    8617            0 :   xops[0] = reg;
    8618            0 :   xops[1] = GEN_INT (get_probe_interval ());
    8619            0 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8620              : 
    8621              :   /* Probe at TEST_ADDR, i.e. SP + REG, by OR-ing a zero byte into it.  */
    8622            0 :   xops[0] = stack_pointer_rtx;
    8623            0 :   xops[1] = reg;
    8624            0 :   xops[2] = const0_rtx;
    8625            0 :   output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);
    8626              : 
    8627              :   /* Test if TEST_ADDR == LAST_ADDR.  */
    8628            0 :   xops[0] = reg;
    8629            0 :   xops[1] = end;
    8630            0 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8631              : 
    8632              :   /* Branch back to the loop label while they differ.  */
    8633            0 :   fputs ("\tjne\t", asm_out_file);
    8634            0 :   assemble_name_raw (asm_out_file, loop_lab);
    8635            0 :   fputc ('\n', asm_out_file);
    8636              : 
    8637            0 :   return "";
    8638              : }
    8639              : 
    8640              : /* Data passed to ix86_update_stack_alignment through note_stores.  */
    8641              : struct stack_access_data
    8642              : {
    8643              :   /* The stack access register, looked for in MEM addresses.  */
    8644              :   const_rtx reg;
    8645              :   /* Pointer to the maximum stack alignment found so far; only raised.  */
    8646              :   unsigned int *stack_alignment;
    8647              : };
    8648              : 
    8649              : /* Return true if OP, a MEM, references an argument passed on stack.  */
    8650              : 
    8651              : static bool
    8652       135312 : ix86_argument_passed_on_stack_p (const_rtx op)
    8653              : {
    8654       135312 :   tree mem_expr = MEM_EXPR (op);
    8655       135312 :   if (mem_expr)
    8656              :     {
    8657       133440 :       tree var = get_base_address (mem_expr);
    8658       133440 :       return TREE_CODE (var) == PARM_DECL;  /* Base object is a parameter.  */
    8659              :     }
    8660              :   return false;  /* No MEM_EXPR: treated as not an argument.  */
    8661              : }
    8662              : 
    8663              : /* Update the maximum stack slot alignment from memory alignment in PAT.  */
    8664              : 
    8665              : static void
    8666       168807 : ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
    8667              : {
    8668              :   /* This insn may reference stack slot.  Update the maximum stack slot
    8669              :      alignment if the memory is referenced by the stack access register. */
    8670       168807 :   stack_access_data *p = (stack_access_data *) data;  /* From note_stores.  */
    8671              : 
    8672       168807 :   subrtx_iterator::array_type array;
    8673       705888 :   FOR_EACH_SUBRTX (iter, array, pat, ALL)
    8674              :     {
    8675       565743 :       auto op = *iter;
    8676       565743 :       if (MEM_P (op))
    8677              :         {
    8678              :           /* NB: Ignore arguments passed on stack since caller is
    8679              :              responsible to align the outgoing stack for arguments
    8680              :              passed on stack.  */
    8681       165565 :           if (reg_mentioned_p (p->reg, XEXP (op, 0))
    8682       165565 :               && !ix86_argument_passed_on_stack_p (op))
    8683              :             {
    8684        28662 :               unsigned int alignment = MEM_ALIGN (op);
    8685              : 
    8686        28662 :               if (alignment > *p->stack_alignment)
    8687        28581 :                 *p->stack_alignment = alignment;
    8688              :               break;  /* Only the first such MEM in PAT is considered.  */
    8689              :             }
    8690              :           else
    8691       136903 :             iter.skip_subrtxes ();  /* Do not descend into this MEM.  */
    8692              :         }
    8693              :     }
    8694       168807 : }
    8695              : 
    8696              : /* Helper function for ix86_find_all_reg_uses.  If the source of SET mentions REGNO outside any MEM, add its Pmode register destination to REGSET and WORKLIST.  */
    8697              : 
    8698              : static void
    8699     45391300 : ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
    8700              :                           rtx set, unsigned int regno,
    8701              :                           auto_bitmap &worklist)
    8702              : {
    8703     45391300 :   rtx dest = SET_DEST (set);
    8704              : 
    8705     45391300 :   if (!REG_P (dest))
    8706     41119175 :     return;  /* Only register destinations are tracked.  */
    8707              : 
    8708              :   /* Reject non-Pmode modes.  */
    8709     34368672 :   if (GET_MODE (dest) != Pmode)
    8710              :     return;
    8711              : 
    8712     18201057 :   unsigned int dst_regno = REGNO (dest);
    8713              : 
    8714     18201057 :   if (TEST_HARD_REG_BIT (regset, dst_regno))
    8715              :     return;  /* Already in REGSET; nothing new to record.  */
    8716              : 
    8717      4272125 :   const_rtx src = SET_SRC (set);
    8718              : 
    8719      4272125 :   subrtx_iterator::array_type array;
    8720      8494657 :   FOR_EACH_SUBRTX (iter, array, src, ALL)
    8721              :     {
    8722      5494699 :       auto op = *iter;
    8723              : 
    8724      5494699 :       if (MEM_P (op))
    8725      2979477 :         iter.skip_subrtxes ();  /* REGNO inside a MEM does not count.  */
    8726              : 
    8727      5494699 :       if (REG_P (op) && REGNO (op) == regno)
    8728              :         {
    8729              :           /* Add this register to register set and queue it for processing.  */
    8730      1440603 :           add_to_hard_reg_set (&regset, Pmode, dst_regno);
    8731      1272167 :           bitmap_set_bit (worklist, dst_regno);
    8732      1272167 :           break;
    8733              :         }
    8734              :     }
    8735      4272125 : }
    8736              : 
    8737              : /* Find all registers defined with register REGNO; add them to REGSET and WORKLIST.  */
    8738              : 
    8739              : static void
    8740      2291425 : ix86_find_all_reg_uses (HARD_REG_SET &regset,
    8741              :                         unsigned int regno, auto_bitmap &worklist)
    8742              : {
    8743      2291425 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    8744     81573571 :        ref != NULL;
    8745     79282146 :        ref = DF_REF_NEXT_REG (ref))
    8746              :     {
    8747     79282146 :       if (DF_REF_IS_ARTIFICIAL (ref))
    8748     16556898 :         continue;  /* Skip artificial refs.  */
    8749              : 
    8750     62725248 :       rtx_insn *insn = DF_REF_INSN (ref);
    8751              : 
    8752     62725248 :       if (!NONJUMP_INSN_P (insn))
    8753     17992908 :         continue;  /* Only NONJUMP_INSN_P insns are examined.  */
    8754              : 
    8755     44732340 :       unsigned int ref_regno = DF_REF_REGNO (ref);
    8756              : 
    8757     44732340 :       rtx set = single_set (insn);
    8758     44732340 :       if (set)
    8759              :         {
    8760     43958863 :           ix86_find_all_reg_uses_1 (regset, set,
    8761              :                                     ref_regno, worklist);
    8762     43958863 :           continue;
    8763              :         }
    8764              : 
    8765       773477 :       rtx pat = PATTERN (insn);
    8766       773477 :       if (GET_CODE (pat) != PARALLEL)
    8767       124306 :         continue;  /* Without a single set, only PARALLELs are examined.  */
    8768              : 
    8769      2505771 :       for (int i = 0; i < XVECLEN (pat, 0); i++)
    8770              :         {
    8771      1856600 :           rtx exp = XVECEXP (pat, 0, i);
    8772              : 
    8773      1856600 :           if (GET_CODE (exp) == SET)
    8774      1432437 :             ix86_find_all_reg_uses_1 (regset, exp,
    8775              :                                       ref_regno, worklist);
    8776              :         }
    8777              :     }
    8778      2291425 : }
    8779              : 
    8780              : /* Return true if hard register REGNO, used in BB, has a definition requiring a
    8781              :    stack frame in a block that dominates BB.  Results are cached per REGNO and BB.  */
    8782              : 
    8783              : static bool
    8784        39627 : ix86_access_stack_p (unsigned int regno, basic_block bb,
    8785              :                      HARD_REG_SET &set_up_by_prologue,
    8786              :                      HARD_REG_SET &prologue_used,
    8787              :                      auto_bitmap reg_dominate_bbs_known[],
    8788              :                      auto_bitmap reg_dominate_bbs[])
    8789              : {
    8790        39627 :   if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
    8791        10575 :     return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);  /* Cached.  */
    8792              : 
    8793        29052 :   bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);
    8794              : 
    8795              :   /* Get all BBs which set REGNO and dominate the current BB from all
    8796              :      DEFs of REGNO, skipping artificial and clobbering DEFs.  */
    8797        29052 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    8798      1550310 :        def;
    8799      1521258 :        def = DF_REF_NEXT_REG (def))
    8800      1548762 :     if (!DF_REF_IS_ARTIFICIAL (def)
    8801      1546902 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    8802      1520044 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    8803              :       {
    8804      1518171 :         basic_block set_bb = DF_REF_BB (def);
    8805      1518171 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    8806              :           {
    8807        85618 :             rtx_insn *insn = DF_REF_INSN (def);
    8808              :             /* Return true if INSN requires stack.  */
    8809        85618 :             if (requires_stack_frame_p (insn, prologue_used,
    8810              :                                         set_up_by_prologue))
    8811              :               {
    8812        27504 :                 bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
    8813        27504 :                 return true;
    8814              :               }
    8815              :           }
    8816              :       }
    8817              : 
    8818              :   /* When we get here, REGNO used in the current BB doesn't access
    8819              :      stack.  The cached answer stays false as its bit was never set.  */
    8820              :   return false;
    8821              : }
    8822              : 
    8823              : /* Return true if memory operand OP needs alignment > ALIGNMENT, unless OP is
    8824              :    based on a variable whose DECL_RTL is a MEM with a SYMBOLIC_CONST address.  */
    8825              : 
    8826              : static bool
    8827     27715514 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
    8828              : {
    8829     27715514 :   bool need_alignment = MEM_ALIGN (op) > alignment;
    8830     27715514 :   tree mem_expr = MEM_EXPR (op);
    8831     27715514 :   if (!mem_expr)
    8832              :     return need_alignment;  /* No expression to inspect.  */
    8833              : 
    8834     22685841 :   tree var = get_base_address (mem_expr);
    8835     22685841 :   if (!VAR_P (var) || !DECL_RTL_SET_P (var))
    8836              :     return need_alignment;  /* Not a variable with DECL_RTL set.  */
    8837              : 
    8838     14375495 :   rtx x = DECL_RTL (var);
    8839     14375495 :   if (!MEM_P (x))
    8840              :     return need_alignment;
    8841              : 
    8842     14375492 :   x = XEXP (x, 0);  /* The address of the variable's MEM.  */
    8843     14375492 :   return !SYMBOLIC_CONST (x) && need_alignment;
    8844              : }
    8845              : 
    8846              : /* Return true if SET needs alignment > ALIGNMENT.  */
    8847              : 
    8848              : static bool
    8849     45390590 : ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
    8850              : {
    8851     45390590 :   rtx dest = SET_DEST (set);
    8852              : 
    8853     45390590 :   if (MEM_P (dest))
    8854     17168816 :     return ix86_need_alignment_p_2 (dest, alignment);  /* Store to memory.  */
    8855              : 
    8856     28221774 :   const_rtx src = SET_SRC (set);
    8857              : 
    8858     28221774 :   subrtx_iterator::array_type array;
    8859     81708055 :   FOR_EACH_SUBRTX (iter, array, src, ALL)
    8860              :     {
    8861     64032979 :       auto op = *iter;
    8862              : 
    8863     64032979 :       if (MEM_P (op))
    8864     10546698 :         return ix86_need_alignment_p_2 (op, alignment);  /* First MEM only.  */
    8865              :     }
    8866              : 
    8867     17675076 :   return false;  /* No memory reference in SET.  */
    8868     28221774 : }
    8869              : 
    8870              : /* Return true if INSN needs alignment > ALIGNMENT.  */
    8871              : 
    8872              : static bool
    8873     44732340 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
    8874              : {
    8875     44732340 :   rtx set = single_set (insn);
    8876     44732340 :   if (set)
    8877     43958863 :     return ix86_need_alignment_p_1 (set, alignment);
    8878              : 
    8879       773477 :   rtx pat = PATTERN (insn);
    8880       773477 :   if (GET_CODE (pat) != PARALLEL)
    8881              :     return false;  /* Neither a single set nor a PARALLEL.  */
    8882              : 
    8883      2504072 :   for (int i = 0; i < XVECLEN (pat, 0); i++)
    8884              :     {
    8885      1855694 :       rtx exp = XVECEXP (pat, 0, i);
    8886              : 
    8887      1855694 :       if (GET_CODE (exp) == SET
    8888      1855694 :           && ix86_need_alignment_p_1 (exp, alignment))
    8889              :         return true;  /* One SET in the PARALLEL is enough.  */
    8890              :     }
    8891              : 
    8892              :   return false;
    8893              : }
    8894              : 
    8895              : /* Set stack_frame_required to false if stack frame isn't required.
    8896              :    Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
    8897              :    slot used if stack frame is required and CHECK_STACK_SLOT is true.  */
    8898              : 
    8899              : static void
    8900      1481410 : ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
    8901              :                                     bool check_stack_slot)
    8902              : {
    8903      1481410 :   HARD_REG_SET set_up_by_prologue, prologue_used;
    8904      1481410 :   basic_block bb;
    8905              : 
    8906      5925640 :   CLEAR_HARD_REG_SET (prologue_used);
    8907      1481410 :   CLEAR_HARD_REG_SET (set_up_by_prologue);
    8908      1608055 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
    8909      1481410 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
    8910      1481410 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
    8911              :                        HARD_FRAME_POINTER_REGNUM);
    8912              : 
    8913      1481410 :   bool require_stack_frame = false;
    8914              : 
    8915     15826711 :   FOR_EACH_BB_FN (bb, cfun)
    8916              :     {
    8917     14345301 :       rtx_insn *insn;
    8918     90173956 :       FOR_BB_INSNS (bb, insn)
    8919     83678730 :         if (NONDEBUG_INSN_P (insn)
    8920     83678730 :             && requires_stack_frame_p (insn, prologue_used,
    8921              :                                        set_up_by_prologue))
    8922              :           {
    8923              :             require_stack_frame = true;
    8924              :             break;  /* Leaves only the insn loop; later blocks are still scanned.  */
    8925              :           }
    8926              :     }
    8927              : 
    8928      1481410 :   cfun->machine->stack_frame_required = require_stack_frame;
    8929              : 
    8930              :   /* Stop if we don't need to check stack slot.  */
    8931      1481410 :   if (!check_stack_slot)
    8932       787900 :     return;
    8933              : 
    8934              :   /* Cap STACK_ALIGNMENT at the preferred stack boundary; it is only raised below.  */
    8935       693510 :   if (stack_alignment > crtl->preferred_stack_boundary)
    8936       142938 :     stack_alignment = crtl->preferred_stack_boundary;
    8937              : 
    8938              :   HARD_REG_SET stack_slot_access;
    8939       693510 :   CLEAR_HARD_REG_SET (stack_slot_access);
    8940              : 
    8941              :   /* Stack slot can be accessed by stack pointer, frame pointer or
    8942              :      registers defined by stack pointer or frame pointer.  */
    8943       693510 :   auto_bitmap worklist;
    8944              : 
    8945       753052 :   add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
    8946       693510 :   bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
    8947              : 
    8948       693510 :   if (frame_pointer_needed)
    8949              :     {
    8950       334773 :       add_to_hard_reg_set (&stack_slot_access, Pmode,
    8951              :                            HARD_FRAME_POINTER_REGNUM);
    8952       325748 :       bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    8953              :     }
    8954              : 
    8955              :   /* Registers in HARD_STACK_SLOT_ACCESS always access stack.  */
    8956       693510 :   HARD_REG_SET hard_stack_slot_access = stack_slot_access;
    8957              : 
    8958       693510 :   calculate_dominance_info (CDI_DOMINATORS);
    8959              : 
    8960      2291425 :   unsigned int regno;
    8961              : 
    8962      2291425 :   do
    8963              :     {
    8964      2291425 :       regno = bitmap_clear_first_set_bit (worklist);
    8965      2291425 :       ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    8966              :     }
    8967      2291425 :   while (!bitmap_empty_p (worklist));  /* Until no new register is found.  */
    8968              : 
    8969              :   hard_reg_set_iterator hrsi;
    8970              :   stack_access_data data;
    8971              : 
    8972    128299350 :   auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
    8973    128299350 :   auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];
    8974              : 
    8975       693510 :   data.stack_alignment = &stack_alignment;
    8976              : 
    8977      2984935 :   EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    8978              :     {
    8979      2291425 :       for (df_ref ref = DF_REG_USE_CHAIN (regno);
    8980     81573571 :            ref != NULL;
    8981     79282146 :            ref = DF_REF_NEXT_REG (ref))
    8982              :         {
    8983     79282146 :           if (DF_REF_IS_ARTIFICIAL (ref))
    8984     16556898 :             continue;
    8985              : 
    8986     62725248 :           rtx_insn *insn = DF_REF_INSN (ref);
    8987              : 
    8988     62725248 :           if (!NONJUMP_INSN_P (insn))
    8989     17992908 :             continue;
    8990              : 
    8991              :           /* Call ix86_access_stack_p only if INSN needs alignment >
    8992              :              STACK_ALIGNMENT and REGNO is not in HARD_STACK_SLOT_ACCESS.  */
    8993     44732340 :           if (ix86_need_alignment_p (insn, stack_alignment)
    8994     44732340 :               && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
    8995        39627 :                   || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
    8996              :                                           set_up_by_prologue,
    8997              :                                           prologue_used,
    8998              :                                           reg_dominate_bbs_known,
    8999              :                                           reg_dominate_bbs)))
    9000              :             {
    9001              :               /* Update stack alignment if REGNO is used for stack
    9002              :                  access.  */
    9003       162255 :               data.reg = DF_REF_REG (ref);
    9004       162255 :               note_stores (insn, ix86_update_stack_alignment, &data);
    9005              :             }
    9006              :         }
    9007              :     }
    9008              : 
    9009       693510 :   free_dominance_info (CDI_DOMINATORS);
    9010    129686370 : }
    9011              : 
    9012              : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
    9013              :    will guide prologue/epilogue to be generated in correct form.  */
    9014              : 
    9015              : static void
    9016      3436762 : ix86_finalize_stack_frame_flags (void)
    9017              : {
    9018              :   /* Check if stack realign is really needed after reload, and
    9019              :      stores result in cfun */
    9020      3436762 :   unsigned int incoming_stack_boundary
    9021      3436762 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    9022      3436762 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    9023      3436762 :   unsigned int stack_alignment
    9024      1179863 :     = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
    9025      4616625 :        ? crtl->max_used_stack_slot_alignment
    9026      3436762 :        : crtl->stack_alignment_needed);
    9027      3436762 :   unsigned int stack_realign
    9028      3436762 :     = (incoming_stack_boundary < stack_alignment);
    9029      3436762 :   bool recompute_frame_layout_p = false;
    9030              : 
    9031      3436762 :   if (crtl->stack_realign_finalized)
    9032              :     {
    9033              :       /* After stack_realign_needed is finalized, we can no longer
    9034              :          change it.  */
    9035      1955352 :       gcc_assert (crtl->stack_realign_needed == stack_realign);
    9036      1955352 :       return;
    9037              :     }
    9038              : 
    9039              :   /* It is always safe to compute max_used_stack_alignment.  We
    9040              :      compute it only if 128-bit aligned load/store may be generated
    9041              :      on misaligned stack slot which will lead to segfault. */
    9042      2962820 :   bool check_stack_slot
    9043      1481410 :     = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
    9044      1481410 :   ix86_find_max_used_stack_alignment (stack_alignment,
    9045              :                                       check_stack_slot);
    9046              : 
    9047              :   /* If the only reason for frame_pointer_needed is that we conservatively
    9048              :      assumed stack realignment might be needed or -fno-omit-frame-pointer
    9049              :      is used, but in the end nothing that needed the stack alignment had
    9050              :      been spilled nor stack access, clear frame_pointer_needed and say we
    9051              :      don't need stack realignment.
    9052              : 
    9053              :      When vector register is used for piecewise move and store, we don't
    9054              :      increase stack_alignment_needed as there is no register spill for
    9055              :      piecewise move and store.  Since stack_realign_needed is set to true
    9056              :      by checking stack_alignment_estimated which is updated by pseudo
    9057              :      vector register usage, we also need to check stack_realign_needed to
    9058              :      eliminate frame pointer.  */
    9059      1481410 :   if ((stack_realign
    9060      1415168 :        || (!flag_omit_frame_pointer && optimize)
    9061      1404918 :        || crtl->stack_realign_needed)
    9062        77151 :       && frame_pointer_needed
    9063        77151 :       && crtl->is_leaf
    9064        52686 :       && crtl->sp_is_unchanging
    9065        52634 :       && !ix86_current_function_calls_tls_descriptor
    9066        52634 :       && !crtl->accesses_prior_frames
    9067        52634 :       && !cfun->calls_alloca
    9068        52634 :       && !crtl->calls_eh_return
    9069              :       /* See ira_setup_eliminable_regset for the rationale.  */
    9070        52634 :       && !(STACK_CHECK_MOVING_SP
    9071        52634 :            && flag_stack_check
    9072            0 :            && flag_exceptions
    9073            0 :            && cfun->can_throw_non_call_exceptions)
    9074        52634 :       && !ix86_frame_pointer_required ()
    9075        52633 :       && ix86_get_frame_size () == 0
    9076        34964 :       && ix86_nsaved_sseregs () == 0
    9077      1516374 :       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    9078              :     {
    9079        34964 :       if (cfun->machine->stack_frame_required)
    9080              :         {
    9081              :           /* Stack frame is required.  If stack alignment needed is less
    9082              :              than incoming stack boundary, don't realign stack.  */
    9083          287 :           stack_realign = incoming_stack_boundary < stack_alignment;
    9084          287 :           if (!stack_realign)
    9085              :             {
    9086          287 :               crtl->max_used_stack_slot_alignment
    9087          287 :                 = incoming_stack_boundary;
    9088          287 :               crtl->stack_alignment_needed
    9089          287 :                 = incoming_stack_boundary;
    9090              :               /* Also update preferred_stack_boundary for leaf
    9091              :                  functions.  */
    9092          287 :               crtl->preferred_stack_boundary
    9093          287 :                 = incoming_stack_boundary;
    9094              :             }
    9095              :         }
    9096              :       else
    9097              :         {
    9098              :           /* If drap has been set, but it actually isn't live at the
    9099              :              start of the function, there is no reason to set it up.  */
    9100        34677 :           if (crtl->drap_reg)
    9101              :             {
    9102           35 :               basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9103           70 :               if (! REGNO_REG_SET_P (DF_LR_IN (bb),
    9104              :                                      REGNO (crtl->drap_reg)))
    9105              :                 {
    9106           35 :                   crtl->drap_reg = NULL_RTX;
    9107           35 :                   crtl->need_drap = false;
    9108              :                 }
    9109              :             }
    9110              :           else
    9111        34642 :             cfun->machine->no_drap_save_restore = true;
    9112              : 
    9113        34677 :           frame_pointer_needed = false;
    9114        34677 :           stack_realign = false;
    9115        34677 :           crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
    9116        34677 :           crtl->stack_alignment_needed = incoming_stack_boundary;
    9117        34677 :           crtl->stack_alignment_estimated = incoming_stack_boundary;
    9118        34677 :           if (crtl->preferred_stack_boundary > incoming_stack_boundary)
    9119            1 :             crtl->preferred_stack_boundary = incoming_stack_boundary;
    9120        34677 :           df_finish_pass (true);
    9121        34677 :           df_scan_alloc (NULL);
    9122        34677 :           df_scan_blocks ();
    9123        34677 :           df_compute_regs_ever_live (true);
    9124        34677 :           df_analyze ();
    9125              : 
    9126        34677 :           if (flag_var_tracking)
    9127              :             {
    9128              :               /* Since frame pointer is no longer available, replace it with
    9129              :                  stack pointer - UNITS_PER_WORD in debug insns.  */
    9130          134 :               df_ref ref, next;
    9131          134 :               for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
    9132          134 :                    ref; ref = next)
    9133              :                 {
    9134            0 :                   next = DF_REF_NEXT_REG (ref);
    9135            0 :                   if (!DF_REF_INSN_INFO (ref))
    9136            0 :                     continue;
    9137              : 
    9138              :                   /* Make sure the next ref is for a different instruction,
    9139              :                      so that we're not affected by the rescan.  */
    9140            0 :                   rtx_insn *insn = DF_REF_INSN (ref);
    9141            0 :                   while (next && DF_REF_INSN (next) == insn)
    9142            0 :                     next = DF_REF_NEXT_REG (next);
    9143              : 
    9144            0 :                   if (DEBUG_INSN_P (insn))
    9145              :                     {
    9146              :                       bool changed = false;
    9147            0 :                       for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    9148              :                         {
    9149            0 :                           rtx *loc = DF_REF_LOC (ref);
    9150            0 :                           if (*loc == hard_frame_pointer_rtx)
    9151              :                             {
    9152            0 :                               *loc = plus_constant (Pmode,
    9153              :                                                     stack_pointer_rtx,
    9154            0 :                                                     -UNITS_PER_WORD);
    9155            0 :                               changed = true;
    9156              :                             }
    9157              :                         }
    9158            0 :                       if (changed)
    9159            0 :                         df_insn_rescan (insn);
    9160              :                     }
    9161              :                 }
    9162              :             }
    9163              : 
    9164              :           recompute_frame_layout_p = true;
    9165              :         }
    9166              :     }
    9167      1446446 :   else if (crtl->max_used_stack_slot_alignment >= 128
    9168       657522 :            && cfun->machine->stack_frame_required)
    9169              :     {
    9170              :       /* We don't need to realign the stack.  max_used_stack_alignment is
    9171              :          used to decide how the stack frame should be aligned.  This is
    9172              :          independent of any psABI and of 32-bit vs 64-bit.  */
    9173       612452 :       cfun->machine->max_used_stack_alignment
    9174       612452 :         = stack_alignment / BITS_PER_UNIT;
    9175              :     }
    9176              : 
    9177      1481410 :   if (crtl->stack_realign_needed != stack_realign)
    9178        35197 :     recompute_frame_layout_p = true;
    9179      1481410 :   crtl->stack_realign_needed = stack_realign;
    9180      1481410 :   crtl->stack_realign_finalized = true;
    9181      1481410 :   if (recompute_frame_layout_p)
    9182        35290 :     ix86_compute_frame_layout ();
    9183              : }
    9184              : 
    9185              : /* Delete the SET_GOT insn at the start of the block after the entry block if it sets REG.  */
    9186              : /* Only the first nondebug insn is examined; it must be a PARALLEL whose first element is that SET.  */
    9187              : static void
    9188            0 : ix86_elim_entry_set_got (rtx reg)
    9189              : {
    9190            0 :   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9191            0 :   rtx_insn *c_insn = BB_HEAD (bb);
    9192            0 :   if (!NONDEBUG_INSN_P (c_insn))
    9193            0 :     c_insn = next_nonnote_nondebug_insn (c_insn);
    9194            0 :   if (c_insn && NONJUMP_INSN_P (c_insn))
    9195              :     {
    9196            0 :       rtx pat = PATTERN (c_insn);
    9197            0 :       if (GET_CODE (pat) == PARALLEL)
    9198              :         {
    9199            0 :           rtx set = XVECEXP (pat, 0, 0);  /* Element 0 of the PARALLEL.  */
    9200            0 :           if (GET_CODE (set) == SET
    9201            0 :               && GET_CODE (SET_SRC (set)) == UNSPEC
    9202            0 :               && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
    9203            0 :               && REGNO (SET_DEST (set)) == REGNO (reg))
    9204            0 :             delete_insn (c_insn);
    9205              :         }
    9206              :     }
    9207            0 : }
    9208              : /* Return a SET moving REG to (STORE) or from (!STORE) frame memory at FRAME_REG + OFFSET.  */
    9209              : static rtx
    9210       193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
    9211              : {
    9212       193166 :   rtx addr, mem;
    9213              :   /* ADDR is assigned, and later read, only when OFFSET is nonzero.  */
    9214       193166 :   if (offset)
    9215       184480 :     addr = plus_constant (Pmode, frame_reg, offset);
    9216       193166 :   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
    9217       193166 :   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
    9218              : }
    9219              : /* Return a SET that loads REG from frame memory at FRAME_REG + OFFSET.  */
    9220              : static inline rtx
    9221       100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
    9222              : {
    9223       100333 :   return gen_frame_set (reg, frame_reg, offset, false);
    9224              : }
    9225              : /* Return a SET that stores REG to frame memory at FRAME_REG + OFFSET.  */
    9226              : static inline rtx
    9227        92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
    9228              : {
    9229        92833 :   return gen_frame_set (reg, frame_reg, offset, true);
    9230              : }
    9231              : /* Emit RAX = stub base address, then a frame-related PARALLEL of the save-stub USE and register stores.  */
    9232              : static void
    9233         7045 : ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
    9234              : {
    9235         7045 :   struct machine_function *m = cfun->machine;
    9236         7045 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
    9237         7045 :                           + m->call_ms2sysv_extra_regs;
    9238         7045 :   rtvec v = rtvec_alloc (ncregs + 1);
    9239         7045 :   unsigned int align, i, vi = 0;
    9240         7045 :   rtx_insn *insn;
    9241         7045 :   rtx sym, addr;
    9242         7045 :   rtx rax = gen_rtx_REG (word_mode, AX_REG);
    9243         7045 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
    9244              : 
    9245              :   /* AL should only be live with sysv_abi.  */
    9246         7045 :   gcc_assert (!ix86_eax_live_at_start_p ());
    9247         7045 :   gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
    9248              : 
    9249              :   /* Set up RAX as the stub's base pointer.  We use stack_realign_offset whether
    9250              :      we've actually realigned the stack or not.  */
    9251         7045 :   align = GET_MODE_ALIGNMENT (V4SFmode);
    9252         7045 :   addr = choose_baseaddr (frame.stack_realign_offset
    9253         7045 :                           + xlogue.get_stub_ptr_offset (), &align, AX_REG);
    9254         7045 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
    9255              : 
    9256         7045 :   emit_insn (gen_rtx_SET (rax, addr));
    9257              : 
    9258              :   /* Get the stub symbol (the HFP variant when a frame pointer is needed).  */
    9259         8327 :   sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
    9260              :                                                   : XLOGUE_STUB_SAVE);
    9261         7045 :   RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
    9262              :   /* Slot 0 of V holds the USE above; the NCREGS stores fill the remaining slots.  */
    9263        99878 :   for (i = 0; i < ncregs; ++i)
    9264              :     {
    9265        92833 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
    9266        92833 :       rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
    9267        92833 :                              r.regno);
    9268        92833 :       RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    9269              :     }
    9270              :   /* Every slot of V must have been filled.  */
    9271         7045 :   gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
    9272              : 
    9273         7045 :   insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
    9274         7045 :   RTX_FRAME_RELATED_P (insn) = true;
    9275         7045 : }
    9276              : 
    9277              : /* Generate and return an insn body that ANDs X with Y, passing X as operands 0 and 1.  */
    9278              : /* X (operands 0 and 1) and Y (operand 2) are asserted to match the AND pattern for X's mode.  */
    9279              : static rtx_insn *
    9280        31704 : gen_and2_insn (rtx x, rtx y)
    9281              : {
    9282        31704 :   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
    9283              : 
    9284        31704 :   gcc_assert (insn_operand_matches (icode, 0, x));
    9285        31704 :   gcc_assert (insn_operand_matches (icode, 1, x));
    9286        31704 :   gcc_assert (insn_operand_matches (icode, 2, y));
    9287              : 
    9288        31704 :   return GEN_FCN (icode) (x, x, y);
    9289              : }
    9290              : 
    9291              : /* Expand the prologue into a bunch of separate insns.  */
    9292              : 
    9293              : void
    9294      1526209 : ix86_expand_prologue (void)
    9295              : {
    9296      1526209 :   struct machine_function *m = cfun->machine;
    9297      1526209 :   rtx insn, t;
    9298      1526209 :   HOST_WIDE_INT allocate;
    9299      1526209 :   bool int_registers_saved;
    9300      1526209 :   bool sse_registers_saved;
    9301      1526209 :   bool save_stub_call_needed;
    9302      1526209 :   rtx static_chain = NULL_RTX;
    9303              : 
    9304      1526209 :   ix86_last_zero_store_uid = 0;
    9305      1526209 :   if (ix86_function_naked (current_function_decl))
    9306              :     {
    9307           74 :       if (flag_stack_usage_info)
    9308            0 :         current_function_static_stack_size = 0;
    9309           74 :       return;
    9310              :     }
    9311              : 
    9312      1526135 :   ix86_finalize_stack_frame_flags ();
    9313              : 
    9314              :   /* DRAP should not coexist with stack_realign_fp */
    9315      1526135 :   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
    9316              : 
    9317      1526135 :   memset (&m->fs, 0, sizeof (m->fs));
    9318              : 
    9319              :   /* Initialize CFA state for before the prologue.  */
    9320      1526135 :   m->fs.cfa_reg = stack_pointer_rtx;
    9321      1526135 :   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
    9322              : 
    9323              :   /* Track SP offset to the CFA.  We continue tracking this after we've
    9324              :      swapped the CFA register away from SP.  In the case of re-alignment
    9325              :      this is fudged; we're interested in offsets within the local frame.  */
    9326      1526135 :   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9327      1526135 :   m->fs.sp_valid = true;
    9328      1526135 :   m->fs.sp_realigned = false;
    9329              : 
    9330      1526135 :   const struct ix86_frame &frame = cfun->machine->frame;
    9331              : 
    9332      1526135 :   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    9333              :     {
    9334              :       /* We should have already generated an error for any use of
    9335              :          ms_hook on a nested function.  */
    9336            0 :       gcc_checking_assert (!ix86_static_chain_on_stack);
    9337              : 
    9338              :       /* Check whether profiling is active and the profiling-before-prologue
    9339              :          variant is in use.  If so, report it as unsupported with sorry.  */
    9340            0 :       if (crtl->profile && flag_fentry != 0)
    9341            0 :         sorry ("%<ms_hook_prologue%> attribute is not compatible "
    9342              :                "with %<-mfentry%> for 32-bit");
    9343              : 
    9344              :       /* In ix86_asm_output_function_label we emitted:
    9345              :          8b ff     movl.s %edi,%edi
    9346              :          55        push   %ebp
    9347              :          8b ec     movl.s %esp,%ebp
    9348              : 
    9349              :          This matches the hookable function prologue in Win32 API
    9350              :          functions in Microsoft Windows XP Service Pack 2 and newer.
    9351              :          Wine uses this to enable Windows apps to hook the Win32 API
    9352              :          functions provided by Wine.
    9353              : 
    9354              :          What that means is that we've already set up the frame pointer.  */
    9355              : 
    9356            0 :       if (frame_pointer_needed
    9357            0 :           && !(crtl->drap_reg && crtl->stack_realign_needed))
    9358              :         {
    9359            0 :           rtx push, mov;
    9360              : 
    9361              :           /* We've decided to use the frame pointer already set up.
    9362              :              Describe this to the unwinder by pretending that both
    9363              :              push and mov insns happen right here.
    9364              : 
    9365              :              Putting the unwind info here at the end of the ms_hook
    9366              :              is done so that we can make absolutely certain we get
    9367              :              the required byte sequence at the start of the function,
    9368              :              rather than relying on an assembler that can produce
    9369              :              the exact encoding required.
    9370              : 
    9371              :              However it does mean (in the unpatched case) that we have
    9372              :              a 1 insn window where the asynchronous unwind info is
    9373              :              incorrect.  However, if we placed the unwind info at
    9374              :              its correct location we would have incorrect unwind info
    9375              :              in the patched case.  Which is probably all moot since
    9376              :              I don't expect Wine generates dwarf2 unwind info for the
    9377              :              system libraries that use this feature.  */
    9378              : 
    9379            0 :           insn = emit_insn (gen_blockage ());
    9380              : 
    9381            0 :           push = gen_push (hard_frame_pointer_rtx);
    9382            0 :           mov = gen_rtx_SET (hard_frame_pointer_rtx,
    9383              :                              stack_pointer_rtx);
    9384            0 :           RTX_FRAME_RELATED_P (push) = 1;
    9385            0 :           RTX_FRAME_RELATED_P (mov) = 1;
    9386              : 
    9387            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9388            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9389              :                         gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
    9390              : 
    9391              :           /* Note that gen_push incremented m->fs.cfa_offset, even
    9392              :              though we didn't emit the push insn here.  */
    9393            0 :           m->fs.cfa_reg = hard_frame_pointer_rtx;
    9394            0 :           m->fs.fp_offset = m->fs.cfa_offset;
    9395            0 :           m->fs.fp_valid = true;
    9396            0 :         }
    9397              :       else
    9398              :         {
    9399              :           /* The frame pointer is not needed so pop %ebp again.
    9400              :              This leaves us with a pristine state.  */
    9401            0 :           emit_insn (gen_pop (hard_frame_pointer_rtx));
    9402              :         }
    9403              :     }
    9404              : 
    9405              :   /* The first insn of a function that accepts its static chain on the
    9406              :      stack is to push the register that would be filled in by a direct
    9407              :      call.  This insn will be skipped by the trampoline.  */
    9408      1526135 :   else if (ix86_static_chain_on_stack)
    9409              :     {
    9410            0 :       static_chain = ix86_static_chain (cfun->decl, false);
    9411            0 :       insn = emit_insn (gen_push (static_chain));
    9412            0 :       emit_insn (gen_blockage ());
    9413              : 
    9414              :       /* We don't want to interpret this push insn as a register save,
    9415              :          only as a stack adjustment.  The real copy of the register as
    9416              :          a save will be done later, if needed.  */
    9417            0 :       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    9418            0 :       t = gen_rtx_SET (stack_pointer_rtx, t);
    9419            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
    9420            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9421              :     }
    9422              : 
    9423              :   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
    9424              :      DRAP is needed and stack realignment is really needed after reload.  */
    9425      1526135 :   if (stack_realign_drap)
    9426              :     {
    9427         7079 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9428              : 
    9429              :       /* Can't use DRAP in interrupt function.  */
    9430         7079 :       if (cfun->machine->func_type != TYPE_NORMAL)
    9431            0 :         sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
    9432              :                "in interrupt service routine.  This may be worked "
    9433              :                "around by avoiding functions with aggregate return.");
    9434              : 
    9435              :       /* Only need to push parameter pointer reg if it is callee saved.  */
    9436         7079 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9437              :         {
    9438              :           /* Push arg pointer reg */
    9439          137 :           insn = emit_insn (gen_push (crtl->drap_reg));
    9440          137 :           RTX_FRAME_RELATED_P (insn) = 1;
    9441              :         }
    9442              : 
    9443              :       /* Grab the argument pointer.  */
    9444         7364 :       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
    9445         7079 :       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9446         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9447         7079 :       m->fs.cfa_reg = crtl->drap_reg;
    9448         7079 :       m->fs.cfa_offset = 0;
    9449              : 
    9450              :       /* Align the stack.  */
    9451         7079 :       insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
    9452         7079 :                                        GEN_INT (-align_bytes)));
    9453         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9454              : 
    9455              :       /* Replicate the return address on the stack so that return
    9456              :          address can be reached via (argp - 1) slot.  This is needed
    9457              :          to implement macro RETURN_ADDR_RTX and intrinsic function
    9458              :          expand_builtin_return_addr etc.  */
    9459         7649 :       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
    9460         7079 :       t = gen_frame_mem (word_mode, t);
    9461         7079 :       insn = emit_insn (gen_push (t));
    9462         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9463              : 
    9464              :       /* For the purposes of frame and register save area addressing,
    9465              :          we've started over with a new frame.  */
    9466         7079 :       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9467         7079 :       m->fs.realigned = true;
    9468              : 
    9469         7079 :       if (static_chain)
    9470              :         {
    9471              :           /* Replicate static chain on the stack so that static chain
    9472              :              can be reached via (argp - 2) slot.  This is needed for
    9473              :              nested function with stack realignment.  */
    9474            0 :           insn = emit_insn (gen_push (static_chain));
    9475            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9476              :         }
    9477              :     }
    9478              : 
    9479      1526135 :   int_registers_saved = (frame.nregs == 0);
    9480      1526135 :   sse_registers_saved = (frame.nsseregs == 0);
    9481      1526135 :   save_stub_call_needed = (m->call_ms2sysv);
    9482      1526135 :   gcc_assert (sse_registers_saved || !save_stub_call_needed);
    9483              : 
    9484      1526135 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9485              :     {
    9486              :       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
    9487              :          slower on all targets.  Also sdb didn't like it.  */
    9488       480466 :       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
    9489       480466 :       RTX_FRAME_RELATED_P (insn) = 1;
    9490              : 
    9491       480466 :       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
    9492              :         {
    9493       480466 :           insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
    9494       480466 :           RTX_FRAME_RELATED_P (insn) = 1;
    9495              : 
    9496       480466 :           if (m->fs.cfa_reg == stack_pointer_rtx)
    9497       473387 :             m->fs.cfa_reg = hard_frame_pointer_rtx;
    9498       480466 :           m->fs.fp_offset = m->fs.sp_offset;
    9499       480466 :           m->fs.fp_valid = true;
    9500              :         }
    9501              :     }
    9502              : 
    9503      1526135 :   if (!int_registers_saved)
    9504              :     {
    9505              :       /* If saving registers via PUSH, do so now.  */
    9506       474397 :       if (!frame.save_regs_using_mov)
    9507              :         {
    9508       429611 :           ix86_emit_save_regs ();
    9509       429611 :           m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
    9510       429611 :           int_registers_saved = true;
    9511       429611 :           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
    9512              :         }
    9513              : 
    9514              :       /* When using red zone we may start register saving before allocating
    9515              :          the stack frame saving one cycle of the prologue.  However, avoid
    9516              :          doing this if we have to probe the stack; at least on x86_64 the
    9517              :          stack probe can turn into a call that clobbers a red zone location. */
    9518        44786 :       else if (ix86_using_red_zone ()
    9519        44786 :                 && (! TARGET_STACK_PROBE
    9520            0 :                     || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
    9521              :         {
    9522        40312 :           HOST_WIDE_INT allocate_offset;
    9523        40312 :           if (crtl->shrink_wrapped_separate)
    9524              :             {
    9525        40256 :               allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
    9526              : 
    9527              :               /* Adjust the total offset at the beginning of the function.  */
    9528        40256 :               pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9529              :                                          GEN_INT (allocate_offset), -1,
    9530        40256 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    9531        40256 :               m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
    9532              :             }
    9533              : 
    9534        40312 :           ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9535        40312 :           int_registers_saved = true;
    9536              :         }
    9537              :     }
    9538              : 
    9539      1526135 :   if (frame.red_zone_size != 0)
    9540       140422 :     cfun->machine->red_zone_used = true;
    9541              : 
    9542      1526135 :   if (stack_realign_fp)
    9543              :     {
    9544        24625 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9545        24974 :       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
    9546              : 
    9547              :       /* Record last valid frame pointer offset.  */
    9548        24625 :       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
    9549              : 
    9550              :       /* The computation of the size of the re-aligned stack frame means
    9551              :          that we must allocate the size of the register save area before
    9552              :          performing the actual alignment.  Otherwise we cannot guarantee
    9553              :          that there's enough storage above the realignment point.  */
    9554        24625 :       allocate = frame.reg_save_offset - m->fs.sp_offset
    9555        24625 :                  + frame.stack_realign_allocate;
    9556        24625 :       if (allocate)
    9557         2691 :         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9558              :                                    GEN_INT (-allocate), -1, false);
    9559              : 
    9560              :       /* Align the stack.  */
    9561        24625 :       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
    9562        24625 :       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
    9563        24625 :       m->fs.sp_realigned_offset = m->fs.sp_offset
    9564        24625 :                                               - frame.stack_realign_allocate;
    9565              :       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
    9566              :          Beyond this point, stack access should be done via choose_baseaddr or
    9567              :          by using sp_valid_at and fp_valid_at to determine the correct base
    9568              :          register.  Henceforth, any CFA offset should be thought of as logical
    9569              :          and not physical.  */
    9570        24625 :       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
    9571        24625 :       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
    9572        24625 :       m->fs.sp_realigned = true;
    9573              : 
    9574              :       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
    9575              :          is needed to describe where a register is saved using a realigned
    9576              :          stack pointer, so we need to invalidate the stack pointer for that
    9577              :          target.  */
    9578        24625 :       if (TARGET_SEH)
    9579              :         m->fs.sp_valid = false;
    9580              : 
    9581              :       /* If SP offset is non-immediate after allocation of the stack frame,
    9582              :          then emit SSE saves or stub call prior to allocating the rest of the
    9583              :          stack frame.  This is less efficient for the out-of-line stub because
    9584              :          we can't combine allocations across the call barrier, but it's better
    9585              :          than using a scratch register.  */
    9586        24625 :       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
    9587              :                                                    - m->fs.sp_realigned_offset),
    9588        24625 :                                           Pmode))
    9589              :         {
    9590            3 :           if (!sse_registers_saved)
    9591              :             {
    9592            1 :               ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9593            1 :               sse_registers_saved = true;
    9594              :             }
    9595            2 :           else if (save_stub_call_needed)
    9596              :             {
    9597            1 :               ix86_emit_outlined_ms2sysv_save (frame);
    9598            1 :               save_stub_call_needed = false;
    9599              :             }
    9600              :         }
    9601              :     }
    9602              : 
    9603      1526135 :   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
    9604              : 
    9605      1526135 :   if (flag_stack_usage_info)
    9606              :     {
    9607              :       /* We start to count from ARG_POINTER.  */
    9608          355 :       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
    9609              : 
    9610              :       /* If it was realigned, take into account the fake frame.  */
    9611          355 :       if (stack_realign_drap)
    9612              :         {
    9613            1 :           if (ix86_static_chain_on_stack)
    9614            0 :             stack_size += UNITS_PER_WORD;
    9615              : 
    9616            1 :           if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9617            0 :             stack_size += UNITS_PER_WORD;
    9618              : 
    9619              :           /* This over-estimates by 1 minimal-stack-alignment-unit but
    9620              :              mitigates that by counting in the new return address slot.  */
    9621            1 :           current_function_dynamic_stack_size
    9622            1 :             += crtl->stack_alignment_needed / BITS_PER_UNIT;
    9623              :         }
    9624              : 
    9625          355 :       current_function_static_stack_size = stack_size;
    9626              :     }
    9627              : 
    9628              :   /* On SEH target with very large frame size, allocate an area to save
    9629              :      SSE registers (as the very large allocation won't be described).  */
    9630      1526135 :   if (TARGET_SEH
    9631              :       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
    9632              :       && !sse_registers_saved)
    9633              :     {
    9634              :       HOST_WIDE_INT sse_size
    9635              :         = frame.sse_reg_save_offset - frame.reg_save_offset;
    9636              : 
    9637              :       gcc_assert (int_registers_saved);
    9638              : 
    9639              :       /* No need to do stack checking as the area will be immediately
    9640              :          written.  */
    9641              :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9642              :                                  GEN_INT (-sse_size), -1,
    9643              :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9644              :       allocate -= sse_size;
    9645              :       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9646              :       sse_registers_saved = true;
    9647              :     }
    9648              : 
    9649              :   /* If stack clash protection is requested, then probe the stack, unless it
    9650              :      is already probed on the target.  */
    9651      1526135 :   if (allocate >= 0
    9652      1526131 :       && flag_stack_clash_protection
    9653      1526233 :       && !ix86_target_stack_probe ())
    9654              :     {
    9655           98 :       ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
    9656           98 :       allocate = 0;
    9657              :     }
    9658              : 
    9659              :   /* The stack has already been decremented by the instruction calling us
    9660              :      so probe if the size is non-negative to preserve the protection area.  */
    9661      1526037 :   else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    9662              :     {
    9663           45 :       const HOST_WIDE_INT probe_interval = get_probe_interval ();
    9664              : 
    9665           45 :       if (STACK_CHECK_MOVING_SP)
    9666              :         {
    9667           45 :           if (crtl->is_leaf
    9668           18 :               && !cfun->calls_alloca
    9669           18 :               && allocate <= probe_interval)
    9670              :             ;
    9671              : 
    9672              :           else
    9673              :             {
    9674           28 :               ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
    9675           28 :               allocate = 0;
    9676              :             }
    9677              :         }
    9678              : 
    9679              :       else
    9680              :         {
    9681              :           HOST_WIDE_INT size = allocate;
    9682              : 
    9683              :           if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
    9684              :             size = 0x80000000 - get_stack_check_protect () - 1;
    9685              : 
    9686              :           if (TARGET_STACK_PROBE)
    9687              :             {
    9688              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9689              :                 {
    9690              :                   if (size > probe_interval)
    9691              :                     ix86_emit_probe_stack_range (0, size, int_registers_saved);
    9692              :                 }
    9693              :               else
    9694              :                 ix86_emit_probe_stack_range (0,
    9695              :                                              size + get_stack_check_protect (),
    9696              :                                              int_registers_saved);
    9697              :             }
    9698              :           else
    9699              :             {
    9700              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9701              :                 {
    9702              :                   if (size > probe_interval
    9703              :                       && size > get_stack_check_protect ())
    9704              :                     ix86_emit_probe_stack_range (get_stack_check_protect (),
    9705              :                                                  (size
    9706              :                                                   - get_stack_check_protect ()),
    9707              :                                                  int_registers_saved);
    9708              :                 }
    9709              :               else
    9710              :                 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
    9711              :                                              int_registers_saved);
    9712              :             }
    9713              :         }
    9714              :     }
    9715              : 
    9716      1526131 :   if (allocate == 0)
    9717              :     ;
    9718       842085 :   else if (!ix86_target_stack_probe ()
    9719       842085 :            || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    9720              :     {
    9721       842040 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9722              :                                  GEN_INT (-allocate), -1,
    9723       842040 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9724              :     }
    9725              :   else
    9726              :     {
    9727           45 :       rtx eax = gen_rtx_REG (Pmode, AX_REG);
    9728           45 :       rtx r10 = NULL;
    9729           45 :       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
    9730           45 :       bool eax_live = ix86_eax_live_at_start_p ();
    9731           45 :       bool r10_live = false;
    9732              : 
    9733           45 :       if (TARGET_64BIT)
    9734           45 :         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
    9735              : 
    9736           45 :       if (eax_live)
    9737              :         {
    9738            0 :           insn = emit_insn (gen_push (eax));
    9739            0 :           allocate -= UNITS_PER_WORD;
    9740              :           /* Note that SEH directives need to continue tracking the stack
    9741              :              pointer even after the frame pointer has been set up.  */
    9742            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9743              :             {
    9744            0 :               if (sp_is_cfa_reg)
    9745            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9746            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9747            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9748            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9749              :                                          plus_constant (Pmode,
    9750              :                                                         stack_pointer_rtx,
    9751              :                                                         -UNITS_PER_WORD)));
    9752              :             }
    9753              :         }
    9754              : 
    9755           45 :       if (r10_live)
    9756              :         {
    9757            0 :           r10 = gen_rtx_REG (Pmode, R10_REG);
    9758            0 :           insn = emit_insn (gen_push (r10));
    9759            0 :           allocate -= UNITS_PER_WORD;
    9760            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9761              :             {
    9762            0 :               if (sp_is_cfa_reg)
    9763            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9764            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9765            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9766            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9767              :                                          plus_constant (Pmode,
    9768              :                                                         stack_pointer_rtx,
    9769              :                                                         -UNITS_PER_WORD)));
    9770              :             }
    9771              :         }
    9772              : 
    9773           45 :       emit_move_insn (eax, GEN_INT (allocate));
    9774           45 :       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
    9775              : 
    9776              :       /* Use the fact that AX still contains ALLOCATE.  */
    9777           45 :       insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
    9778           45 :                         (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
    9779              : 
    9780           45 :       if (sp_is_cfa_reg || TARGET_SEH)
    9781              :         {
    9782           37 :           if (sp_is_cfa_reg)
    9783           37 :             m->fs.cfa_offset += allocate;
    9784           37 :           RTX_FRAME_RELATED_P (insn) = 1;
    9785           37 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9786           37 :                         gen_rtx_SET (stack_pointer_rtx,
    9787              :                                      plus_constant (Pmode, stack_pointer_rtx,
    9788              :                                                     -allocate)));
    9789              :         }
    9790           45 :       m->fs.sp_offset += allocate;
    9791              : 
    9792              :       /* Use stack_pointer_rtx for relative addressing so that code works for
    9793              :          realigned stack.  But this means that we need a blockage to prevent
    9794              :          stores based on the frame pointer from being scheduled before.  */
    9795           45 :       if (r10_live && eax_live)
    9796              :         {
    9797            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9798            0 :           emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
    9799              :                           gen_frame_mem (word_mode, t));
    9800            0 :           t = plus_constant (Pmode, t, UNITS_PER_WORD);
    9801            0 :           emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
    9802              :                           gen_frame_mem (word_mode, t));
    9803            0 :           emit_insn (gen_memory_blockage ());
    9804              :         }
    9805           45 :       else if (eax_live || r10_live)
    9806              :         {
    9807            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9808            0 :           emit_move_insn (gen_rtx_REG (word_mode,
    9809              :                                        (eax_live ? AX_REG : R10_REG)),
    9810              :                           gen_frame_mem (word_mode, t));
    9811            0 :           emit_insn (gen_memory_blockage ());
    9812              :         }
    9813              :     }
    9814      1526135 :   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
    9815              : 
    9816              :   /* If we haven't already set up the frame pointer, do so now.  */
    9817      1526135 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9818              :     {
    9819            0 :       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
    9820            0 :                             GEN_INT (frame.stack_pointer_offset
    9821              :                                      - frame.hard_frame_pointer_offset));
    9822            0 :       insn = emit_insn (insn);
    9823            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9824            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    9825              : 
    9826            0 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    9827            0 :         m->fs.cfa_reg = hard_frame_pointer_rtx;
    9828            0 :       m->fs.fp_offset = frame.hard_frame_pointer_offset;
    9829            0 :       m->fs.fp_valid = true;
    9830              :     }
    9831              : 
    9832      1526135 :   if (!int_registers_saved)
    9833         4474 :     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9834      1526135 :   if (!sse_registers_saved)
    9835        33362 :     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9836      1492773 :   else if (save_stub_call_needed)
    9837         7044 :     ix86_emit_outlined_ms2sysv_save (frame);
    9838              : 
    9839              :   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
    9840              :      in PROLOGUE.  */
    9841      1526135 :   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    9842              :     {
    9843            0 :       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
    9844            0 :       insn = emit_insn (gen_set_got (pic));
    9845            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9846            0 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    9847            0 :       emit_insn (gen_prologue_use (pic));
    9848              :       /* Delete the already emitted SET_GOT if it exists and is allocated
    9849              :          to REAL_PIC_OFFSET_TABLE_REGNUM.  */
    9850            0 :       ix86_elim_entry_set_got (pic);
    9851              :     }
    9852              : 
    9853      1526135 :   if (crtl->drap_reg && !crtl->stack_realign_needed)
    9854              :     {
    9855              :       /* vDRAP is setup but after reload it turns out stack realign
    9856              :          isn't necessary, here we will emit prologue to setup DRAP
    9857              :          without stack realign adjustment */
    9858          181 :       t = choose_baseaddr (0, NULL);
    9859          181 :       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9860              :     }
    9861              : 
    9862              :   /* Prevent instructions from being scheduled into register save push
    9863              :      sequence when access to the redzone area is done through frame pointer.
    9864              :      The offset between the frame pointer and the stack pointer is calculated
    9865              :      relative to the value of the stack pointer at the end of the function
    9866              :      prologue, and moving instructions that access redzone area via frame
    9867              :      pointer inside push sequence violates this assumption.  */
    9868      1526135 :   if (frame_pointer_needed && frame.red_zone_size)
    9869       129454 :     emit_insn (gen_memory_blockage ());
    9870              : 
    9871              :   /* SEH requires that the prologue end within 256 bytes of the start of
    9872              :      the function.  Prevent instruction schedules that would extend that.
    9873              :      Further, prevent alloca modifications to the stack pointer from being
    9874              :      combined with prologue modifications.  */
    9875              :   if (TARGET_SEH)
    9876              :     emit_insn (gen_prologue_use (stack_pointer_rtx));
    9877              : }
    9878              : 
    9879              : /* Emit code to restore REG using a POP or POPP insn (PPX_P is handed to
    9880              :    gen_pop), and update the frame state in cfun->machine->fs to match.  */
    9881              : static void
    9882      1468934 : ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
    9883              : {
    9884      1468934 :   struct machine_function *m = cfun->machine;
    9885      1468934 :   rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
    9886              : 
    9887      1468934 :   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
    9888      1468934 :   m->fs.sp_offset -= UNITS_PER_WORD;
    9889              : 
    9890      1468934 :   if (m->fs.cfa_reg == crtl->drap_reg
    9891      1468934 :       && REGNO (reg) == REGNO (crtl->drap_reg))
    9892              :     {
    9893              :       /* Previously we'd represented the CFA as an expression
    9894              :          like *(%ebp - 8).  We've just popped that value from
    9895              :          the stack, which means we need to reset the CFA to
    9896              :          the drap register.  This will remain until we restore
    9897              :          the stack pointer.  */
    9898         4032 :       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
    9899         4032 :       RTX_FRAME_RELATED_P (insn) = 1;
    9900              : 
    9901              :       /* This means that the DRAP register is valid for addressing too.  */
    9902         4032 :       m->fs.drap_valid = true;
    9903         4032 :       return;
    9904              :     }
    9905              :   /* With SP as the CFA register, the pop moves the CFA by one word.  */
    9906      1464902 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    9907              :     {
    9908      1381669 :       rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    9909      1018408 :       x = gen_rtx_SET (stack_pointer_rtx, x);
    9910      1018408 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
    9911      1018408 :       RTX_FRAME_RELATED_P (insn) = 1;
    9912              : 
    9913      1200031 :       m->fs.cfa_offset -= UNITS_PER_WORD;
    9914              :     }
    9915              : 
    9916              :   /* When the frame pointer is the CFA, and we pop it, we are
    9917              :      swapping back to the stack pointer as the CFA.  This happens
    9918              :      for stack frames that don't allocate other data, so we assume
    9919              :      the stack pointer is now pointing at the return address, i.e.
    9920              :      the function entry state, which makes the offset be 1 word.  */
    9921      1464902 :   if (reg == hard_frame_pointer_rtx)
    9922              :     {
    9923       237572 :       m->fs.fp_valid = false;
    9924       237572 :       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    9925              :         {
    9926       233527 :           m->fs.cfa_reg = stack_pointer_rtx;
    9927       233527 :           m->fs.cfa_offset -= UNITS_PER_WORD;
    9928              : 
    9929       233527 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    9930       233527 :                         plus_constant (Pmode, stack_pointer_rtx,
    9931       233527 :                                        m->fs.cfa_offset));
    9932       233527 :           RTX_FRAME_RELATED_P (insn) = 1;
    9933              :         }
    9934              :     }
    9935              : }
    9936              : 
    9937              : /* Emit code to restore REG1 and REG2 using a POP2 (POP2P if PPX_P) insn.  */
    9938              : static void
    9939           19 : ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
    9940              : {
    9941           19 :   struct machine_function *m = cfun->machine;
    9942           19 :   const int offset = UNITS_PER_WORD * 2;
    9943           19 :   rtx_insn *insn;
    9944              : 
    9945           19 :   rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
    9946              :                                                    stack_pointer_rtx));
    9947              : 
    9948           19 :   if (ppx_p)
    9949           15 :     insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
    9950              :   else
    9951            4 :     insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
    9952              : 
    9953           19 :   RTX_FRAME_RELATED_P (insn) = 1;
    9954              :   /* Attach one REG_CFA_RESTORE note per restored register.  */
    9955           19 :   rtx dwarf = NULL_RTX;
    9956           19 :   dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
    9957           19 :   dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
    9958           19 :   REG_NOTES (insn) = dwarf;
    9959           19 :   m->fs.sp_offset -= offset;
    9960              : 
    9961           19 :   if (m->fs.cfa_reg == crtl->drap_reg
    9962           19 :       && (REGNO (reg1) == REGNO (crtl->drap_reg)
    9963            3 :           || REGNO (reg2) == REGNO (crtl->drap_reg)))
    9964              :     {
    9965              :       /* Previously we'd represented the CFA as an expression
    9966              :          like *(%ebp - 8).  We've just popped that value from
    9967              :          the stack, which means we need to reset the CFA to
    9968              :          the drap register.  This will remain until we restore
    9969              :          the stack pointer.  */
    9970            1 :       add_reg_note (insn, REG_CFA_DEF_CFA,
    9971            1 :                     REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
    9972            1 :       RTX_FRAME_RELATED_P (insn) = 1;
    9973              : 
    9974              :       /* This means that the DRAP register is valid for addressing too.  */
    9975            1 :       m->fs.drap_valid = true;
    9976            1 :       return;
    9977              :     }
    9978              :   /* With SP as the CFA register, the pop moves the CFA by two words.  */
    9979           18 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    9980              :     {
    9981           14 :       rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
    9982           14 :       x = gen_rtx_SET (stack_pointer_rtx, x);
    9983           14 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
    9984           14 :       RTX_FRAME_RELATED_P (insn) = 1;
    9985              : 
    9986           14 :       m->fs.cfa_offset -= offset;
    9987              :     }
    9988              : 
    9989              :   /* When the frame pointer is the CFA, and we pop it, we are
    9990              :      swapping back to the stack pointer as the CFA.  This mirrors
    9991              :      the single-POP variant, except that the CFA offset is reduced
    9992              :      by OFFSET (two words) here.  NOTE(review): confirm the resulting
    9993              :      offset is the intended one if this path ever becomes reachable.  */
    9994           18 :   if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    9995              :     {
    9996            0 :       m->fs.fp_valid = false;
    9997            0 :       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    9998              :         {
    9999            0 :           m->fs.cfa_reg = stack_pointer_rtx;
   10000            0 :           m->fs.cfa_offset -= offset;
   10001              : 
   10002            0 :           add_reg_note (insn, REG_CFA_DEF_CFA,
   10003            0 :                         plus_constant (Pmode, stack_pointer_rtx,
   10004            0 :                                        m->fs.cfa_offset));
   10005            0 :           RTX_FRAME_RELATED_P (insn) = 1;
   10006              :         }
   10007              :     }
   10008              : }
   10009              : 
   10010              : /* Emit code to restore saved registers using POP insns.  Each general
   10011              :    register that ix86_save_reg accepts is popped, in ascending regno order.  */
   10012              : static void
   10013      1355562 : ix86_emit_restore_regs_using_pop (bool ppx_p)
   10014              : {
   10015      1355562 :   unsigned int regno;
   10016              : 
   10017    126067266 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10018    124711704 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   10019      1231041 :       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
   10020      1355562 : }
   10021              : 
   10022              : /* Emit code to restore saved registers using POP2 insns.  Registers are
   10023              :    paired in ascending regno order; unpaired ones use a single POP.  */
   10024              : static void
   10025          560 : ix86_emit_restore_regs_using_pop2 (void)
   10026              : {
   10027          560 :   int regno;
   10028          560 :   int regno_list[2];
   10029          560 :   regno_list[0] = regno_list[1] = -1;
   10030          560 :   int loaded_regnum = 0;
   10031          560 :   bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
   10032              :   /* If SP offset is not a multiple of 16, pop one register singly first.  */
   10033        52080 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10034        51520 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   10035              :       {
   10036          127 :         if (aligned)
   10037              :           {
   10038          120 :             regno_list[loaded_regnum++] = regno;
   10039          120 :             if (loaded_regnum == 2)
   10040              :               {
   10041           19 :                 gcc_assert (regno_list[0] != -1
   10042              :                             && regno_list[1] != -1
   10043              :                             && regno_list[0] != regno_list[1]);
   10044              : 
   10045           19 :                 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
   10046              :                                                                regno_list[0]),
   10047              :                                                   gen_rtx_REG (word_mode,
   10048              :                                                                regno_list[1]),
   10049           19 :                                                   TARGET_APX_PPX);
   10050           19 :                 loaded_regnum = 0;
   10051           19 :                 regno_list[0] = regno_list[1] = -1;
   10052              :               }
   10053              :           }
   10054              :         else
   10055              :           {
   10056           14 :             ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
   10057            7 :                                              TARGET_APX_PPX);
   10058            7 :             aligned = true;
   10059              :           }
   10060              :       }
   10061              :   /* An odd register left over after pairing is restored with a single POP.  */
   10062          560 :   if (loaded_regnum == 1)
   10063           82 :     ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
   10064           82 :                                      TARGET_APX_PPX);
   10065          560 : }
   10066              : 
   10067              : /* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
   10068              :    omit the emit and only attach the notes to INSN.  The frame pointer
   10069              :    must be valid on entry; on exit SP is valid and the frame pointer is not.  */
   10070              : static void
   10071       244201 : ix86_emit_leave (rtx_insn *insn)
   10072              : {
   10073       244201 :   struct machine_function *m = cfun->machine;
   10074              : 
   10075       244201 :   if (!insn)
   10076       243230 :     insn = emit_insn (gen_leave (word_mode));
   10077              : 
   10078       244201 :   ix86_add_queued_cfa_restore_notes (insn);
   10079              : 
   10080       244201 :   gcc_assert (m->fs.fp_valid);
   10081       244201 :   m->fs.sp_valid = true;
   10082       244201 :   m->fs.sp_realigned = false;
   10083       244201 :   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
   10084       244201 :   m->fs.fp_valid = false;
   10085              :   /* If the frame pointer was the CFA register, redefine the CFA from SP.  */
   10086       244201 :   if (m->fs.cfa_reg == hard_frame_pointer_rtx)
   10087              :     {
   10088       241059 :       m->fs.cfa_reg = stack_pointer_rtx;
   10089       241059 :       m->fs.cfa_offset = m->fs.sp_offset;
   10090              : 
   10091       241059 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10092       241059 :                     plus_constant (Pmode, stack_pointer_rtx,
   10093       241059 :                                    m->fs.sp_offset));
   10094       241059 :       RTX_FRAME_RELATED_P (insn) = 1;
   10095              :     }
   10096       244201 :   ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
   10097              :                              m->fs.fp_offset);
   10098       244201 : }
   10099              : 
   10100              : /* Emit code to restore saved general registers using MOV insns.  First
   10101              :    register is restored from CFA - CFA_OFFSET; each later one is a word lower.  */
   10102              : static void
   10103        96089 : ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
   10104              :                                   bool maybe_eh_return)
   10105              : {
   10106        96089 :   struct machine_function *m = cfun->machine;
   10107        96089 :   unsigned int regno;
   10108              : 
   10109      8936277 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10110      8840188 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
   10111              :       {
   10112              : 
   10113              :         /* Skip registers already handled by separate shrink-wrapping.  */
   10114       264802 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
   10115              :           {
   10116       140537 :             rtx reg = gen_rtx_REG (word_mode, regno);
   10117       140537 :             rtx mem;
   10118       140537 :             rtx_insn *insn;
   10119              : 
   10120       140537 :             mem = choose_baseaddr (cfa_offset, NULL);
   10121       140537 :             mem = gen_frame_mem (word_mode, mem);
   10122       140537 :             insn = emit_move_insn (reg, mem);
   10123              : 
   10124       140537 :             if (m->fs.cfa_reg == crtl->drap_reg
   10125       140537 :                 && regno == REGNO (crtl->drap_reg))
   10126              :               {
   10127              :                 /* Previously we'd represented the CFA as an expression
   10128              :                    like *(%ebp - 8).  We've just reloaded that value from
   10129              :                    the stack, which means we need to reset the CFA to
   10130              :                    the drap register.  This will remain until we restore
   10131              :                    the stack pointer.  */
   10132         3142 :                 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
   10133         3142 :                 RTX_FRAME_RELATED_P (insn) = 1;
   10134              : 
   10135              :                 /* DRAP register is valid for addressing.  */
   10136         3142 :                 m->fs.drap_valid = true;
   10137              :               }
   10138              :             else
   10139       137395 :               ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
   10140              :           }
   10141       285366 :         cfa_offset -= UNITS_PER_WORD;
   10142              :       }
   10143        96089 : }
   10144              : 
   10145              : /* Emit code to restore saved SSE registers using V4SFmode MOV insns.
   10146              :    First register is restored from CFA - CFA_OFFSET.  */
   10147              : static void
   10148        33939 : ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
   10149              :                                       bool maybe_eh_return)
   10150              : {
   10151        33939 :   unsigned int regno;
   10152              : 
   10153      3156327 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10154      3122388 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
   10155              :       {
   10156       339417 :         rtx reg = gen_rtx_REG (V4SFmode, regno);
   10157       339417 :         rtx mem;
   10158       339417 :         unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
   10159              : 
   10160       339417 :         mem = choose_baseaddr (cfa_offset, &align);
   10161       339417 :         mem = gen_rtx_MEM (V4SFmode, mem);
   10162              : 
   10163              :         /* The location alignment depends upon the base register.  */
   10164       339417 :         align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
   10165       339417 :         gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
   10166       339417 :         set_mem_align (mem, align);
   10167       339417 :         emit_insn (gen_rtx_SET (reg, mem));
   10168              : 
   10169       339417 :         ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
   10170              : 
   10171       339417 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);
   10172              :       }
   10173        33939 : }
   10174              : /* Emit the part of the epilogue that restores registers through an
                      :    out-of-line ms2sysv restore stub.  A PARALLEL is built holding a
                      :    USE of the stub symbol and one frame load, addressed off RSI, for
                      :    each of the NUM_X86_64_MS_CLOBBERED_REGS + call_ms2sysv_extra_regs
                      :    registers.
                      : 
                      :    FRAME supplies stack_realign_offset, which together with the
                      :    layout's stub pointer offset gives the address loaded into RSI.
                      :    If USE_CALL is true the PARALLEL is emitted with emit_insn and,
                      :    when the stack pointer is valid, followed by a stack adjustment;
                      :    otherwise it is emitted as a jump insn that also contains the
                      :    return and the stack pointer restore (plus the RBP reload when a
                      :    frame pointer is in use).  STYLE is only forwarded to
                      :    pro_epilogue_adjust_stack.  */
   10175              : static void
   10176         7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
   10177              :                                   bool use_call, int style)
   10178              : {
   10179         7621 :   struct machine_function *m = cfun->machine;
   10180         7621 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
   10181         7621 :                           + m->call_ms2sysv_extra_regs;
   10182         7621 :   rtvec v;
   10183         7621 :   unsigned int elems_needed, align, i, vi = 0;
   10184         7621 :   rtx_insn *insn;
   10185         7621 :   rtx sym, tmp;
   10186         7621 :   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
   10187         7621 :   rtx r10 = NULL_RTX;
   10188         7621 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
   10189         7621 :   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
   10190         7621 :   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
   10191         7621 :   rtx rsi_frame_load = NULL_RTX;
   10192         7621 :   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
   10193         7621 :   enum xlogue_stub stub;
   10194              : 
   10195         7621 :   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
   10196              : 
   10197              :   /* If using a realigned stack, we should never start with padding.  */
   10198         7621 :   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
   10199              : 
   10200              :   /* Setup RSI as the stub's base pointer.  The alignment that
                      :      choose_baseaddr leaves in ALIGN must be at least that of
                      :      V4SFmode, which is asserted below.  */
   10201         7621 :   align = GET_MODE_ALIGNMENT (V4SFmode);
   10202         7621 :   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
   10203         7621 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
   10204              : 
   10205         7621 :   emit_insn (gen_rtx_SET (rsi, tmp));
   10206              : 
   10207              :   /* Get a symbol for the stub.  */
   10208         7621 :   if (frame_pointer_needed)
   10209         5955 :     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
   10210              :                     : XLOGUE_STUB_RESTORE_HFP_TAIL;
   10211              :   else
   10212         1666 :     stub = use_call ? XLOGUE_STUB_RESTORE
   10213              :                     : XLOGUE_STUB_RESTORE_TAIL;
   10214         7621 :   sym = xlogue.get_stub_rtx (stub);
   10215              :   /* Size the PARALLEL.  Both forms hold NCREGS frame loads and a USE
                      :      of SYM; the jump form adds the return and the SP set, plus the
                      :      RBP reload and a memory clobber when a frame pointer is used.  */
   10216         7621 :   elems_needed = ncregs;
   10217         7621 :   if (use_call)
   10218         6498 :     elems_needed += 1;
   10219              :   else
   10220         1275 :     elems_needed += frame_pointer_needed ? 5 : 3;
   10221         7621 :   v = rtvec_alloc (elems_needed);
   10222              : 
   10223              :   /* We call the epilogue stub when we need to pop incoming args or we are
   10224              :      doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
   10225              :      epilogue stub and it is the tail-call.  */
   10226         7621 :   if (use_call)
   10227         6498 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10228              :   else
   10229              :     {
   10230         1123 :       RTVEC_ELT (v, vi++) = ret_rtx;
   10231         1123 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10232         1123 :       if (frame_pointer_needed)
   10233              :         {
   10234          971 :           rtx rbp = gen_rtx_REG (DImode, BP_REG);
   10235          971 :           gcc_assert (m->fs.fp_valid);
   10236          971 :           gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
   10237              : 
   10238          971 :           tmp = plus_constant (DImode, rbp, 8);
   10239          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
   10240          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
   10241          971 :           tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
   10242          971 :           RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
   10243              :         }
   10244              :       else
   10245              :         {
   10246              :           /* If no hard frame pointer, we set R10 to the SP restore value.  */
   10247          152 :           gcc_assert (!m->fs.fp_valid);
   10248          152 :           gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10249          152 :           gcc_assert (m->fs.sp_valid);
   10250              : 
   10251          152 :           r10 = gen_rtx_REG (DImode, R10_REG);
   10252          152 :           tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
   10253          152 :           emit_insn (gen_rtx_SET (r10, tmp));
   10254              : 
   10255          152 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
   10256              :         }
   10257              :     }
   10258              : 
   10259              :   /* Generate frame load insns and restore notes.  */
   10260       107954 :   for (i = 0; i < ncregs; ++i)
   10261              :     {
   10262       100333 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
   10263       100333 :       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
   10264       100333 :       rtx reg, frame_load;
   10265              : 
   10266       100333 :       reg = gen_rtx_REG (mode, r.regno);
   10267       100333 :       frame_load = gen_frame_load (reg, rsi, r.offset);
   10268              : 
   10269              :       /* Save RSI frame load insn & note to add last.
                      :          NOTE(review): presumably because RSI is the base register
                      :          of every load in the PARALLEL -- confirm.  */
   10270       100333 :       if (r.regno == SI_REG)
   10271              :         {
   10272         7621 :           gcc_assert (!rsi_frame_load);
   10273         7621 :           rsi_frame_load = frame_load;
   10274         7621 :           rsi_restore_offset = r.offset;
   10275              :         }
   10276              :       else
   10277              :         {
   10278        92712 :           RTVEC_ELT (v, vi++) = frame_load;
   10279        92712 :           ix86_add_cfa_restore_note (NULL, reg, r.offset);
   10280              :         }
   10281              :     }
   10282              : 
   10283              :   /* Add RSI frame load & restore note at the end.  */
   10284         7621 :   gcc_assert (rsi_frame_load);
   10285         7621 :   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
   10286         7621 :   RTVEC_ELT (v, vi++) = rsi_frame_load;
   10287         7621 :   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
   10288              :                              rsi_restore_offset);
   10289              : 
   10290              :   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.
                      :      Only the tracked frame state is updated here; the SP set
                      :      itself was already placed in the PARALLEL above.  */
   10291         7621 :   if (!use_call && !frame_pointer_needed)
   10292              :     {
   10293          152 :       gcc_assert (m->fs.sp_valid);
   10294          152 :       gcc_assert (!m->fs.sp_realigned);
   10295              : 
   10296              :       /* At this point, R10 should point to frame.stack_realign_offset.  */
   10297          152 :       if (m->fs.cfa_reg == stack_pointer_rtx)
   10298          152 :         m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
   10299          152 :       m->fs.sp_offset = frame.stack_realign_offset;
   10300              :     }
   10301              : 
   10302         7621 :   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
   10303         7621 :   tmp = gen_rtx_PARALLEL (VOIDmode, v);
   10304         7621 :   if (use_call)
   10305         6498 :       insn = emit_insn (tmp);
   10306              :   else
   10307              :     {
   10308         1123 :       insn = emit_jump_insn (tmp);
   10309         1123 :       JUMP_LABEL (insn) = ret_rtx;
   10310              : 
   10311         1123 :       if (frame_pointer_needed)
   10312          971 :         ix86_emit_leave (insn);
   10313              :       else
   10314              :         {
   10315              :           /* Need CFA adjust note.  */
   10316          152 :           tmp = gen_rtx_SET (stack_pointer_rtx, r10);
   10317          152 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
   10318              :         }
   10319              :     }
   10320              : 
   10321         7621 :   RTX_FRAME_RELATED_P (insn) = true;
   10322         7621 :   ix86_add_queued_cfa_restore_notes (insn);
   10323              : 
   10324              :   /* If we're not doing a tail-call, we need to adjust the stack.  The
                      :      amount is the distance from the current SP offset to
                      :      frame.stack_realign_offset, and is only applied when SP is
                      :      valid.  */
   10325         7621 :   if (use_call && m->fs.sp_valid)
   10326              :     {
   10327         3706 :       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
   10328         3706 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10329              :                                 GEN_INT (dealloc), style,
   10330         3706 :                                 m->fs.cfa_reg == stack_pointer_rtx);
   10331              :     }
   10332         7621 : }
   10333              : 
   10334              : /* Restore function stack, frame, and registers.  */
   10335              : 
   10336              : void
   10337      1650658 : ix86_expand_epilogue (int style)
   10338              : {
   10339      1650658 :   struct machine_function *m = cfun->machine;
   10340      1650658 :   struct machine_frame_state frame_state_save = m->fs;
   10341      1650658 :   bool restore_regs_via_mov;
   10342      1650658 :   bool using_drap;
   10343      1650658 :   bool restore_stub_is_tail = false;
   10344              : 
   10345      1650658 :   if (ix86_function_naked (current_function_decl))
   10346              :     {
   10347              :       /* The program should not reach this point.  */
   10348           74 :       emit_insn (gen_ud2 ());
   10349       124568 :       return;
   10350              :     }
   10351              : 
   10352      1650584 :   ix86_finalize_stack_frame_flags ();
   10353      1650584 :   const struct ix86_frame &frame = cfun->machine->frame;
   10354              : 
   10355      1650584 :   m->fs.sp_realigned = stack_realign_fp;
   10356        31846 :   m->fs.sp_valid = stack_realign_fp
   10357      1625913 :                    || !frame_pointer_needed
   10358      2107713 :                    || crtl->sp_is_unchanging;
   10359      1650584 :   gcc_assert (!m->fs.sp_valid
   10360              :               || m->fs.sp_offset == frame.stack_pointer_offset);
   10361              : 
   10362              :   /* The FP must be valid if the frame pointer is present.  */
   10363      1650584 :   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
   10364      1650584 :   gcc_assert (!m->fs.fp_valid
   10365              :               || m->fs.fp_offset == frame.hard_frame_pointer_offset);
   10366              : 
   10367              :   /* We must have *some* valid pointer to the stack frame.  */
   10368      1650584 :   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
   10369              : 
   10370              :   /* The DRAP is never valid at this point.  */
   10371      1650584 :   gcc_assert (!m->fs.drap_valid);
   10372              : 
   10373              :   /* See the comment about red zone and frame
   10374              :      pointer usage in ix86_expand_prologue.  */
   10375      1650584 :   if (frame_pointer_needed && frame.red_zone_size)
   10376       129487 :     emit_insn (gen_memory_blockage ());
   10377              : 
   10378      1650584 :   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
   10379         7175 :   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
   10380              : 
   10381              :   /* Determine the CFA offset of the end of the red-zone.  */
   10382      1650584 :   m->fs.red_zone_offset = 0;
   10383      1650584 :   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
   10384              :     {
   10385              :       /* The red-zone begins below return address and error code in
   10386              :          exception handler.  */
   10387      1473127 :       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
   10388              : 
   10389              :       /* When the register save area is in the aligned portion of
   10390              :          the stack, determine the maximum runtime displacement that
   10391              :          matches up with the aligned frame.  */
   10392      1473127 :       if (stack_realign_drap)
   10393         8620 :         m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
   10394         4310 :                                   + UNITS_PER_WORD);
   10395              :     }
   10396              : 
   10397      1650584 :   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
   10398              : 
   10399              :   /* Special care must be taken for the normal return case of a function
   10400              :      using eh_return: the eax and edx registers are marked as saved, but
   10401              :      not restored along this path.  Adjust the save location to match.  */
   10402      1650584 :   if (crtl->calls_eh_return && style != 2)
   10403           37 :     reg_save_offset -= 2 * UNITS_PER_WORD;
   10404              : 
   10405              :   /* EH_RETURN requires the use of moves to function properly.  */
   10406      1650584 :   if (crtl->calls_eh_return)
   10407              :     restore_regs_via_mov = true;
   10408              :   /* SEH requires the use of pops to identify the epilogue.  */
   10409      1650526 :   else if (TARGET_SEH)
   10410              :     restore_regs_via_mov = false;
   10411              :   /* If we already save reg with pushp, don't use move at epilogue.  */
   10412      1650526 :   else if (m->fs.apx_ppx_used)
   10413              :     restore_regs_via_mov = false;
   10414              :   /* If we're only restoring one register and sp cannot be used then
   10415              :      using a move instruction to restore the register since it's
   10416              :      less work than reloading sp and popping the register.  */
   10417      1650439 :   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
   10418              :     restore_regs_via_mov = true;
   10419      1589592 :   else if (crtl->shrink_wrapped_separate
   10420      1537288 :            || (TARGET_EPILOGUE_USING_MOVE
   10421        56735 :                && cfun->machine->use_fast_prologue_epilogue
   10422        56679 :                && (frame.nregs > 1
   10423        56666 :                    || m->fs.sp_offset != reg_save_offset)))
   10424              :     restore_regs_via_mov = true;
   10425      1537053 :   else if (frame_pointer_needed
   10426       418342 :            && !frame.nregs
   10427       323164 :            && m->fs.sp_offset != reg_save_offset)
   10428              :     restore_regs_via_mov = true;
   10429      1385823 :   else if (frame_pointer_needed
   10430       267112 :            && TARGET_USE_LEAVE
   10431       267037 :            && cfun->machine->use_fast_prologue_epilogue
   10432       210065 :            && frame.nregs == 1)
   10433              :     restore_regs_via_mov = true;
   10434              :   else
   10435      1650584 :     restore_regs_via_mov = false;
   10436              : 
   10437      1650584 :   if (crtl->shrink_wrapped_separate)
   10438        52335 :     gcc_assert (restore_regs_via_mov);
   10439              : 
   10440      1598249 :   if (restore_regs_via_mov || frame.nsseregs)
   10441              :     {
   10442              :       /* Ensure that the entire register save area is addressable via
   10443              :          the stack pointer, if we will restore SSE regs via sp.  */
   10444       328047 :       if (TARGET_64BIT
   10445       315417 :           && m->fs.sp_offset > 0x7fffffff
   10446           23 :           && sp_valid_at (frame.stack_realign_offset + 1)
   10447       328069 :           && (frame.nsseregs + frame.nregs) != 0)
   10448              :         {
   10449            6 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10450            6 :                                      GEN_INT (m->fs.sp_offset
   10451              :                                               - frame.sse_reg_save_offset),
   10452              :                                      style,
   10453            6 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10454              :         }
   10455              :     }
   10456              : 
   10457              :   /* If there are any SSE registers to restore, then we have to do it
   10458              :      via moves, since there's obviously no pop for SSE regs.  */
   10459      1650584 :   if (frame.nsseregs)
   10460        33939 :     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
   10461              :                                           style == 2);
   10462              : 
   10463      1650584 :   if (m->call_ms2sysv)
   10464              :     {
   10465         7621 :       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
   10466              : 
   10467              :       /* We cannot use a tail-call for the stub if:
   10468              :          1. We have to pop incoming args,
   10469              :          2. We have additional int regs to restore, or
   10470              :          3. A sibling call will be the tail-call, or
   10471              :          4. We are emitting an eh_return_internal epilogue.
   10472              : 
   10473              :          TODO: Item 4 has not yet been tested!
   10474              : 
   10475              :          If any of the above are true, we will call the stub rather than
   10476              :          jump to it.  */
   10477         7621 :       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
   10478         7621 :       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
   10479              :     }
   10480              : 
   10481              :   /* If using out-of-line stub that is a tail-call, then...*/
   10482      1650584 :   if (m->call_ms2sysv && restore_stub_is_tail)
   10483              :     {
   10484              :       /* TODO: paranoid tests. (remove eventually)  */
   10485         1123 :       gcc_assert (m->fs.sp_valid);
   10486         1123 :       gcc_assert (!m->fs.sp_realigned);
   10487         1123 :       gcc_assert (!m->fs.fp_valid);
   10488         1123 :       gcc_assert (!m->fs.realigned);
   10489         1123 :       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
   10490         1123 :       gcc_assert (!crtl->drap_reg);
   10491         1123 :       gcc_assert (!frame.nregs);
   10492         1123 :       gcc_assert (!crtl->shrink_wrapped_separate);
   10493              :     }
   10494      1649461 :   else if (restore_regs_via_mov)
   10495              :     {
   10496       293339 :       rtx t;
   10497              : 
   10498       293339 :       if (frame.nregs)
   10499        96089 :         ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
   10500              : 
   10501              :       /* eh_return epilogues need %ecx added to the stack pointer.  */
   10502       293339 :       if (style == 2)
   10503              :         {
   10504           37 :           rtx sa = EH_RETURN_STACKADJ_RTX;
   10505           29 :           rtx_insn *insn;
   10506              : 
   10507           29 :           gcc_assert (!crtl->shrink_wrapped_separate);
   10508              : 
   10509              :           /* Stack realignment doesn't work with eh_return.  */
   10510           29 :           if (crtl->stack_realign_needed)
   10511            0 :             sorry ("Stack realignment not supported with "
   10512              :                    "%<__builtin_eh_return%>");
   10513              : 
   10514              :           /* regparm nested functions don't work with eh_return.  */
   10515           29 :           if (ix86_static_chain_on_stack)
   10516            0 :             sorry ("regparm nested function not supported with "
   10517              :                    "%<__builtin_eh_return%>");
   10518              : 
   10519           29 :           if (frame_pointer_needed)
   10520              :             {
   10521           35 :               t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
   10522           43 :               t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
   10523           27 :               emit_insn (gen_rtx_SET (sa, t));
   10524              : 
   10525              :               /* NB: eh_return epilogues must restore the frame pointer
   10526              :                  in word_mode since the upper 32 bits of RBP register
   10527              :                  can have any values.  */
   10528           27 :               t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
   10529           27 :               rtx frame_reg = gen_rtx_REG (word_mode,
   10530              :                                            HARD_FRAME_POINTER_REGNUM);
   10531           27 :               insn = emit_move_insn (frame_reg, t);
   10532              : 
   10533              :               /* Note that we use SA as a temporary CFA, as the return
   10534              :                  address is at the proper place relative to it.  We
   10535              :                  pretend this happens at the FP restore insn because
   10536              :                  prior to this insn the FP would be stored at the wrong
   10537              :                  offset relative to SA, and after this insn we have no
   10538              :                  other reasonable register to use for the CFA.  We don't
   10539              :                  bother resetting the CFA to the SP for the duration of
   10540              :                  the return insn, unless the control flow instrumentation
   10541              :                  is done.  In this case the SP is used later and we have
   10542              :                  to reset CFA to SP.  */
   10543           27 :               add_reg_note (insn, REG_CFA_DEF_CFA,
   10544           35 :                             plus_constant (Pmode, sa, UNITS_PER_WORD));
   10545           27 :               ix86_add_queued_cfa_restore_notes (insn);
   10546           27 :               add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
   10547           27 :               RTX_FRAME_RELATED_P (insn) = 1;
   10548              : 
   10549           27 :               m->fs.cfa_reg = sa;
   10550           27 :               m->fs.cfa_offset = UNITS_PER_WORD;
   10551           27 :               m->fs.fp_valid = false;
   10552              : 
   10553           27 :               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
   10554              :                                          const0_rtx, style,
   10555           27 :                                          flag_cf_protection);
   10556              :             }
   10557              :           else
   10558              :             {
   10559            2 :               t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
   10560            2 :               t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
   10561            2 :               insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
   10562            2 :               ix86_add_queued_cfa_restore_notes (insn);
   10563              : 
   10564            2 :               gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10565            2 :               if (m->fs.cfa_offset != UNITS_PER_WORD)
   10566              :                 {
   10567            2 :                   m->fs.cfa_offset = UNITS_PER_WORD;
   10568            2 :                   add_reg_note (insn, REG_CFA_DEF_CFA,
   10569            2 :                                 plus_constant (Pmode, stack_pointer_rtx,
   10570            2 :                                                UNITS_PER_WORD));
   10571            2 :                   RTX_FRAME_RELATED_P (insn) = 1;
   10572              :                 }
   10573              :             }
   10574           29 :           m->fs.sp_offset = UNITS_PER_WORD;
   10575           29 :           m->fs.sp_valid = true;
   10576           29 :           m->fs.sp_realigned = false;
   10577              :         }
   10578              :     }
   10579              :   else
   10580              :     {
   10581              :       /* SEH requires that the function end with (1) a stack adjustment
   10582              :          if necessary, (2) a sequence of pops, and (3) a return or
   10583              :          jump instruction.  Prevent insns from the function body from
   10584              :          being scheduled into this sequence.  */
   10585      1356122 :       if (TARGET_SEH)
   10586              :         {
   10587              :           /* Prevent a catch region from being adjacent to the standard
   10588              :              epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
   10589              :              nor several other flags that would be interesting to test are
   10590              :              set up yet.  */
   10591              :           if (flag_non_call_exceptions)
   10592              :             emit_insn (gen_nops (const1_rtx));
   10593              :           else
   10594              :             emit_insn (gen_blockage ());
   10595              :         }
   10596              : 
   10597              :       /* First step is to deallocate the stack frame so that we can
   10598              :          pop the registers.  If the stack pointer was realigned, it needs
   10599              :          to be restored now.  Also do it on SEH target for very large
   10600              :          frame as the emitted instructions aren't allowed by the ABI
   10601              :          in epilogues.  */
   10602      1356122 :       if (!m->fs.sp_valid || m->fs.sp_realigned
   10603              :           || (TARGET_SEH
   10604              :               && (m->fs.sp_offset - reg_save_offset
   10605              :                   >= SEH_MAX_FRAME_SIZE)))
   10606              :         {
   10607        29737 :           pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
   10608        29737 :                                      GEN_INT (m->fs.fp_offset
   10609              :                                               - reg_save_offset),
   10610              :                                      style, false);
   10611              :         }
   10612      1326385 :       else if (m->fs.sp_offset != reg_save_offset)
   10613              :         {
   10614       614759 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10615              :                                      GEN_INT (m->fs.sp_offset
   10616              :                                               - reg_save_offset),
   10617              :                                      style,
   10618       614759 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10619              :         }
   10620              : 
   10621      1356122 :       if (TARGET_APX_PUSH2POP2
   10622          563 :           && ix86_can_use_push2pop2 ()
   10623      1356683 :           && m->func_type == TYPE_NORMAL)
   10624          560 :         ix86_emit_restore_regs_using_pop2 ();
   10625              :       else
   10626      1355562 :         ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
   10627              :     }
   10628              : 
   10629              :   /* If we used a frame pointer and haven't already got rid of it,
   10630              :      then do so now.  */
   10631      1650584 :   if (m->fs.fp_valid)
   10632              :     {
   10633              :       /* If the stack pointer is valid and pointing at the frame
   10634              :          pointer store address, then we only need a pop.  */
   10635       480802 :       if (sp_valid_at (frame.hfp_save_offset)
   10636       480802 :           && m->fs.sp_offset == frame.hfp_save_offset)
   10637       237560 :         ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10638              :       /* Leave results in shorter dependency chains on CPUs that are
   10639              :          able to grok it fast.  */
   10640       243242 :       else if (TARGET_USE_LEAVE
   10641           12 :                || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
   10642       243254 :                || !cfun->machine->use_fast_prologue_epilogue)
   10643       243230 :         ix86_emit_leave (NULL);
   10644              :       else
   10645              :         {
   10646           12 :           pro_epilogue_adjust_stack (stack_pointer_rtx,
   10647              :                                      hard_frame_pointer_rtx,
   10648           12 :                                      const0_rtx, style, !using_drap);
   10649           12 :           ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10650              :         }
   10651              :     }
   10652              : 
   10653      1650584 :   if (using_drap)
   10654              :     {
   10655         7175 :       int param_ptr_offset = UNITS_PER_WORD;
   10656         7175 :       rtx_insn *insn;
   10657              : 
   10658         7175 :       gcc_assert (stack_realign_drap);
   10659              : 
   10660         7175 :       if (ix86_static_chain_on_stack)
   10661            0 :         param_ptr_offset += UNITS_PER_WORD;
   10662         7175 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10663          232 :         param_ptr_offset += UNITS_PER_WORD;
   10664              : 
   10665         7480 :       insn = emit_insn (gen_rtx_SET
   10666              :                         (stack_pointer_rtx,
   10667              :                          plus_constant (Pmode, crtl->drap_reg,
   10668              :                                         -param_ptr_offset)));
   10669         7175 :       m->fs.cfa_reg = stack_pointer_rtx;
   10670         7175 :       m->fs.cfa_offset = param_ptr_offset;
   10671         7175 :       m->fs.sp_offset = param_ptr_offset;
   10672         7175 :       m->fs.realigned = false;
   10673              : 
   10674         7480 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10675         7175 :                     plus_constant (Pmode, stack_pointer_rtx,
   10676         7175 :                                    param_ptr_offset));
   10677         7175 :       RTX_FRAME_RELATED_P (insn) = 1;
   10678              : 
   10679         7175 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10680          232 :         ix86_emit_restore_reg_using_pop (crtl->drap_reg);
   10681              :     }
   10682              : 
   10683              :   /* At this point the stack pointer must be valid, and we must have
   10684              :      restored all of the registers.  We may not have deallocated the
   10685              :      entire stack frame.  We've delayed this until now because it may
   10686              :      be possible to merge the local stack deallocation with the
   10687              :      deallocation forced by ix86_static_chain_on_stack.   */
   10688      1650584 :   gcc_assert (m->fs.sp_valid);
   10689      1650584 :   gcc_assert (!m->fs.sp_realigned);
   10690      1650584 :   gcc_assert (!m->fs.fp_valid);
   10691      1650584 :   gcc_assert (!m->fs.realigned);
   10692      1786289 :   if (m->fs.sp_offset != UNITS_PER_WORD)
   10693              :     {
   10694        50054 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10695              :                                  GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
   10696              :                                  style, true);
   10697              :     }
   10698              :   else
   10699      1600530 :     ix86_add_queued_cfa_restore_notes (get_last_insn ());
   10700              : 
   10701              :   /* Sibcall epilogues don't want a return instruction.  */
   10702      1650584 :   if (style == 0)
   10703              :     {
   10704       124420 :       m->fs = frame_state_save;
   10705       124420 :       return;
   10706              :     }
   10707              : 
   10708      1526164 :   if (cfun->machine->func_type != TYPE_NORMAL)
   10709          120 :     emit_jump_insn (gen_interrupt_return ());
   10710      1526044 :   else if (crtl->args.pops_args && crtl->args.size)
   10711              :     {
   10712        25996 :       rtx popc = GEN_INT (crtl->args.pops_args);
   10713              : 
   10714              :       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
   10715              :          address, do explicit add, and jump indirectly to the caller.  */
   10716              : 
   10717        25996 :       if (crtl->args.pops_args >= 65536)
   10718              :         {
   10719            0 :           rtx ecx = gen_rtx_REG (SImode, CX_REG);
   10720            0 :           rtx_insn *insn;
   10721              : 
   10722              :           /* There is no "pascal" calling convention in any 64bit ABI.  */
   10723            0 :           gcc_assert (!TARGET_64BIT);
   10724              : 
   10725            0 :           insn = emit_insn (gen_pop (ecx));
   10726            0 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10727            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10728              : 
   10729            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10730            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10731            0 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10732            0 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10733            0 :           RTX_FRAME_RELATED_P (insn) = 1;
   10734              : 
   10735            0 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10736              :                                      popc, -1, true);
   10737            0 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10738              :         }
   10739              :       else
   10740        25996 :         emit_jump_insn (gen_simple_return_pop_internal (popc));
   10741              :     }
   10742      1500048 :   else if (!m->call_ms2sysv || !restore_stub_is_tail)
   10743              :     {
   10744              :       /* In case of return from EH a simple return cannot be used
   10745              :          as a return address will be compared with a shadow stack
   10746              :          return address.  Use indirect jump instead.  */
   10747      1498925 :       if (style == 2 && flag_cf_protection)
   10748              :         {
   10749              :           /* Register used in indirect jump must be in word_mode.  But
   10750              :              Pmode may not be the same as word_mode for x32.  */
   10751           17 :           rtx ecx = gen_rtx_REG (word_mode, CX_REG);
   10752           17 :           rtx_insn *insn;
   10753              : 
   10754           17 :           insn = emit_insn (gen_pop (ecx));
   10755           17 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10756           17 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10757              : 
   10758           33 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10759           17 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10760           17 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10761           17 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10762           17 :           RTX_FRAME_RELATED_P (insn) = 1;
   10763              : 
   10764           17 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10765           17 :         }
   10766              :       else
   10767      1498908 :         emit_jump_insn (gen_simple_return_internal ());
   10768              :     }
   10769              : 
   10770              :   /* Restore the state back to the state from the prologue,
   10771              :      so that it's correct for the next epilogue.  */
   10772      1526164 :   m->fs = frame_state_save;
   10773              : }
   10774              : 
   10775              : /* Reset pic_offset_table_rtx to REAL_PIC_OFFSET_TABLE_REGNUM after the function's potential modifications (unless ix86_use_pseudo_pic_reg ()); for Mach-O, also write a nop after a trailing label or ud2 for an empty body to FILE.  */
   10776              : 
   10777              : static void
   10778      1486852 : ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
   10779              : {
   10780      1486852 :   if (pic_offset_table_rtx
   10781      1486852 :       && !ix86_use_pseudo_pic_reg ())
   10782            0 :     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
   10783              : 
   10784      1486852 :   if (TARGET_MACHO)
   10785              :     {
   10786              :       rtx_insn *insn = get_last_insn ();
   10787              :       rtx_insn *deleted_debug_label = NULL;
   10788              : 
   10789              :       /* Mach-O doesn't support labels at the end of objects, so if
   10790              :          it looks like we might want one, take special action.
   10791              :          First, walk back over the trailing notes, remembering the earliest deleted debug label.  */
   10792              :       while (insn
   10793              :              && NOTE_P (insn)
   10794              :              && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
   10795              :         {
   10796              :           /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
   10797              :              notes only, instead set their CODE_LABEL_NUMBER to -1,
   10798              :              otherwise there would be code generation differences
   10799              :              between -g and -g0.  */
   10800              :           if (NOTE_P (insn) && NOTE_KIND (insn)
   10801              :               == NOTE_INSN_DELETED_DEBUG_LABEL)
   10802              :             deleted_debug_label = insn;
   10803              :           insn = PREV_INSN (insn);
   10804              :         }
   10805              : 
   10806              :       /* If we have:
   10807              :          label:
   10808              :             barrier
   10809              :           then this needs to be detected, so skip past the barrier.  */
   10810              : 
   10811              :       if (insn && BARRIER_P (insn))
   10812              :         insn = PREV_INSN (insn);
   10813              : 
   10814              :       /* Up to now we've only seen notes or barriers.  */
   10815              :       if (insn)
   10816              :         {
   10817              :           if (LABEL_P (insn)
   10818              :               || (NOTE_P (insn)
   10819              :                   && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
   10820              :             /* Trailing label: emit a nop so the label is not last.  */
   10821              :             fputs ("\tnop\n", file);
   10822              :           else if (cfun && ! cfun->is_thunk)
   10823              :             {
   10824              :               /* See if we have a completely empty function body, skipping
   10825              :                  the special case of the picbase thunk emitted as asm.  */
   10826              :               while (insn && ! INSN_P (insn))
   10827              :                 insn = PREV_INSN (insn);
   10828              :               /* If we don't find any insns, we've got an empty function body;
   10829              :                  i.e. completely empty - without a return or branch.  This is
   10830              :                  taken as the case where a function body has been removed
   10831              :                  because it contains an inline __builtin_unreachable().  GCC
   10832              :                  declares that reaching __builtin_unreachable() means UB so
   10833              :                  we're not obliged to do anything special; however, we want
   10834              :                  non-zero-sized function bodies.  To meet this, and help the
   10835              :                  user out, let's trap the case.  */
   10836              :               if (insn == NULL)
   10837              :                 fputs ("\tud2\n", file);
   10838              :             }
   10839              :         }
   10840              :       else if (deleted_debug_label) /* No insn left: neutralize the deleted debug labels instead of emitting a nop.  */
   10841              :         for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
   10842              :           if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
   10843              :             CODE_LABEL_NUMBER (insn) = -1;
   10844              :     }
   10845      1486852 : }
   10846              : 
   10847              : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  FILE, PATCH_AREA_SIZE and RECORD_P are forwarded to default_print_patchable_function_entry unless the function label was already emitted.  */
   10848              : 
   10849              : void
   10850           59 : ix86_print_patchable_function_entry (FILE *file,
   10851              :                                      unsigned HOST_WIDE_INT patch_area_size,
   10852              :                                      bool record_p)
   10853              : {
   10854           59 :   if (cfun->machine->function_label_emitted)
   10855              :     {
   10856              :       /* NB: When ix86_print_patchable_function_entry is called after
   10857              :          the function label has been emitted, we have inserted or queued
   10858              :          a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
   10859              :          place.  There is nothing to do here.  */
   10860              :       return;
   10861              :     }
   10862              : 
   10863            8 :   default_print_patchable_function_entry (file, patch_area_size,
   10864              :                                           record_p);
   10865              : }
   10866              : 
   10867              : /* Output a patchable area of size PATCH_AREA_SIZE to asm_out_file, forwarding
   10868              :    RECORD_P.  NB: default_print_patchable_function_entry isn't available in i386.md.  */
   10869              : 
   10870              : void
   10871           51 : ix86_output_patchable_area (unsigned int patch_area_size,
   10872              :                             bool record_p)
   10873              : {
   10874           51 :   default_print_patchable_function_entry (asm_out_file,
   10875              :                                           patch_area_size,
   10876              :                                           record_p);
   10877           51 : }
   10878              : 
   10879              : /* Return the number of a scratch register to use in the split stack
   10880              :    prologue (-fsplit-stack), which is the first instructions in the
   10881              :    function, even before the regular prologue.  The scratch register can
   10882              :    be any caller-saved register which is not used for parameters or for the
   10883              :    static chain; for unsupported combinations, call sorry () and return INVALID_REGNUM.  */
   10884              : 
   10885              : static unsigned int
   10886        24610 : split_stack_prologue_scratch_regno (void)
   10887              : {
   10888        24610 :   if (TARGET_64BIT)
   10889              :     return R11_REG;
   10890              :   else
   10891              :     {
   10892         6943 :       bool is_fastcall, is_thiscall;
   10893         6943 :       int regparm;
   10894              : 
   10895         6943 :       is_fastcall = (lookup_attribute ("fastcall",
   10896         6943 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10897              :                      != NULL);
   10898         6943 :       is_thiscall = (lookup_attribute ("thiscall",
   10899         6943 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10900              :                      != NULL);
   10901         6943 :       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
   10902              : 
   10903         6943 :       if (is_fastcall)
   10904              :         {
   10905            0 :           if (DECL_STATIC_CHAIN (cfun->decl))
   10906              :             {
   10907            0 :               sorry ("%<-fsplit-stack%> does not support fastcall with "
   10908              :                      "nested function");
   10909            0 :               return INVALID_REGNUM;
   10910              :             }
   10911              :           return AX_REG;
   10912              :         }
   10913         6943 :       else if (is_thiscall)
   10914              :         {
   10915            0 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10916              :             return DX_REG;
   10917            0 :           return AX_REG;
   10918              :         }
   10919         6943 :       else if (regparm < 3)
   10920              :         {
   10921         6943 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10922              :             return CX_REG;
   10923              :           else
   10924              :             {
   10925          459 :               if (regparm >= 2)
   10926              :                 {
   10927            0 :                   sorry ("%<-fsplit-stack%> does not support 2 register "
   10928              :                          "parameters for a nested function");
   10929            0 :                   return INVALID_REGNUM;
   10930              :                 }
   10931              :               return DX_REG;
   10932              :             }
   10933              :         }
   10934              :       else
   10935              :         {
   10936              :           /* FIXME: We could make this work by pushing a register
   10937              :              around the addition and comparison.  */
   10938            0 :           sorry ("%<-fsplit-stack%> does not support 3 register parameters");
   10939            0 :           return INVALID_REGNUM;
   10940              :         }
   10941              :     }
   10942              : }
   10943              : 
   10944              : /* A SYMBOL_REF for __morestack, the function which allocates new stack
   10945              :    space for -fsplit-stack.  */
   10946              : 
   10947              : static GTY(()) rtx split_stack_fn;
   10948              : 
   10949              : /* A SYMBOL_REF for __morestack_large_model, the more stack function when using the large model.  */
   10950              : 
   10951              : static GTY(()) rtx split_stack_fn_large;
   10952              : 
   10953              : /* Return location of the stack guard value in the TLS block: a constant Pmode MEM at address TARGET_THREAD_SPLIT_STACK_OFFSET in address space DEFAULT_TLS_SEG_REG.  Requires flag_split_stack.  */
   10954              : 
   10955              : rtx
   10956       260053 : ix86_split_stack_guard (void)
   10957              : {
   10958       260053 :   int offset;
   10959       260053 :   addr_space_t as = DEFAULT_TLS_SEG_REG;
   10960       260053 :   rtx r;
   10961              : 
   10962       260053 :   gcc_assert (flag_split_stack);
   10963              : 
   10964              : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
   10965       260053 :   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
   10966              : #else
   10967              :   gcc_unreachable (); /* No guard offset is defined for this target.  */
   10968              : #endif
   10969              : 
   10970       260053 :   r = GEN_INT (offset);
   10971       358058 :   r = gen_const_mem (Pmode, r);
   10972       260053 :   set_mem_addr_space (r, as);
   10973              : 
   10974       260053 :   return r;
   10975              : }
   10976              : 
   10977              : /* Handle -fsplit-stack.  These are the first instructions in the
   10978              :    function, even before the regular prologue.  */
   10979              : 
   10980              : void
   10981       260043 : ix86_expand_split_stack_prologue (void)
   10982              : {
   10983       260043 :   HOST_WIDE_INT allocate;
   10984       260043 :   unsigned HOST_WIDE_INT args_size;
   10985       260043 :   rtx_code_label *label;
   10986       260043 :   rtx limit, current, allocate_rtx, call_fusage;
   10987       260043 :   rtx_insn *call_insn;
   10988       260043 :   unsigned int scratch_regno = INVALID_REGNUM;
   10989       260043 :   rtx scratch_reg = NULL_RTX;
   10990       260043 :   rtx_code_label *varargs_label = NULL;
   10991       260043 :   rtx fn;
   10992              : 
   10993       260043 :   gcc_assert (flag_split_stack && reload_completed);
   10994              : 
   10995       260043 :   ix86_finalize_stack_frame_flags ();
   10996       260043 :   struct ix86_frame &frame = cfun->machine->frame;
   10997       260043 :   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
   10998              : 
   10999              :   /* This is the label we will branch to if we have enough stack
   11000              :      space.  We expect the basic block reordering pass to reverse this
   11001              :      branch if optimizing, so that we branch in the unlikely case.  */
   11002       260043 :   label = gen_label_rtx ();
   11003              : 
   11004              :   /* We need to compare the stack pointer minus the frame size with
   11005              :      the stack boundary in the TCB.  The stack boundary always gives
   11006              :      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
   11007              :      can compare directly.  Otherwise we need to do an addition.  */
   11008              : 
   11009       260043 :   limit = ix86_split_stack_guard ();
   11010              : 
   11011       260043 :   if (allocate >= SPLIT_STACK_AVAILABLE
   11012       235596 :       || flag_force_indirect_call)
   11013              :     {
   11014        24462 :       scratch_regno = split_stack_prologue_scratch_regno ();
   11015        24462 :       if (scratch_regno == INVALID_REGNUM)
   11016            0 :         return;
   11017              :     }
   11018              : 
   11019       260043 :   if (allocate >= SPLIT_STACK_AVAILABLE)
   11020              :     {
   11021        24447 :       rtx offset;
   11022              : 
   11023              :       /* We need a scratch register to hold the stack pointer minus
   11024              :          the required frame size.  Since this is the very start of the
   11025              :          function, the scratch register can be any caller-saved
   11026              :          register which is not used for parameters.  */
   11027        24447 :       offset = GEN_INT (- allocate);
   11028              : 
   11029        31336 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11030        24447 :       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
   11031              :         {
   11032              :           /* We don't use gen_add in this case because it will
   11033              :              want to split to lea, but when not optimizing the insn
   11034              :              will not be split after this point.  */
   11035        31336 :           emit_insn (gen_rtx_SET (scratch_reg,
   11036              :                                   gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   11037              :                                                 offset)));
   11038              :         }
   11039              :       else
   11040              :         {
   11041            0 :           emit_move_insn (scratch_reg, offset);
   11042            0 :           emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
   11043              :         }
   11044              :       current = scratch_reg;
   11045              :     }
   11046              :   else
   11047       235596 :     current = stack_pointer_rtx;
   11048              : 
   11049       260043 :   ix86_expand_branch (GEU, current, limit, label);
   11050       260043 :   rtx_insn *jump_insn = get_last_insn ();
   11051       260043 :   JUMP_LABEL (jump_insn) = label;
   11052              : 
   11053              :   /* Mark the jump as very likely to be taken.  */
   11054       260043 :   add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
   11055              : 
   11056       260043 :   if (split_stack_fn == NULL_RTX)
   11057              :     {
   11058         5451 :       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
   11059         4347 :       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
   11060              :     }
   11061       260043 :   fn = split_stack_fn;
   11062              : 
   11063              :   /* Get more stack space.  We pass in the desired stack space and the
   11064              :      size of the arguments to copy to the new stack.  In 32-bit mode
   11065              :      we push the parameters; __morestack will return on a new stack
   11066              :      anyhow.  In 64-bit mode we pass the parameters in r10 and
   11067              :      r11.  */
   11068       260043 :   allocate_rtx = GEN_INT (allocate);
   11069       260043 :   args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
   11070       260043 :   call_fusage = NULL_RTX;
   11071       260043 :   rtx pop = NULL_RTX;
   11072       260043 :   if (TARGET_64BIT)
   11073              :     {
   11074       162038 :       rtx reg10, reg11;
   11075              : 
   11076       162038 :       reg10 = gen_rtx_REG (DImode, R10_REG);
   11077       162038 :       reg11 = gen_rtx_REG (DImode, R11_REG);
   11078              : 
   11079              :       /* If this function uses a static chain, it will be in %r10.
   11080              :          Preserve it across the call to __morestack.  */
   11081       162038 :       if (DECL_STATIC_CHAIN (cfun->decl))
   11082              :         {
   11083         7505 :           rtx rax;
   11084              : 
   11085         7505 :           rax = gen_rtx_REG (word_mode, AX_REG);
   11086         7505 :           emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
   11087         7505 :           use_reg (&call_fusage, rax);
   11088              :         }
   11089              : 
   11090       162038 :       if (flag_force_indirect_call
   11091       162023 :           || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
   11092              :         {
   11093           16 :           HOST_WIDE_INT argval;
   11094              : 
   11095           16 :           if (split_stack_fn_large == NULL_RTX)
   11096              :             {
   11097            7 :               split_stack_fn_large
   11098            7 :                 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
   11099            7 :               SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
   11100              :             }
   11101              : 
   11102           16 :           fn = split_stack_fn_large;
   11103              : 
   11104           16 :           if (ix86_cmodel == CM_LARGE_PIC)
   11105              :             {
   11106            3 :               rtx_code_label *label;
   11107            3 :               rtx x;
   11108              : 
   11109            3 :               gcc_assert (Pmode == DImode);
   11110              : 
   11111            3 :               label = gen_label_rtx ();
   11112            3 :               emit_label (label);
   11113            3 :               LABEL_PRESERVE_P (label) = 1;
   11114            3 :               emit_insn (gen_set_rip_rex64 (reg10, label));
   11115            3 :               emit_insn (gen_set_got_offset_rex64 (reg11, label));
   11116            3 :               emit_insn (gen_add2_insn (reg10, reg11));
   11117            3 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
   11118            3 :               x = gen_rtx_CONST (Pmode, x);
   11119            3 :               emit_move_insn (reg11, x);
   11120            3 :               x = gen_rtx_PLUS (Pmode, reg10, reg11);
   11121            3 :               x = gen_const_mem (Pmode, x);
   11122            3 :               fn = copy_to_suggested_reg (x, reg11, Pmode);
   11123              :             }
   11124           13 :           else if (ix86_cmodel == CM_LARGE)
   11125            1 :             fn = copy_to_suggested_reg (fn, reg11, Pmode);
   11126              : 
   11127              :           /* When using the large model we need to load the address
   11128              :              into a register, and we've run out of registers.  So we
   11129              :              switch to a different calling convention, and we call a
   11130              :              different function: __morestack_large_model.  We pass the
   11131              :              argument size in the upper 32 bits of r10 and pass the
   11132              :              frame size in the lower 32 bits.  */
   11133           16 :           gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
   11134           16 :           gcc_assert ((args_size & 0xffffffff) == args_size);
   11135              : 
   11136           16 :           argval = ((args_size << 16) << 16) + allocate;
   11137           16 :           emit_move_insn (reg10, GEN_INT (argval));
   11138           16 :         }
   11139              :       else
   11140              :         {
   11141       162022 :           emit_move_insn (reg10, allocate_rtx);
   11142       162022 :           emit_move_insn (reg11, GEN_INT (args_size));
   11143       162022 :           use_reg (&call_fusage, reg11);
   11144              :         }
   11145              : 
   11146       162038 :       use_reg (&call_fusage, reg10);
   11147              :     }
   11148              :   else
   11149              :     {
   11150        98005 :       if (flag_force_indirect_call && flag_pic)
   11151              :         {
   11152            0 :           rtx x;
   11153              : 
   11154            0 :           gcc_assert (Pmode == SImode);
   11155              : 
   11156            0 :           scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11157              : 
   11158            0 :           emit_insn (gen_set_got (scratch_reg));
   11159            0 :           x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
   11160              :                               UNSPEC_GOT);
   11161            0 :           x = gen_rtx_CONST (Pmode, x);
   11162            0 :           x = gen_rtx_PLUS (Pmode, scratch_reg, x);
   11163            0 :           x = gen_const_mem (Pmode, x);
   11164            0 :           fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
   11165              :         }
   11166              : 
   11167        98005 :       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
   11168       196010 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
   11169        98005 :       insn = emit_insn (gen_push (allocate_rtx));
   11170       196010 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
   11171       196010 :       pop = GEN_INT (2 * UNITS_PER_WORD);
   11172              :     }
   11173              : 
   11174       260043 :   if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
   11175              :     {
   11176           12 :       scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
   11177              : 
   11178           12 :       if (GET_MODE (fn) != word_mode)
   11179            0 :         fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
   11180              : 
   11181           12 :       fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
   11182              :     }
   11183              : 
   11184       260043 :   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
   11185       260043 :                                 GEN_INT (UNITS_PER_WORD), constm1_rtx,
   11186              :                                 pop, false);
   11187       260043 :   add_function_usage_to (call_insn, call_fusage);
   11188       260043 :   if (!TARGET_64BIT)
   11189        98005 :     add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
   11190              :   /* Indicate that this function can't jump to non-local gotos.  */
   11191       260043 :   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
   11192              : 
   11193              :   /* In order to make call/return prediction work right, we now need
   11194              :      to execute a return instruction.  See
   11195              :      libgcc/config/i386/morestack.S for the details on how this works.
   11196              : 
   11197              :      For flow purposes gcc must not see this as a return
   11198              :      instruction--we need control flow to continue at the subsequent
   11199              :      label.  Therefore, we use an unspec.  */
   11200       260043 :   gcc_assert (crtl->args.pops_args < 65536);
   11201       260043 :   rtx_insn *ret_insn
   11202       260043 :     = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
   11203              : 
   11204       260043 :   if ((flag_cf_protection & CF_BRANCH))
   11205              :     {
   11206              :       /* Insert ENDBR since __morestack will jump back here via indirect
   11207              :          call.  */
   11208           21 :       rtx cet_eb = gen_nop_endbr ();
   11209           21 :       emit_insn_after (cet_eb, ret_insn);
   11210              :     }
   11211              : 
   11212              :   /* If we are in 64-bit mode and this function uses a static chain,
    11213              :      we saved %r10 in %rax before calling __morestack.  */
   11214       260043 :   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
   11215         7505 :     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
   11216              :                     gen_rtx_REG (word_mode, AX_REG));
   11217              : 
   11218              :   /* If this function calls va_start, we need to store a pointer to
   11219              :      the arguments on the old stack, because they may not have been
   11220              :      all copied to the new stack.  At this point the old stack can be
   11221              :      found at the frame pointer value used by __morestack, because
   11222              :      __morestack has set that up before calling back to us.  Here we
   11223              :      store that pointer in a scratch register, and in
   11224              :      ix86_expand_prologue we store the scratch register in a stack
   11225              :      slot.  */
   11226       260043 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11227              :     {
   11228           12 :       rtx frame_reg;
   11229           12 :       int words;
   11230              : 
   11231           12 :       scratch_regno = split_stack_prologue_scratch_regno ();
   11232           16 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11233           16 :       frame_reg = gen_rtx_REG (Pmode, BP_REG);
   11234              : 
   11235              :       /* 64-bit:
   11236              :          fp -> old fp value
   11237              :                return address within this function
   11238              :                return address of caller of this function
   11239              :                stack arguments
   11240              :          So we add three words to get to the stack arguments.
   11241              : 
   11242              :          32-bit:
   11243              :          fp -> old fp value
   11244              :                return address within this function
   11245              :                first argument to __morestack
   11246              :                second argument to __morestack
   11247              :                return address of caller of this function
   11248              :                stack arguments
   11249              :          So we add five words to get to the stack arguments.
   11250              :       */
   11251           12 :       words = TARGET_64BIT ? 3 : 5;
   11252           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11253              :                               plus_constant (Pmode, frame_reg,
   11254              :                                              words * UNITS_PER_WORD)));
   11255              : 
   11256           12 :       varargs_label = gen_label_rtx ();
   11257           12 :       emit_jump_insn (gen_jump (varargs_label));
   11258           12 :       JUMP_LABEL (get_last_insn ()) = varargs_label;
   11259              : 
   11260           12 :       emit_barrier ();
   11261              :     }
   11262              : 
   11263       260043 :   emit_label (label);
   11264       260043 :   LABEL_NUSES (label) = 1;
   11265              : 
   11266              :   /* If this function calls va_start, we now have to set the scratch
   11267              :      register for the case where we do not call __morestack.  In this
   11268              :      case we need to set it based on the stack pointer.  */
   11269       260043 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11270              :     {
   11271           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11272              :                               plus_constant (Pmode, stack_pointer_rtx,
   11273              :                                              UNITS_PER_WORD)));
   11274              : 
   11275           12 :       emit_label (varargs_label);
   11276           12 :       LABEL_NUSES (varargs_label) = 1;
   11277              :     }
   11278              : }
   11279              : 
   11280              : /* We may have to tell the dataflow pass that the split stack prologue
   11281              :    is initializing a scratch register.  */
   11282              : 
    11283              : static void
    11284     15865769 : ix86_live_on_entry (bitmap regs)
    11285              : {
    11286     15865769 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)  /* Nothing to add unless the varargs pointer is in use.  */
    11287              :     {
    11288          124 :       gcc_assert (flag_split_stack);
    11289          124 :       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());  /* Mark the prologue scratch register in REGS.  */
    11290              :     }
    11291     15865769 : }
   11292              : 
   11293              : /* Extract the parts of an RTL expression that is a valid memory address
   11294              :    for an instruction.  Return false if the structure of the address is
   11295              :    grossly off.  */
   11296              : 
    11297              : bool
    11298   4333665097 : ix86_decompose_address (rtx addr, struct ix86_address *out)
    11299              : {
    11300   4333665097 :   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
    11301   4333665097 :   rtx base_reg, index_reg;
    11302   4333665097 :   HOST_WIDE_INT scale = 1;  /* Stays 1 unless a MULT or ASHIFT term is found.  */
    11303   4333665097 :   rtx scale_rtx = NULL_RTX;  /* Scale operand of a MULT; converted to SCALE further down.  */
    11304   4333665097 :   rtx tmp;
    11305   4333665097 :   addr_space_t seg = ADDR_SPACE_GENERIC;
    11306              : 
    11307              :   /* Allow zero-extended SImode addresses,
    11308              :      they will be emitted with addr32 prefix.  */
    11309   4333665097 :   if (TARGET_64BIT && GET_MODE (addr) == DImode)
    11310              :     {
    11311   2295024636 :       if (GET_CODE (addr) == ZERO_EXTEND
    11312      2183351 :           && GET_MODE (XEXP (addr, 0)) == SImode)
    11313              :         {
    11314      2090014 :           addr = XEXP (addr, 0);
    11315      2090014 :           if (CONST_INT_P (addr))  /* A constant under the ZERO_EXTEND is rejected.  */
    11316              :             return false;
    11317              :         }
    11318   2292934622 :       else if (GET_CODE (addr) == AND)
    11319              :         {
    11320      2816511 :           rtx mask = XEXP (addr, 1);
    11321      2816511 :           rtx shift_val;
    11322              : 
    11323      2816511 :           if (const_32bit_mask (mask, DImode)
    11324              :               /* For ASHIFT inside AND, combine will not generate
    11325              :                  canonical zero-extend.  Merge mask for AND and shift_count
    11326              :                  to check if it is canonical zero-extend.  */
    11327      2816511 :               || (CONST_INT_P (mask)
    11328      1828862 :                   && GET_CODE (XEXP (addr, 0)) == ASHIFT
    11329       141264 :                   && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
    11330       138167 :                   && ((UINTVAL (mask)
    11331       138167 :                        | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
    11332              :                       == HOST_WIDE_INT_UC (0xffffffff))))
    11333              :             {
    11334        81250 :               addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
    11335        81250 :               if (addr == NULL_RTX)
    11336              :                 return false;
    11337              : 
    11338        81250 :               if (CONST_INT_P (addr))
    11339              :                 return false;
    11340              :             }
    11341              :         }
    11342              :     }
    11343              : 
    11344              :   /* Allow SImode subregs of DImode addresses,
    11345              :      they will be emitted with addr32 prefix.  */
    11346   4333665097 :   if (TARGET_64BIT && GET_MODE (addr) == SImode)
    11347              :     {
    11348     17359304 :       if (SUBREG_P (addr)
    11349       215218 :           && GET_MODE (SUBREG_REG (addr)) == DImode)
    11350              :         {
    11351       188320 :           addr = SUBREG_REG (addr);
    11352       188320 :           if (CONST_INT_P (addr))
    11353              :             return false;
    11354              :         }
    11355              :     }
    11356              : 
    11357   4333665097 :   if (REG_P (addr))  /* A bare register becomes the base.  */
    11358              :     base = addr;
    11359              :   else if (SUBREG_P (addr))
    11360              :     {
    11361       456928 :       if (REG_P (SUBREG_REG (addr)))  /* Only a SUBREG of a register is accepted as base.  */
    11362              :         base = addr;
    11363              :       else
    11364              :         return false;
    11365              :     }
    11366              :   else if (GET_CODE (addr) == PLUS)
    11367              :     {
    11368              :       rtx addends[4], op;
    11369              :       int n = 0, i;
    11370              : 
    11371              :       op = addr;
    11372   3161910574 :       do  /* Flatten the left-nested PLUS chain into addends[].  */
    11373              :         {
    11374   3161910574 :           if (n >= 4)  /* addends[] has four slots; longer chains are rejected.  */
    11375    639859762 :             return false;
    11376   3161905276 :           addends[n++] = XEXP (op, 1);
    11377   3161905276 :           op = XEXP (op, 0);
    11378              :         }
    11379   3161905276 :       while (GET_CODE (op) == PLUS);
    11380   3098442454 :       if (n >= 4)
    11381              :         return false;
    11382   3098435718 :       addends[n] = op;  /* The innermost left operand goes last.  */
    11383              : 
    11384   8082154146 :       for (i = n; i >= 0; --i)  /* Classify terms, starting with the innermost left operand.  */
    11385              :         {
    11386   5623566156 :           op = addends[i];
    11387   5623566156 :           switch (GET_CODE (op))
    11388              :             {
    11389     61246393 :             case MULT:
    11390     61246393 :               if (index)  /* Only one index term is allowed.  */
    11391              :                 return false;
    11392     61207080 :               index = XEXP (op, 0);
    11393     61207080 :               scale_rtx = XEXP (op, 1);
    11394     61207080 :               break;
    11395              : 
    11396     12775410 :             case ASHIFT:
    11397     12775410 :               if (index)
    11398              :                 return false;
    11399     12702225 :               index = XEXP (op, 0);
    11400     12702225 :               tmp = XEXP (op, 1);
    11401     12702225 :               if (!CONST_INT_P (tmp))
    11402              :                 return false;
    11403     12687634 :               scale = INTVAL (tmp);
    11404     12687634 :               if ((unsigned HOST_WIDE_INT) scale > 3)  /* Shift counts 0..3 only, i.e. scale 1, 2, 4 or 8.  */
    11405              :                 return false;
    11406     12276811 :               scale = 1 << scale;
    11407     12276811 :               break;
    11408              : 
    11409      1073373 :             case ZERO_EXTEND:
    11410      1073373 :               op = XEXP (op, 0);
    11411      1073373 :               if (GET_CODE (op) != UNSPEC)  /* Only a zero-extended UNSPEC is handled here.  */
    11412              :                 return false;
    11413              :               /* FALLTHRU */
    11414              : 
    11415       697639 :             case UNSPEC:
    11416       697639 :               if (XINT (op, 1) == UNSPEC_TP
    11417       689326 :                   && TARGET_TLS_DIRECT_SEG_REFS
    11418       689326 :                   && seg == ADDR_SPACE_GENERIC)
    11419       689326 :                 seg = DEFAULT_TLS_SEG_REG;  /* UNSPEC_TP becomes a segment override instead of a term.  */
    11420              :               else
    11421              :                 return false;
    11422              :               break;
    11423              : 
    11424       514350 :             case SUBREG:
    11425       514350 :               if (!REG_P (SUBREG_REG (op)))
    11426              :                 return false;
    11427              :               /* FALLTHRU */
    11428              : 
    11429   2528301838 :             case REG:
    11430   2528301838 :               if (!base)  /* First register seen is the base, the second the index.  */
    11431              :                 base = op;
    11432     81377845 :               else if (!index)
    11433              :                 index = op;
    11434              :               else
    11435              :                 return false;
    11436              :               break;
    11437              : 
    11438   2382065820 :             case CONST:
    11439   2382065820 :             case CONST_INT:
    11440   2382065820 :             case SYMBOL_REF:
    11441   2382065820 :             case LABEL_REF:
    11442   2382065820 :               if (disp)  /* Only one displacement term is allowed.  */
    11443              :                 return false;
    11444              :               disp = op;
    11445              :               break;
    11446              : 
    11447              :             default:
    11448              :               return false;
    11449              :             }
    11450              :         }
    11451              :     }
    11452              :   else if (GET_CODE (addr) == MULT)
    11453              :     {
    11454      3712333 :       index = XEXP (addr, 0);           /* index*scale */
    11455      3712333 :       scale_rtx = XEXP (addr, 1);
    11456              :     }
    11457              :   else if (GET_CODE (addr) == ASHIFT)
    11458              :     {
    11459              :       /* We're called for lea too, which implements ashift on occasion.  */
    11460      3242888 :       index = XEXP (addr, 0);
    11461      3242888 :       tmp = XEXP (addr, 1);
    11462      3242888 :       if (!CONST_INT_P (tmp))
    11463              :         return false;
    11464      2856520 :       scale = INTVAL (tmp);
    11465      2856520 :       if ((unsigned HOST_WIDE_INT) scale > 3)  /* Same 0..3 shift-count limit as in the PLUS case.  */
    11466              :         return false;
    11467      2118677 :       scale = 1 << scale;
    11468              :     }
    11469              :   else
    11470              :     disp = addr;                        /* displacement */
    11471              : 
    11472   2464419000 :   if (index)  /* The index must be a register or a SUBREG of one.  */
    11473              :     {
    11474    151041430 :       if (REG_P (index))
    11475              :         ;
    11476      4004081 :       else if (SUBREG_P (index)
    11477       258770 :                && REG_P (SUBREG_REG (index)))
    11478              :         ;
    11479              :       else
    11480              :         return false;
    11481              :     }
    11482              : 
    11483              :   /* Extract the integral value of scale.  */
    11484   3688869700 :   if (scale_rtx)
    11485              :     {
    11486     56425320 :       if (!CONST_INT_P (scale_rtx))
    11487              :         return false;
    11488     55811753 :       scale = INTVAL (scale_rtx);
    11489              :     }
    11490              : 
    11491   3688256133 :   base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;  /* Look through a SUBREG for the REGNO tests below.  */
    11492   3688256133 :   index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
    11493              : 
    11494              :   /* Avoid useless 0 displacement.  */
    11495   3688256133 :   if (disp == const0_rtx && (base || index))
    11496   3688256133 :     disp = NULL_RTX;
    11497              : 
    11498              :   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
    11499   2686626244 :   if (base_reg && index_reg && scale == 1
    11500   3768150420 :       && (REGNO (index_reg) == ARG_POINTER_REGNUM
    11501              :           || REGNO (index_reg) == FRAME_POINTER_REGNUM
    11502              :           || REGNO (index_reg) == SP_REG))
    11503              :     {
    11504              :       std::swap (base, index);
    11505              :       std::swap (base_reg, index_reg);
    11506              :     }
    11507              : 
    11508              :   /* Special case: rewrite index*1+disp into base+disp.  */
    11509   3688256133 :   if (!base && index && scale == 1)
    11510           52 :     base = index, base_reg = index_reg, index = index_reg = NULL_RTX;
    11511              : 
    11512              :   /* Special case: %ebp cannot be encoded as a base without a displacement.
    11513              :      Similarly %r13.  */
    11514    322887249 :   if (!disp && base_reg
    11515   4006820907 :       && (REGNO (base_reg) == ARG_POINTER_REGNUM
    11516              :           || REGNO (base_reg) == FRAME_POINTER_REGNUM
    11517              :           || REGNO (base_reg) == BP_REG
    11518              :           || REGNO (base_reg) == R13_REG))
    11519              :     disp = const0_rtx;
    11520              : 
    11521              :   /* Special case: on K6, [%esi] makes the instruction vector decoded.
    11522              :      Avoid this by transforming to [%esi+0].
    11523              :      Reload calls address legitimization without cfun defined, so we need
    11524              :      to test cfun for being non-NULL.  */
    11525            0 :   if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
    11526            0 :       && base_reg && !index_reg && !disp
    11527   3688256133 :       && REGNO (base_reg) == SI_REG)
    11528            0 :     disp = const0_rtx;
    11529              : 
    11530              :   /* Special case: encode reg+reg instead of reg*2.  */
    11531   3688256133 :   if (!base && index && scale == 2)
    11532   1001629837 :     base = index, base_reg = index_reg, scale = 1;
    11533              : 
    11534              :   /* Special case: scaling cannot be encoded without base or displacement.  */
    11535   1001629837 :   if (!base && !disp && index && scale != 1)
    11536      3444671 :     disp = const0_rtx;
    11537              : 
    11538   3688256133 :   out->base = base;  /* OUT is written only here, i.e. only when true is returned.  */
    11539   3688256133 :   out->index = index;
    11540   3688256133 :   out->disp = disp;
    11541   3688256133 :   out->scale = scale;
    11542   3688256133 :   out->seg = seg;
    11543              : 
    11544   3688256133 :   return true;
    11545              : }
   11546              : 
   11547              : /* Return cost of the memory address x.
   11548              :    For i386, it is better to use a complex address than let gcc copy
   11549              :    the address into a reg and make a new pseudo.  But not if the address
    11550              :    requires two regs - that would mean more pseudos with longer
   11551              :    lifetimes.  */
    11552              : static int
    11553     10828889 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
    11554              : {
    11555     10828889 :   struct ix86_address parts;
    11556     10828889 :   int cost = 1;  /* Base cost; each counted register below adds one.  */
    11557     10828889 :   int ok = ix86_decompose_address (x, &parts);
    11558              : 
    11559     10828889 :   gcc_assert (ok);  /* X must decompose; a failure here is asserted against.  */
    11560              : 
    11561     10828889 :   if (parts.base && SUBREG_P (parts.base))  /* Look through SUBREGs before the REG_P/REGNO tests.  */
    11562          492 :     parts.base = SUBREG_REG (parts.base);
    11563     10828889 :   if (parts.index && SUBREG_P (parts.index))
    11564           20 :     parts.index = SUBREG_REG (parts.index);
    11565              : 
    11566              :   /* Attempt to minimize number of registers in the address by increasing
    11567              :      address cost for each used register.  We don't increase address cost
    11568              :      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
    11569              :      is not invariant itself it most likely means that base or index is not
    11570              :      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
    11571              :      which is not profitable for x86.  */
    11572     10828889 :   if (parts.base
    11573      9417416 :       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)  /* Hard registers are not counted.  */
    11574     19946779 :       && (current_pass->type == GIMPLE_PASS
    11575      2730054 :           || !pic_offset_table_rtx
    11576       126469 :           || !REG_P (parts.base)
    11577       126469 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    11578              :     cost++;
    11579              : 
    11580     10828889 :   if (parts.index  /* Same test as for the base, applied to the index.  */
    11581      5188702 :       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
    11582     16003572 :       && (current_pass->type == GIMPLE_PASS
    11583       649168 :           || !pic_offset_table_rtx
    11584        54782 :           || !REG_P (parts.index)
    11585        54782 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    11586      5173408 :     cost++;
    11587              : 
    11588              :   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
    11589              :      since its predecode logic can't detect the length of instructions
    11590              :      and it degenerates to vector decoded.  Increase cost of such
    11591              :      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
    11592              :      to split such addresses or even refuse such addresses at all.
    11593              : 
    11594              :      Following addressing modes are affected:
    11595              :       [base+scale*index]
    11596              :       [scale*index+disp]
    11597              :       [base+index]
    11598              : 
    11599              :      The first and last case may be avoidable by explicitly coding the zero in
    11600              :      memory address, but I don't have AMD-K6 machine handy to check this
    11601              :      theory.  */
    11602              : 
    11603     10828889 :   if (TARGET_CPU_P (K6)
    11604            0 :       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
    11605            0 :           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
    11606            0 :           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    11607            0 :     cost += 10;  /* Fixed penalty for the three K6-unfriendly forms listed above.  */
    11608              : 
    11609     10828889 :   return cost;
    11610              : }
   11611              : 
   11612              : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
   11613              : 
    11614              : bool
    11615      1184687 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
    11616              :                                      unsigned int align,
    11617              :                                      enum by_pieces_operation op,
    11618              :                                      bool speed_p)
    11619              : {
    11620              :   /* Return true when we are currently expanding memcpy/memset epilogue
    11621              :      with move_by_pieces or store_by_pieces.  */
    11622      1184687 :   if (cfun->machine->by_pieces_in_use)
    11623              :     return true;
    11624              : 
    11625      1182577 :   return default_use_by_pieces_infrastructure_p (size, align, op,  /* Otherwise defer to the default hook, arguments unchanged.  */
    11626      1182577 :                                                  speed_p);
    11627              : }
   11628              : 
   11629              : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
    11630              :    this is used to form addresses to local data when -fPIC is in
   11631              :    use.  */
   11632              : 
    11633              : static bool
    11634            0 : darwin_local_data_pic (rtx disp)
    11635              : {
    11636            0 :   return (GET_CODE (disp) == UNSPEC  /* True only for an UNSPEC ...  */
    11637            0 :           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);  /* ... whose unspec number is UNSPEC_MACHOPIC_OFFSET.  */
    11638              : }
   11639              : 
   11640              : /* True if the function symbol operand X should be loaded from GOT.
   11641              :    If CALL_P is true, X is a call operand.
   11642              : 
   11643              :    NB: -mno-direct-extern-access doesn't force load from GOT for
   11644              :    call.
   11645              : 
   11646              :    NB: In 32-bit mode, only non-PIC is allowed in inline assembly
   11647              :    statements, since a PIC register could not be available at the
   11648              :    call site.  */
   11649              : 
    11650              : bool
    11651   1852092944 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
    11652              : {
    11653     96329732 :   return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))  /* 32-bit needs non-PIC and HAVE_AS_IX86_GOT32X.  */
    11654              :           && !TARGET_PECOFF && !TARGET_MACHO
    11655   1849230178 :           && (!flag_pic || this_is_asm_operands)  /* Under PIC, only for inline-asm operands.  */
    11656   1828902817 :           && ix86_cmodel != CM_LARGE  /* Both large code models are excluded.  */
    11657   1828896818 :           && ix86_cmodel != CM_LARGE_PIC
    11658   1828896817 :           && SYMBOL_REF_P (x)
    11659   1828896815 :           && ((!call_p  /* Non-call use: no direct extern access, or "nodirect_extern_access" on the decl.  */
    11660   1823450711 :                && (!ix86_direct_extern_access
    11661   1823448441 :                    || (SYMBOL_REF_DECL (x)
    11662   1643487062 :                        && lookup_attribute ("nodirect_extern_access",
    11663   1643487062 :                                             DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
    11664   1828894091 :               || (SYMBOL_REF_FUNCTION_P (x)  /* Function symbol: !flag_plt, or "noplt" on the decl.  */
    11665    690451943 :                   && (!flag_plt
    11666    690447532 :                       || (SYMBOL_REF_DECL (x)
    11667    690447532 :                           && lookup_attribute ("noplt",
    11668    690447532 :                                                DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
    11669   1852100478 :           && !SYMBOL_REF_LOCAL_P (x));  /* Local symbols never qualify.  */
    11670              : }
   11671              : 
   11672              : /* Determine if a given RTX is a valid constant.  We already know this
   11673              :    satisfies CONSTANT_P.  */
   11674              : 
    11675              : static bool
    11676   1557492929 : ix86_legitimate_constant_p (machine_mode mode, rtx x)
    11677              : {
    11678   1557492929 :   switch (GET_CODE (x))
    11679              :     {
    11680    138575000 :     case CONST:
    11681    138575000 :       x = XEXP (x, 0);
    11682              : 
    11683    138575000 :       if (GET_CODE (x) == PLUS)
    11684              :         {
    11685    138458561 :           if (!CONST_INT_P (XEXP (x, 1)))  /* Only a constant-integer offset is accepted.  */
    11686              :             return false;
    11687    138458561 :           x = XEXP (x, 0);
    11688              :         }
    11689              : 
    11690    138575000 :       if (TARGET_MACHO && darwin_local_data_pic (x))
    11691              :         return true;
    11692              : 
    11693              :       /* Only some unspecs are valid as "constants".  */
    11694    138575000 :       if (GET_CODE (x) == UNSPEC)
    11695       493532 :         switch (XINT (x, 1))
    11696              :           {
    11697        21063 :           case UNSPEC_GOT:
    11698        21063 :           case UNSPEC_GOTOFF:
    11699        21063 :           case UNSPEC_PLTOFF:
    11700        21063 :             return TARGET_64BIT;  /* These three are constants only for 64-bit.  */
    11701       472106 :           case UNSPEC_TPOFF:
    11702       472106 :           case UNSPEC_NTPOFF:
    11703       472106 :             x = XVECEXP (x, 0, 0);
    11704       472106 :             return (SYMBOL_REF_P (x)
    11705       472106 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
    11706          275 :           case UNSPEC_DTPOFF:
    11707          275 :             x = XVECEXP (x, 0, 0);
    11708          275 :             return (SYMBOL_REF_P (x)
    11709          275 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
    11710            0 :           case UNSPEC_SECREL32:
    11711            0 :             x = XVECEXP (x, 0, 0);
    11712            0 :             return SYMBOL_REF_P (x);
    11713              :           default:
    11714              :             return false;
    11715              :           }
    11716              : 
    11717              :       /* We must have drilled down to a symbol.  */
    11718    138081468 :       if (LABEL_REF_P (x))
    11719              :         return true;
    11720    138076194 :       if (!SYMBOL_REF_P (x))
    11721              :         return false;
    11722              :       /* FALLTHRU */
    11723              : 
    11724    928151861 :     case SYMBOL_REF:
    11725              :       /* TLS symbols are never valid.  */
    11726    928151861 :       if (SYMBOL_REF_TLS_MODEL (x))
    11727              :         return false;
    11728              : 
    11729              :       /* DLLIMPORT symbols are never valid.  */
    11730    928048072 :       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    11731              :           && SYMBOL_REF_DLLIMPORT_P (x))
    11732              :         return false;
    11733              : 
    11734              : #if TARGET_MACHO
    11735              :       /* mdynamic-no-pic */
    11736              :       if (MACHO_DYNAMIC_NO_PIC_P)
    11737              :         return machopic_symbol_defined_p (x);
    11738              : #endif
    11739              : 
    11740              :       /* External function address should be loaded
    11741              :          via the GOT slot to avoid PLT.  */
    11742    928048072 :       if (ix86_force_load_from_GOT_p (x))
    11743              :         return false;
    11744              : 
    11745              :       break;
    11746              : 
    11747    607870811 :     CASE_CONST_SCALAR_INT:
    11748    607870811 :       if (ix86_endbr_immediate_operand (x, VOIDmode))
    11749              :         return false;
    11750              : 
    11751    607870610 :       switch (mode)
    11752              :         {
    11753      1456402 :         case E_TImode:
    11754      1456402 :           if (TARGET_64BIT)
    11755              :             return true;
    11756              :           /* FALLTHRU */
    11757        25953 :         case E_OImode:
    11758        25953 :         case E_XImode:
    11759        25953 :           if (!standard_sse_constant_p (x, mode)  /* Reject a non-standard constant wider than the widest enabled vector mode.  */
    11760        43110 :               && GET_MODE_SIZE (TARGET_AVX512F
    11761              :                                 ? XImode
    11762              :                                 : (TARGET_AVX
    11763              :                                    ? OImode
    11764              :                                    : (TARGET_SSE2
    11765        17157 :                                       ? TImode : DImode))) < GET_MODE_SIZE (mode))
    11766              :             return false;
    11767              :         default:  /* Also reached by fall-through when the check above does not reject.  */
    11768              :           break;
    11769              :         }
    11770              :       break;
    11771              : 
    11772      8626544 :     case CONST_VECTOR:
    11773      8626544 :       if (!standard_sse_constant_p (x, mode))
    11774              :         return false;
    11775              :       break;
    11776              : 
    11777      7690782 :     case CONST_DOUBLE:
    11778      7690782 :       if (mode == E_BFmode)  /* BFmode CONST_DOUBLEs are rejected.  */
    11779              :         return false;
    11780              : 
    11781              :     default:  /* CONST_DOUBLE in any other mode falls through to here.  */
    11782              :       break;
    11783              :     }
    11784              : 
    11785              :   /* Otherwise we handle everything else in the move patterns.  */
    11786              :   return true;
    11787              : }
   11788              : 
   11789              : /* Determine if it's legal to put X into the constant pool; return true
   11790              :    when X must NOT be forced into memory.  This is not possible for the
   11791              :    address of thread-local symbols, which is checked above.  */
   11792              : 
   11793              : static bool
   11794     61749899 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
   11795              : {
   11796              :   /* We can put any immediate constant in memory.  */
   11797     61749899 :   switch (GET_CODE (x))
   11798              :     {
   11799              :     CASE_CONST_ANY:
   11800              :       return false;
   11801              : 
   11802      1796472 :     default:
   11803      1796472 :       break;
   11804              :     }
   11805              :   /* Anything else may go to memory only if it is a legitimate constant.  */
   11806      1796472 :   return !ix86_legitimate_constant_p (mode, x);
   11807              : }
   11808              : 
   11809              : /* Return the alias set for the GOT; it is allocated on first use and then reused.  */
   11810              : 
   11811              : alias_set_type
   11812       188716 : ix86_GOT_alias_set (void)
   11813              : {
   11814       188716 :   static alias_set_type set = -1;  /* -1: not allocated yet.  */
   11815       188716 :   if (set == -1)
   11816         2952 :     set = new_alias_set ();
   11817       188716 :   return set;
   11818              : }
   11819              : 
   11820              : /* Return true if the constant value X is a legitimate general operand
   11821              :    when generating PIC code.  It is given that flag_pic is on and
   11822              :    that X satisfies CONSTANT_P.  */
   11823              : 
   11824              : bool
   11825    126267004 : legitimate_pic_operand_p (rtx x)
   11826              : {
   11827    126267004 :   rtx inner;
   11828              :   /* Only CONST, SYMBOL_REF and LABEL_REF need checking; other codes are OK.  */
   11829    126267004 :   switch (GET_CODE (x))
   11830              :     {
   11831      2504403 :     case CONST:
   11832      2504403 :       inner = XEXP (x, 0);
   11833      2504403 :       if (GET_CODE (inner) == PLUS
   11834       357822 :           && CONST_INT_P (XEXP (inner, 1)))
   11835       357822 :         inner = XEXP (inner, 0);
   11836              : 
   11837              :       /* Only some unspecs are valid as "constants".  */
   11838      2504403 :       if (GET_CODE (inner) == UNSPEC)
   11839      2254540 :         switch (XINT (inner, 1))
   11840              :           {
   11841      2193858 :           case UNSPEC_GOT:
   11842      2193858 :           case UNSPEC_GOTOFF:
   11843      2193858 :           case UNSPEC_PLTOFF:
   11844      2193858 :             return TARGET_64BIT;
   11845            0 :           case UNSPEC_TPOFF:
   11846            0 :             x = XVECEXP (inner, 0, 0);
   11847            0 :             return (SYMBOL_REF_P (x)
   11848            0 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
   11849            0 :           case UNSPEC_SECREL32:
   11850            0 :             x = XVECEXP (inner, 0, 0);
   11851            0 :             return SYMBOL_REF_P (x);
   11852            0 :           case UNSPEC_MACHOPIC_OFFSET:
   11853            0 :             return legitimate_pic_address_disp_p (x);
   11854              :           default:
   11855              :             return false;
   11856              :           }
   11857              :       /* FALLTHRU */
   11858              : 
   11859      6996703 :     case SYMBOL_REF:
   11860      6996703 :     case LABEL_REF:
   11861      6996703 :       return legitimate_pic_address_disp_p (x);
   11862              : 
   11863              :     default:
   11864              :       return true;
   11865              :     }
   11866              : }
   11867              : 
   11868              : /* Return true if DISP, a constant RTX, is a valid memory displacement
   11869              :    in PIC mode.  */
   11870              : 
   11871              : bool
   11872     65325268 : legitimate_pic_address_disp_p (rtx disp)
   11873              : {
   11874     65325268 :   bool saw_plus;
   11875              : 
   11876              :   /* In 64bit mode we can allow direct addresses of symbols and labels
   11877              :      when they are not dynamic symbols.  */
   11878     65325268 :   if (TARGET_64BIT)
   11879              :     {
   11880     40155966 :       rtx op0 = disp, op1;
   11881              : 
   11882     40155966 :       switch (GET_CODE (disp))
   11883              :         {
   11884              :         case LABEL_REF:
   11885              :           return true;
   11886              : 
   11887     10954876 :         case CONST:
   11888     10954876 :           if (GET_CODE (XEXP (disp, 0)) != PLUS)
   11889              :             break;
   11890      1173433 :           op0 = XEXP (XEXP (disp, 0), 0);
   11891      1173433 :           op1 = XEXP (XEXP (disp, 0), 1);
   11892      1173433 :           if (!CONST_INT_P (op1))
   11893              :             break;
   11894      1173433 :           if (GET_CODE (op0) == UNSPEC
   11895          296 :               && (XINT (op0, 1) == UNSPEC_DTPOFF
   11896          296 :                   || XINT (op0, 1) == UNSPEC_NTPOFF)
   11897      1173729 :               && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
   11898              :             return true;
   11899      1173137 :           if (INTVAL (op1) >= 16*1024*1024
   11900      1173137 :               || INTVAL (op1) < -16*1024*1024)
   11901              :             break;
   11902      1173049 :           if (LABEL_REF_P (op0))
   11903              :             return true;
   11904      1173049 :           if (GET_CODE (op0) == CONST
   11905            0 :               && GET_CODE (XEXP (op0, 0)) == UNSPEC
   11906            0 :               && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
   11907              :             return true;
   11908      1173049 :           if (GET_CODE (op0) == UNSPEC
   11909            0 :               && XINT (op0, 1) == UNSPEC_PCREL)
   11910              :             return true;
   11911      1173049 :           if (!SYMBOL_REF_P (op0))
   11912              :             break;
   11913              :           /* FALLTHRU */
   11914              : 
   11915     30151560 :         case SYMBOL_REF:
   11916              :           /* TLS references should always be enclosed in UNSPEC.
   11917              :              A dllimported symbol always needs to be resolved.  */
   11918     30151560 :           if (SYMBOL_REF_TLS_MODEL (op0)
   11919              :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
   11920              :             return false;
   11921              : 
   11922     29995995 :           if (TARGET_PECOFF)
   11923              :             {
   11924              : #if TARGET_PECOFF
   11925              :               if (is_imported_p (op0))
   11926              :                 return true;
   11927              : #endif
   11928              : 
   11929              :               if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
   11930              :                 break;
   11931              : 
   11932              :               /* Non-external-weak function symbols need to be resolved only
   11933              :                  for the large model.  Non-external symbols don't need to be
   11934              :                  resolved for large and medium models.  For the small model,
   11935              :                  we don't need to resolve anything here.  */
   11936              :               if ((ix86_cmodel != CM_LARGE_PIC
   11937              :                    && SYMBOL_REF_FUNCTION_P (op0)
   11938              :                    && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
   11939              :                   || !SYMBOL_REF_EXTERNAL_P (op0)
   11940              :                   || ix86_cmodel == CM_SMALL_PIC)
   11941              :                 return true;
   11942              :             }
   11943     29995995 :           else if (!SYMBOL_REF_FAR_ADDR_P (op0)
   11944     29995991 :                    && (SYMBOL_REF_LOCAL_P (op0)
   11945     18293818 :                        || ((ix86_direct_extern_access
   11946     36416326 :                             && !(SYMBOL_REF_DECL (op0)
   11947     18122671 :                                  && lookup_attribute ("nodirect_extern_access",
   11948     18122671 :                                                       DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
   11949              :                            && HAVE_LD_PIE_COPYRELOC
   11950     18293492 :                            && flag_pie
   11951        34047 :                            && !SYMBOL_REF_WEAK (op0)
   11952        33659 :                            && !SYMBOL_REF_FUNCTION_P (op0)))
   11953     41701901 :                    && ix86_cmodel != CM_LARGE_PIC)
   11954              :             return true;
   11955              :           break;
   11956              : 
   11957              :         default:
   11958              :           break;
   11959              :         }
   11960              :     }
   11961     53244802 :   if (GET_CODE (disp) != CONST)
   11962              :     return false;
   11963     14976614 :   disp = XEXP (disp, 0);
   11964              : 
   11965     14976614 :   if (TARGET_64BIT)
   11966              :     {
   11967              :       /* It is unsafe to allow PLUS expressions.  This would limit the allowed
   11968              :          distance of GOT tables.  We should not need these anyway.  */
   11969      9833965 :       if (GET_CODE (disp) != UNSPEC
   11970      9781443 :           || (XINT (disp, 1) != UNSPEC_GOTPCREL
   11971      9781443 :               && XINT (disp, 1) != UNSPEC_GOTOFF
   11972              :               && XINT (disp, 1) != UNSPEC_PCREL
   11973              :               && XINT (disp, 1) != UNSPEC_PLTOFF))
   11974              :         return false;
   11975              : 
   11976      9781443 :       if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11977      9781443 :           && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
   11978              :         return false;
   11979              :       return true;
   11980              :     }
   11981              : 
   11982      5142649 :   saw_plus = false;
   11983      5142649 :   if (GET_CODE (disp) == PLUS)
   11984              :     {
   11985       589189 :       if (!CONST_INT_P (XEXP (disp, 1)))
   11986              :         return false;
   11987       589189 :       disp = XEXP (disp, 0);
   11988       589189 :       saw_plus = true;
   11989              :     }
   11990              : 
   11991      5142649 :   if (TARGET_MACHO && darwin_local_data_pic (disp))
   11992              :     return true;
   11993              : 
   11994      5142649 :   if (GET_CODE (disp) != UNSPEC)
   11995              :     return false;
   11996              : 
   11997      4977325 :   switch (XINT (disp, 1))
   11998              :     {
   11999      2266983 :     case UNSPEC_GOT:
   12000      2266983 :       if (saw_plus)
   12001              :         return false;
   12002              :       /* We need to check for both symbols and labels because VxWorks loads
   12003              :          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   12004              :          details.  */
   12005      2266982 :       return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   12006      2266982 :               || LABEL_REF_P (XVECEXP (disp, 0, 0)));
   12007      2710342 :     case UNSPEC_GOTOFF:
   12008              :       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   12009              :          The ABI also specifies a 32bit relocation, but we don't produce it
   12010              :          in the small PIC model at all.  */
   12011      2710342 :       if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   12012      2710342 :            || LABEL_REF_P (XVECEXP (disp, 0, 0)))
   12013              :           && !TARGET_64BIT)
   12014      5420684 :         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
   12015              :       return false;
   12016            0 :     case UNSPEC_GOTTPOFF:
   12017            0 :     case UNSPEC_GOTNTPOFF:
   12018            0 :     case UNSPEC_INDNTPOFF:
   12019            0 :       if (saw_plus)
   12020              :         return false;
   12021            0 :       disp = XVECEXP (disp, 0, 0);
   12022            0 :       return (SYMBOL_REF_P (disp)
   12023            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
   12024            0 :     case UNSPEC_NTPOFF:
   12025            0 :       disp = XVECEXP (disp, 0, 0);
   12026            0 :       return (SYMBOL_REF_P (disp)
   12027            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
   12028            0 :     case UNSPEC_DTPOFF:
   12029            0 :       disp = XVECEXP (disp, 0, 0);
   12030            0 :       return (SYMBOL_REF_P (disp)
   12031            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
   12032            0 :     case UNSPEC_SECREL32:
   12033            0 :       disp = XVECEXP (disp, 0, 0);
   12034            0 :       return SYMBOL_REF_P (disp);
   12035              :     }
   12036              : 
   12037              :   return false;
   12038              : }
   12039              : 
   12040              : /* Determine if OP is a suitable RTX for an address register.
   12041              :    Return the naked register if a register or a register subreg is
   12042              :    found, otherwise return NULL_RTX.  */
   12043              : 
   12044              : static rtx
   12045   1376646848 : ix86_validate_address_register (rtx op)
   12046              : {
   12047   1376646848 :   machine_mode mode = GET_MODE (op);
   12048              : 
   12049              :   /* Only SImode or DImode registers can form the address.  */
   12050   1376646848 :   if (mode != SImode && mode != DImode)
   12051              :     return NULL_RTX;
   12052              : 
   12053   1376639925 :   if (REG_P (op))
   12054              :     return op;
   12055       697556 :   else if (SUBREG_P (op))
   12056              :     {
   12057       697556 :       rtx reg = SUBREG_REG (op);
   12058              : 
   12059       697556 :       if (!REG_P (reg))
   12060              :         return NULL_RTX;
   12061              : 
   12062       697556 :       mode = GET_MODE (reg);
   12063              : 
   12064              :       /* Don't allow SUBREGs that span more than a word.  It can
   12065              :          lead to spill failures when the register is one word out
   12066              :          of a two word structure.  */
   12067      1440488 :       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   12068              :         return NULL_RTX;
   12069              : 
   12070              :       /* Allow only SUBREGs of non-eliminable hard registers.  */
   12071       237153 :       if (register_no_elim_operand (reg, mode))
   12072              :         return reg;
   12073              :     }
   12074              : 
   12075              :   /* OP is not an acceptable register or register subreg.  */
   12076              :   return NULL_RTX;
   12077              : }
   12078              : 
   12079              : /* Determine which memory address register set INSN can use.  */
   12080              : 
   12081              : static enum attr_addr
   12082    255317350 : ix86_memory_address_reg_class (rtx_insn* insn)
   12083              : {
   12084              :   /* LRA can do some initialization with NULL insn,
   12085              :      return maximum register class in this case.  */
   12086    255317350 :   enum attr_addr addr_rclass = ADDR_GPR32;
   12087              : 
   12088    255317350 :   if (!insn)
   12089              :     return addr_rclass;
   12090              : 
   12091     72683759 :   if (asm_noperands (PATTERN (insn)) >= 0
   12092     72683759 :       || GET_CODE (PATTERN (insn)) == ASM_INPUT)
   12093        75252 :     return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
   12094              : 
   12095              :   /* Return maximum register class for unrecognized instructions.  */
   12096     72646133 :   if (INSN_CODE (insn) < 0)
   12097              :     return addr_rclass;
   12098              : 
   12099              :   /* Try to recognize the insn before calling get_attr_addr.
   12100              :      Save current recog_data and current alternative.  */
   12101     72646133 :   struct recog_data_d saved_recog_data = recog_data;
   12102     72646133 :   int saved_alternative = which_alternative;
   12103              : 
   12104              :   /* Update recog_data for processing of alternatives.  */
   12105     72646133 :   extract_insn_cached (insn);
   12106              : 
   12107              :   /* If current alternative is not set, loop through enabled
   12108              :      alternatives and get the most limited register class.  */
   12109     72646133 :   if (saved_alternative == -1)
   12110              :     {
   12111     72646133 :       alternative_mask enabled = get_enabled_alternatives (insn);
   12112              : 
   12113   1253086297 :       for (int i = 0; i < recog_data.n_alternatives; i++)
   12114              :         {
   12115   1180440164 :           if (!TEST_BIT (enabled, i))
   12116    349000094 :             continue;
   12117              : 
   12118    831440070 :           which_alternative = i;
   12119    831440070 :           addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
   12120              :         }
   12121              :     }
   12122              :   else
   12123              :     {
   12124            0 :       which_alternative = saved_alternative;
   12125            0 :       addr_rclass = get_attr_addr (insn);
   12126              :     }
   12127              :   /* Restore the recog_data and alternative saved above.  */
   12128     72646133 :   recog_data = saved_recog_data;
   12129     72646133 :   which_alternative = saved_alternative;
   12130              : 
   12131     72646133 :   return addr_rclass;
   12132              : }
   12133              : 
   12134              : /* Return the base register class INSN can use in a memory address.  */
   12135              : 
   12136              : enum reg_class
   12137    214635910 : ix86_insn_base_reg_class (rtx_insn* insn)
   12138              : {
   12139    214635910 :   switch (ix86_memory_address_reg_class (insn))
   12140              :     {
   12141              :     case ADDR_GPR8:
   12142              :       return LEGACY_GENERAL_REGS;
   12143              :     case ADDR_GPR16:
   12144              :       return GENERAL_GPR16;
   12145              :     case ADDR_GPR32:
   12146              :       break;
   12147            0 :     default:
   12148            0 :       gcc_unreachable ();
   12149              :     }
   12150              :   /* ADDR_GPR32: no restriction beyond the normal base register class.  */
   12151              :   return BASE_REG_CLASS;
   12152              : }
   12153              : /* Return true if REGNO belongs to the address register set INSN can use.  */
   12154              : bool
   12155      1247206 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
   12156              : {
   12157      1247206 :   switch (ix86_memory_address_reg_class (insn))
   12158              :     {
   12159            0 :     case ADDR_GPR8:
   12160            0 :       return LEGACY_INT_REGNO_P (regno);
   12161            0 :     case ADDR_GPR16:
   12162            0 :       return GENERAL_GPR16_REGNO_P (regno);
   12163      1247206 :     case ADDR_GPR32:
   12164      1247206 :       break;
   12165            0 :     default:
   12166            0 :       gcc_unreachable ();
   12167              :     }
   12168              :   /* ADDR_GPR32: any general register number is acceptable.  */
   12169      1247206 :   return GENERAL_REGNO_P (regno);
   12170              : }
   12171              : /* Return the index register class INSN can use in a memory address.  */
   12172              : enum reg_class
   12173     39434234 : ix86_insn_index_reg_class (rtx_insn* insn)
   12174              : {
   12175     39434234 :   switch (ix86_memory_address_reg_class (insn))
   12176              :     {
   12177              :     case ADDR_GPR8:
   12178              :       return LEGACY_INDEX_REGS;
   12179              :     case ADDR_GPR16:
   12180              :       return INDEX_GPR16;
   12181              :     case ADDR_GPR32:
   12182              :       break;
   12183            0 :     default:
   12184            0 :       gcc_unreachable ();
   12185              :     }
   12186              :   /* ADDR_GPR32: no restriction beyond the normal index register class.  */
   12187              :   return INDEX_REG_CLASS;
   12188              : }
   12189              : 
   12190              : /* Recognizes RTL expressions that are valid memory addresses for an
   12191              :    instruction.  The MODE argument is the machine mode for the MEM
   12192              :    expression that wants to use this address; it is unnamed and unused here.
   12193              :    STRICT selects the strict rather than the non-strict register checks.
   12194              :    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   12195              :    convert common non-canonical forms to canonical form so that they will
   12196              :    be recognized.  */
   12197              : 
   12198              : static bool
   12199   2253475670 : ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
   12200              :                            code_helper = ERROR_MARK)
   12201              : {
   12202   2253475670 :   struct ix86_address parts;
   12203   2253475670 :   rtx base, index, disp;
   12204   2253475670 :   HOST_WIDE_INT scale;
   12205   2253475670 :   addr_space_t seg;
   12206              : 
   12207   2253475670 :   if (ix86_decompose_address (addr, &parts) == 0)
   12208              :     /* Decomposition failed.  */
   12209              :     return false;
   12210              : 
   12211   2241800525 :   base = parts.base;
   12212   2241800525 :   index = parts.index;
   12213   2241800525 :   disp = parts.disp;
   12214   2241800525 :   scale = parts.scale;
   12215   2241800525 :   seg = parts.seg;
   12216              : 
   12217              :   /* Validate base register.  */
   12218   2241800525 :   if (base)
   12219              :     {
   12220   1289990810 :       rtx reg = ix86_validate_address_register (base);
   12221              : 
   12222   1289990810 :       if (reg == NULL_RTX)
   12223              :         return false;
   12224              : 
   12225   1289561455 :       unsigned int regno = REGNO (reg);
   12226   1289561455 :       if ((strict && !REGNO_OK_FOR_BASE_P (regno))
   12227   1285143630 :           || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
   12228              :         /* Base is not valid.  */
   12229              :         return false;
   12230              :     }
   12231              : 
   12232              :   /* Validate index register.  */
   12233   2240039723 :   if (index)
   12234              :     {
   12235     86656038 :       rtx reg = ix86_validate_address_register (index);
   12236              : 
   12237     86656038 :       if (reg == NULL_RTX)
   12238              :         return false;
   12239              : 
   12240     86617892 :       unsigned int regno = REGNO (reg);
   12241     86617892 :       if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
   12242     86610058 :           || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
   12243              :         /* Index is not valid.  */
   12244              :         return false;
   12245              :     }
   12246              : 
   12247              :   /* Index and base should have the same mode.  */
   12248   2239999597 :   if (base && index
   12249     77059703 :       && GET_MODE (base) != GET_MODE (index))
   12250              :     return false;
   12251              : 
   12252              :   /* Address override works only on the (%reg) part of %fs:(%reg).  */
   12253   2239700026 :   if (seg != ADDR_SPACE_GENERIC
   12254   2239700026 :       && ((base && GET_MODE (base) != word_mode)
   12255       339653 :           || (index && GET_MODE (index) != word_mode)))
   12256              :     return false;
   12257              : 
   12258              :   /* Validate scale factor.  */
   12259   2239699997 :   if (scale != 1)
   12260              :     {
   12261     39956016 :       if (!index)
   12262              :         /* Scale without index.  */
   12263              :         return false;
   12264              : 
   12265     39956016 :       if (scale != 2 && scale != 4 && scale != 8)
   12266              :         /* Scale is not a valid multiplier.  */
   12267              :         return false;
   12268              :     }
   12269              : 
   12270              :   /* Validate displacement.  */
   12271   2236533674 :   if (disp)
   12272              :     {
   12273   2010337357 :       if (ix86_endbr_immediate_operand (disp, VOIDmode))
   12274              :         return false;
   12275              : 
   12276   2010337314 :       if (GET_CODE (disp) == CONST
   12277    149028150 :           && GET_CODE (XEXP (disp, 0)) == UNSPEC
   12278     15408826 :           && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
   12279     15408826 :         switch (XINT (XEXP (disp, 0), 1))
   12280              :           {
   12281              :           /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
   12282              :              when used.  While the ABI also specifies 32bit relocations, we
   12283              :              don't produce them at all and use IP relative instead.
   12284              :              Allow GOT in 32bit mode for both PIC and non-PIC if symbol
   12285              :              should be loaded via GOT.  */
   12286      2267041 :           case UNSPEC_GOT:
   12287      2267041 :             if (!TARGET_64BIT
   12288      2267041 :                 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   12289            0 :               goto is_legitimate_pic;
   12290              :             /* FALLTHRU */
   12291      4553663 :           case UNSPEC_GOTOFF:
   12292      4553663 :             gcc_assert (flag_pic);
   12293      4553663 :             if (!TARGET_64BIT)
   12294      4553460 :               goto is_legitimate_pic;
   12295              : 
   12296              :             /* 64bit address unspec.  */
   12297              :             return false;
   12298              : 
   12299      9781415 :           case UNSPEC_GOTPCREL:
   12300      9781415 :             if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   12301         2534 :               goto is_legitimate_pic;
   12302              :             /* FALLTHRU */
   12303      9778881 :           case UNSPEC_PCREL:
   12304      9778881 :             gcc_assert (flag_pic);
   12305      9778881 :             goto is_legitimate_pic;
   12306              : 
   12307              :           case UNSPEC_GOTTPOFF:
   12308              :           case UNSPEC_GOTNTPOFF:
   12309              :           case UNSPEC_INDNTPOFF:
   12310              :           case UNSPEC_NTPOFF:
   12311              :           case UNSPEC_DTPOFF:
   12312              :           case UNSPEC_SECREL32:
   12313              :             break;
   12314              : 
   12315              :           default:
   12316              :             /* Invalid address unspec.  */
   12317              :             return false;
   12318              :           }
   12319              : 
   12320   1268797020 :       else if (SYMBOLIC_CONST (disp)
   12321   2128547812 :                && (flag_pic
   12322              : #if TARGET_MACHO
   12323              :                    || (MACHOPIC_INDIRECT
   12324              :                        && !machopic_operand_p (disp))
   12325              : #endif
   12326              :                   ))
   12327              :         {
   12328              : 
   12329     58167457 :         is_legitimate_pic:
   12330     58167457 :           if (TARGET_64BIT && (index || base))
   12331              :             {
   12332              :               /* foo@dtpoff(%rX) is ok.  */
   12333        36996 :               if (GET_CODE (disp) != CONST
   12334         7002 :                   || GET_CODE (XEXP (disp, 0)) != PLUS
   12335         7002 :                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
   12336         4637 :                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
   12337         4637 :                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
   12338         4637 :                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
   12339            6 :                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
   12340              :                 /* Non-constant pic memory reference.  */
   12341              :                 return false;
   12342              :             }
   12343     58130461 :           else if ((!TARGET_MACHO || flag_pic)
   12344     58130461 :                     && ! legitimate_pic_address_disp_p (disp))
   12345              :             /* Displacement is an invalid pic construct.  */
   12346              :             return false;
   12347              : #if TARGET_MACHO
   12348              :           else if (MACHO_DYNAMIC_NO_PIC_P
   12349              :                    && !ix86_legitimate_constant_p (Pmode, disp))
   12350              :             /* Displacement must be referenced via non_lazy_pointer.  */
   12351              :             return false;
   12352              : #endif
   12353              : 
   12354              :           /* This code used to verify that a symbolic pic displacement
   12355              :              includes the pic_offset_table_rtx register.
   12356              : 
   12357              :              While this is a good idea, unfortunately these constructs may
   12358              :              be created by "adds using lea" optimization for incorrect
   12359              :              code like:
   12360              : 
   12361              :              int a;
   12362              :              int foo(int i)
   12363              :                {
   12364              :                  return *(&a+i);
   12365              :                }
   12366              : 
   12367              :              This code is nonsensical, but results in addressing the
   12368              :              GOT table with pic_offset_table_rtx base.  We can't
   12369              :              just refuse it easily, since it gets matched by the
   12370              :              "addsi3" pattern, that later gets split to lea in the
   12371              :              case output register differs from input.  While this
   12372              :              can be handled by a separate addsi pattern for this case
   12373              :              that never results in lea, disabling this test seems to be
   12374              :              the easier and correct fix for the crash.  */
   12375              :         }
   12376   1951095906 :       else if (!LABEL_REF_P (disp)
   12377   1950941830 :                && !CONST_INT_P (disp)
   12378    873296862 :                && (GET_CODE (disp) != CONST
   12379    135059641 :                    || !ix86_legitimate_constant_p (Pmode, disp))
   12380   2692296948 :                && (!SYMBOL_REF_P (disp)
   12381    748785755 :                    || !ix86_legitimate_constant_p (Pmode, disp)))
   12382              :         /* Displacement is not constant.  */
   12383     57611089 :         return false;
   12384   1893484817 :       else if (TARGET_64BIT
   12385   1893484817 :                && !x86_64_immediate_operand (disp, VOIDmode))
   12386              :         /* Displacement is out of range.  */
   12387              :         return false;
   12388              :       /* In x32 mode, constant addresses are sign extended to 64bit, so
   12389              :          we have to prevent addresses from 0x80000000 to 0xffffffff.  */
   12390        45259 :       else if (TARGET_X32 && !(index || base)
   12391        17351 :                && CONST_INT_P (disp)
   12392   1892974236 :                && val_signbit_known_set_p (SImode, INTVAL (disp)))
   12393              :         return false;
   12394              :     }
   12395              : 
   12396              :   /* Everything looks valid.  */
   12397              :   return true;
   12398              : }
   12399              : 
   12400              : /* Return true if X is CONSTANT_P and also a legitimate Pmode address.  */
   12401              : 
   12402              : bool
   12403   2793145531 : constant_address_p (rtx x)
   12404              : {
   12405   2873518736 :   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);  /* 1: presumably the strict flag -- TODO confirm.  */
   12406              : }
   12407              : 
   12408              : 
   12409              : /* Return a legitimate reference for ORIG (an address) using the
   12410              :    register REG.  If REG is 0, a new pseudo is generated when one is needed.
   12411              : 
   12412              :    There are two types of references that must be handled:
   12413              : 
   12414              :    1. Global data references must load the address from the GOT, via
   12415              :       the PIC reg.  An insn is emitted to do this load, and the reg is
   12416              :       returned.
   12417              : 
   12418              :    2. Static data references, constant pool addresses, and code labels
   12419              :       compute the address as an offset from the GOT, whose base is in
   12420              :       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
   12421              :       differentiate them from global data objects.  The returned
   12422              :       address is the PIC reg + an unspec constant.
   12423              : 
   12424              :    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   12425              :    reg also appears in the address.  */
   12426              : 
   12427              : rtx
   12428       397806 : legitimize_pic_address (rtx orig, rtx reg)
   12429              : {
   12430       397806 :   rtx addr = orig;
   12431       397806 :   rtx new_rtx = orig;
   12432              : 
   12433              : #if TARGET_MACHO
   12434              :   if (TARGET_MACHO && !TARGET_64BIT)
   12435              :     {
   12436              :       if (reg == 0)
   12437              :         reg = gen_reg_rtx (Pmode);
   12438              :       /* Use the generic Mach-O PIC machinery.  */
   12439              :       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
   12440              :     }
   12441              : #endif
   12442              : 
   12443       397806 :   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
   12444              :     {
   12445              : #if TARGET_PECOFF
   12446              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12447              :       if (tmp)
   12448              :         return tmp;
   12449              : #endif
   12450              :     }
   12451              : 
   12452       397806 :   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
   12453              :     new_rtx = addr;  /* ADDR passes legitimate_pic_address_disp_p; use it unchanged.  */
   12454       301447 :   else if ((!TARGET_64BIT
   12455       101745 :             || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
   12456              :            && !TARGET_PECOFF
   12457       501246 :            && gotoff_operand (addr, Pmode))
   12458              :     {
   12459              :       /* This symbol may be referenced via a displacement
   12460              :          from the PIC base address (@GOTOFF).  */
   12461        96314 :       if (GET_CODE (addr) == CONST)
   12462         3047 :         addr = XEXP (addr, 0);  /* Look inside the CONST wrapper.  */
   12463              : 
   12464        96314 :       if (GET_CODE (addr) == PLUS)
   12465              :           {
   12466         6094 :             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
   12467              :                                       UNSPEC_GOTOFF);
   12468         6094 :             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
   12469              :           }
   12470              :         else
   12471       186505 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
   12472              : 
   12473       192599 :       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12474              : 
   12475        96314 :       if (TARGET_64BIT)
   12476           29 :         new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12477              : 
   12478        96314 :       if (reg != 0)
   12479              :         {
   12480            3 :           gcc_assert (REG_P (reg));
   12481            3 :           new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12482              :                                          new_rtx, reg, 1, OPTAB_DIRECT);
   12483              :         }
   12484              :       else
   12485       192596 :         new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12486              :     }
   12487       383029 :   else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
   12488              :            /* We can't always use @GOTOFF for text labels
   12489              :               on VxWorks, see gotoff_operand.  */
   12490       205133 :            || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
   12491              :     {
   12492              : #if TARGET_PECOFF
   12493              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12494              :       if (tmp)
   12495              :         return tmp;
   12496              : #endif
   12497              : 
   12498              :       /* For x64 PE-COFF there is no GOT table,
   12499              :          so we use address directly.  */
   12500       177893 :       if (TARGET_64BIT && TARGET_PECOFF)
   12501              :         {
   12502              :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
   12503              :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12504              :         }
   12505       177893 :       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
   12506              :         {
   12507        94465 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
   12508              :                                     UNSPEC_GOTPCREL);
   12509        94465 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12510        94465 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12511        94462 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12512              :         }
   12513              :       else
   12514              :         {
   12515              :           /* This symbol must be referenced via a load
   12516              :              from the Global Offset Table (@GOT).  */
   12517       166839 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
   12518       166839 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12519              : 
   12520        83431 :           if (TARGET_64BIT)
   12521           23 :             new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12522              : 
   12523        83431 :           if (reg != 0)
   12524              :             {
   12525            0 :               gcc_assert (REG_P (reg));
   12526            0 :               new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12527              :                                              new_rtx, reg, 1, OPTAB_DIRECT);
   12528              :             }
   12529              :           else
   12530       166839 :             new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12531              : 
   12532       166839 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12533        83431 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12534              :         }
   12535              : 
   12536       261304 :       new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12537              :     }
   12538              :   else
   12539              :     {
   12540        27240 :       if (CONST_INT_P (addr)
   12541        27240 :           && !x86_64_immediate_operand (addr, VOIDmode))
   12542            8 :         new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
   12543        27232 :       else if (GET_CODE (addr) == CONST)
   12544              :         {
   12545        16723 :           addr = XEXP (addr, 0);
   12546              : 
   12547              :           /* We must recognize what we generated earlier.  Assume the only
   12548              :              UNSPECs that can get here are ours -- not that we could do
   12549              :              anything with foreign ones anyway; such addresses are returned as is.  */
   12550        16723 :           if (GET_CODE (addr) == UNSPEC
   12551         8975 :               || (GET_CODE (addr) == PLUS
   12552         8975 :                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
   12553              :             return orig;
   12554         6852 :           gcc_assert (GET_CODE (addr) == PLUS);
   12555              :         }
   12556              : 
   12557        17369 :       if (GET_CODE (addr) == PLUS)
   12558              :         {
   12559         8685 :           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
   12560              : 
   12561              :           /* Check first to see if this is a constant
   12562              :              offset from a @GOTOFF symbol reference.  */
   12563         8685 :           if (!TARGET_PECOFF
   12564        13754 :               && gotoff_operand (op0, Pmode)
   12565         8685 :               && CONST_INT_P (op1))
   12566              :             {
   12567            4 :               if (!TARGET_64BIT)
   12568              :                 {
   12569            0 :                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
   12570              :                                             UNSPEC_GOTOFF);
   12571            0 :                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
   12572            0 :                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12573              : 
   12574            0 :                   if (reg != 0)
   12575              :                     {
   12576            0 :                       gcc_assert (REG_P (reg));
   12577            0 :                       new_rtx = expand_simple_binop (Pmode, PLUS,
   12578              :                                                      pic_offset_table_rtx,
   12579              :                                                      new_rtx, reg, 1,
   12580              :                                                      OPTAB_DIRECT);
   12581              :                     }
   12582              :                   else
   12583            0 :                     new_rtx
   12584            0 :                       = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12585              :                 }
   12586              :               else
   12587              :                 {
   12588            4 :                   if (INTVAL (op1) < -16*1024*1024
   12589            4 :                       || INTVAL (op1) >= 16*1024*1024)  /* OP1 outside [-16MB, 16MB).  */
   12590              :                     {
   12591            4 :                       if (!x86_64_immediate_operand (op1, Pmode))
   12592            4 :                         op1 = force_reg (Pmode, op1);
   12593              : 
   12594            4 :                       new_rtx
   12595            4 :                         = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
   12596              :                     }  /* Otherwise NEW_RTX stays as initialized from ORIG.  */
   12597              :                 }
   12598              :             }
   12599              :           else
   12600              :             {
   12601         8681 :               rtx base = legitimize_pic_address (op0, reg);
   12602         8681 :               machine_mode mode = GET_MODE (base);
   12603         8681 :               new_rtx
   12604         8681 :                 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);  /* Don't pass REG again if BASE is REG.  */
   12605              : 
   12606         8681 :               if (CONST_INT_P (new_rtx))
   12607              :                 {
   12608         6840 :                   if (INTVAL (new_rtx) < -16*1024*1024
   12609         6840 :                       || INTVAL (new_rtx) >= 16*1024*1024)
   12610              :                     {
   12611            0 :                       if (!x86_64_immediate_operand (new_rtx, mode))
   12612            0 :                         new_rtx = force_reg (mode, new_rtx);
   12613              : 
   12614            0 :                       new_rtx
   12615            0 :                         = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
   12616              :                     }
   12617              :                   else
   12618         6840 :                     new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
   12619              :                 }
   12620              :               else
   12621              :                 {
   12622              :                   /* For %rip addressing, we have to use
   12623              :                      just disp32, not base nor index.  */
   12624         1841 :                   if (TARGET_64BIT
   12625          101 :                       && (SYMBOL_REF_P (base)
   12626          101 :                           || LABEL_REF_P (base)))
   12627            7 :                     base = force_reg (mode, base);
   12628         1841 :                   if (GET_CODE (new_rtx) == PLUS
   12629         1719 :                       && CONSTANT_P (XEXP (new_rtx, 1)))
   12630              :                     {  /* Reassociate so the constant term ends up outermost.  */
   12631         1715 :                       base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
   12632         1715 :                       new_rtx = XEXP (new_rtx, 1);
   12633              :                     }
   12634         1841 :                   new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
   12635              :                 }
   12636              :             }
   12637              :         }
   12638              :     }
   12639              :   return new_rtx;
   12640              : }
   12641              : 
   12642              : /* Build the thread pointer rtx (UNSPEC_TP) in TP_MODE.  If TO_REG is true, copy it into a register.  */
   12643              : 
   12644              : static rtx
   12645        24421 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
   12646              : {
   12647        24421 :   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
   12648              : 
   12649        24421 :   if (GET_MODE (tp) != tp_mode)  /* Only SImode -> DImode widening is expected here.  */
   12650              :     {
   12651           11 :       gcc_assert (GET_MODE (tp) == SImode);
   12652           11 :       gcc_assert (tp_mode == DImode);
   12653              : 
   12654           11 :       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
   12655              :     }
   12656              : 
   12657        24421 :   if (to_reg)
   12658         8112 :     tp = copy_to_mode_reg (tp_mode, tp);
   12659              : 
   12660        24421 :   return tp;
   12661              : }
   12662              : 
   12663              : /* Return an rtx for the _tls_index symbol; its SYMBOL_REF is created on first use and cached.  */
   12664              : 
   12665              : static GTY(()) rtx ix86_tls_index_symbol;
   12666              : 
   12667              : static rtx
   12668            0 : ix86_tls_index (void)
   12669              : {
   12670            0 :   if (!ix86_tls_index_symbol)
   12671            0 :     ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
   12672              : 
   12673            0 :   if (flag_pic)  /* PIC: return the symbol wrapped in CONST (UNSPEC_PCREL).  */
   12674            0 :     return gen_rtx_CONST (Pmode,
   12675              :                           gen_rtx_UNSPEC (Pmode,
   12676              :                                           gen_rtvec (1, ix86_tls_index_symbol),
   12677              :                                           UNSPEC_PCREL));
   12678              :   else
   12679            0 :     return ix86_tls_index_symbol;
   12680              : }
   12681              : 
   12682              : /* Return an rtx for the tls_get_addr function; its SYMBOL_REF is created on first use and cached.  */
   12683              : 
   12684              : static GTY(()) rtx ix86_tls_symbol;
   12685              : 
   12686              : rtx
   12687         6715 : ix86_tls_get_addr (void)
   12688              : {
   12689         6715 :   if (cfun->machine->call_saved_registers
   12690         6715 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
   12691              :     {
   12692              :       /* __tls_get_addr doesn't preserve vector registers.  When a
   12693              :          function with no_caller_saved_registers attribute calls
   12694              :          __tls_get_addr, YMM and ZMM registers will be clobbered.
   12695              :          Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
   12696            3 :       if (cfun->machine->func_type == TYPE_NORMAL)
   12697            1 :         error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
   12698              :                   " with the %<no_caller_saved_registers%> attribute"));
   12699              :       else
   12700            3 :         error (cfun->machine->func_type == TYPE_EXCEPTION
   12701              :                ? G_("%<-mtls-dialect=gnu2%> must be used with an"
   12702              :                     " exception service routine")
   12703              :                : G_("%<-mtls-dialect=gnu2%> must be used with an"
   12704              :                     " interrupt service routine"));
   12705              :       /* Reset the function type and register-saving mode so the same error is not issued twice.  */
   12706            3 :       cfun->machine->func_type = TYPE_NORMAL;
   12707            3 :       cfun->machine->call_saved_registers
   12708            3 :         = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
   12709              :     }
   12710              : 
   12711         6715 :   if (!ix86_tls_symbol)
   12712              :     {
   12713          204 :       const char *sym
   12714          241 :         = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
   12715          241 :            ? "___tls_get_addr" : "__tls_get_addr");
   12716              : 
   12717          278 :       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
   12718              :     }
   12719              : 
   12720         6715 :   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)  /* Large PIC model: return PIC reg + UNSPEC_PLTOFF constant.  */
   12721              :     {
   12722            2 :       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
   12723              :                                    UNSPEC_PLTOFF);
   12724            2 :       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
   12725              :                            gen_rtx_CONST (Pmode, unspec));
   12726              :     }
   12727              : 
   12728         6713 :   return ix86_tls_symbol;
   12729              : }
   12730              : 
   12731              : /* Return the SYMBOL_REF for _TLS_MODULE_BASE_, created on first use and flagged as global-dynamic TLS.  */
   12732              : 
   12733              : static GTY(()) rtx ix86_tls_module_base_symbol;
   12734              : 
   12735              : rtx
   12736           98 : ix86_tls_module_base (void)
   12737              : {
   12738           98 :   if (!ix86_tls_module_base_symbol)
   12739              :     {
   12740           11 :       ix86_tls_module_base_symbol
   12741           11 :         = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
   12742              : 
   12743           11 :       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
   12744           11 :         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
   12745              :     }
   12746              : 
   12747           98 :   return ix86_tls_module_base_symbol;
   12748              : }
   12749              : 
   12750              : /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   12751              :    false if we expect this to be used for a memory address and true if
   12752              :    we expect to load the address into a register.  */
   12753              : 
   12754              : rtx
   12755        30844 : legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
   12756              : {
   12757        30844 :   rtx dest, base, off;
   12758        30844 :   rtx pic = NULL_RTX, tp = NULL_RTX;
   12759        30844 :   machine_mode tp_mode = Pmode;
   12760        30844 :   int type;
   12761              : 
   12762              :   /* Windows implements a single form of TLS.  */
   12763        30844 :   if (TARGET_WIN32_TLS)
   12764              :     {
   12765              :       /* Load the 32-bit index.  */
   12766              :       rtx ind = gen_const_mem (SImode, ix86_tls_index ());
   12767              :       set_mem_alias_set (ind, GOT_ALIAS_SET);
   12768              :       if (TARGET_64BIT)
   12769              :         ind = convert_to_mode (Pmode, ind, 1);
   12770              :       ind = force_reg (Pmode, ind);
   12771              : 
   12772              :       /* Add it to the thread pointer and load the base.  */
   12773              :       tp = get_thread_pointer (Pmode, true);
   12774              :       rtx addr = gen_rtx_PLUS (Pmode, tp,
   12775              :                                gen_rtx_MULT (Pmode, ind,
   12776              :                                              GEN_INT (UNITS_PER_WORD)));
   12777              :       base = gen_const_mem (Pmode, addr);
   12778              :       set_mem_alias_set (base, GOT_ALIAS_SET);
   12779              : 
   12780              :       /* Add the 32-bit section-relative offset to the base.  */
   12781              :       base = force_reg (Pmode, base);
   12782              :       off = gen_rtx_CONST (Pmode,
   12783              :                            gen_rtx_UNSPEC (SImode,
   12784              :                                            gen_rtvec (1, x),
   12785              :                                            UNSPEC_SECREL32));
   12786              :       return gen_rtx_PLUS (Pmode, base, off);
   12787              :     }
   12788              : 
   12789              :   /* Fall back to global dynamic model if tool chain cannot support local
   12790              :      dynamic.  */
   12791        30844 :   if (TARGET_SUN_TLS && !TARGET_64BIT
   12792              :       && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
   12793              :       && model == TLS_MODEL_LOCAL_DYNAMIC)
   12794              :     model = TLS_MODEL_GLOBAL_DYNAMIC;
   12795              : 
   12796        30844 :   switch (model)
   12797              :     {
   12798         6116 :     case TLS_MODEL_GLOBAL_DYNAMIC:
   12799         6116 :       if (!TARGET_64BIT)
   12800              :         {
   12801         1930 :           if (flag_pic && !TARGET_PECOFF)
   12802         1930 :             pic = pic_offset_table_rtx;
   12803              :           else
   12804              :             {
   12805            0 :               pic = gen_reg_rtx (Pmode);
   12806            0 :               emit_insn (gen_set_got (pic));
   12807              :             }
   12808              :         }
   12809              : 
   12810         6116 :       if (TARGET_GNU2_TLS)
   12811              :         {
   12812           53 :           dest = gen_reg_rtx (ptr_mode);
   12813           53 :           if (TARGET_64BIT)
   12814           53 :             emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
   12815              :           else
   12816            0 :             emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
   12817              : 
   12818           53 :           tp = get_thread_pointer (ptr_mode, true);
   12819           53 :           dest = gen_rtx_PLUS (ptr_mode, tp, dest);
   12820           61 :           if (GET_MODE (dest) != Pmode)
   12821            6 :              dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
   12822           61 :           dest = force_reg (Pmode, dest);
   12823              : 
   12824           61 :           if (GET_MODE (x) != Pmode)
   12825            3 :             x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12826              : 
   12827           53 :           set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
   12828              :         }
   12829              :       else
   12830              :         {
   12831         6063 :           rtx caddr = ix86_tls_get_addr ();
   12832              : 
   12833         7993 :           dest = gen_reg_rtx (Pmode);
   12834         6063 :           if (TARGET_64BIT)
   12835              :             {
   12836         4133 :               rtx rax = gen_rtx_REG (Pmode, AX_REG);
   12837         4133 :               rtx rdi = gen_rtx_REG (Pmode, DI_REG);
   12838         4133 :               rtx_insn *insns;
   12839              : 
   12840         4133 :               start_sequence ();
   12841         4133 :               emit_call_insn
   12842         4133 :                 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
   12843         4133 :               insns = end_sequence ();
   12844              : 
   12845         4133 :               if (GET_MODE (x) != Pmode)
   12846            1 :                 x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12847              : 
   12848         4133 :               RTL_CONST_CALL_P (insns) = 1;
   12849         4133 :               emit_libcall_block (insns, dest, rax, x);
   12850              :             }
   12851              :           else
   12852         1930 :             emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
   12853              :         }
   12854              :       break;
   12855              : 
   12856          386 :     case TLS_MODEL_LOCAL_DYNAMIC:
   12857          386 :       if (!TARGET_64BIT)
   12858              :         {
   12859           92 :           if (flag_pic)
   12860           92 :             pic = pic_offset_table_rtx;
   12861              :           else
   12862              :             {
   12863            0 :               pic = gen_reg_rtx (Pmode);
   12864            0 :               emit_insn (gen_set_got (pic));
   12865              :             }
   12866              :         }
   12867              : 
   12868          386 :       if (TARGET_GNU2_TLS)
   12869              :         {
   12870           26 :           rtx tmp = ix86_tls_module_base ();
   12871              : 
   12872           26 :           base = gen_reg_rtx (ptr_mode);
   12873           26 :           if (TARGET_64BIT)
   12874           26 :             emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
   12875              :           else
   12876            0 :             emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
   12877              : 
   12878           26 :           tp = get_thread_pointer (ptr_mode, true);
   12879           32 :           if (GET_MODE (base) != Pmode)
   12880            2 :             base = gen_rtx_ZERO_EXTEND (Pmode, base);
   12881           32 :           base = force_reg (Pmode, base);
   12882              :         }
   12883              :       else
   12884              :         {
   12885          360 :           rtx caddr = ix86_tls_get_addr ();
   12886              : 
   12887          452 :           base = gen_reg_rtx (Pmode);
   12888          360 :           if (TARGET_64BIT)
   12889              :             {
   12890          268 :               rtx rax = gen_rtx_REG (Pmode, AX_REG);
   12891          268 :               rtx rdi = gen_rtx_REG (Pmode, DI_REG);
   12892          268 :               rtx_insn *insns;
   12893          268 :               rtx eqv;
   12894              : 
   12895          268 :               start_sequence ();
   12896          268 :               emit_call_insn
   12897          268 :                 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
   12898          268 :               insns = end_sequence ();
   12899              : 
   12900              :               /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
   12901              :                  share the LD_BASE result with other LD model accesses.  */
   12902          268 :               eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
   12903              :                                     UNSPEC_TLS_LD_BASE);
   12904              : 
   12905          268 :               RTL_CONST_CALL_P (insns) = 1;
   12906          268 :               emit_libcall_block (insns, base, rax, eqv);
   12907              :             }
   12908              :           else
   12909           92 :             emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
   12910              :         }
   12911              : 
   12912          484 :       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
   12913          484 :       off = gen_rtx_CONST (Pmode, off);
   12914              : 
   12915          582 :       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
   12916              : 
   12917          386 :       if (TARGET_GNU2_TLS)
   12918              :         {
   12919           32 :           if (GET_MODE (tp) != Pmode)
   12920              :             {
   12921            2 :               dest = lowpart_subreg (ptr_mode, dest, Pmode);
   12922            2 :               dest = gen_rtx_PLUS (ptr_mode, tp, dest);
   12923            2 :               dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
   12924              :             }
   12925              :           else
   12926           24 :             dest = gen_rtx_PLUS (Pmode, tp, dest);
   12927           32 :           dest = force_reg (Pmode, dest);
   12928              : 
   12929           32 :           if (GET_MODE (x) != Pmode)
   12930            1 :             x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12931              : 
   12932           26 :           set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
   12933              :         }
   12934              :       break;
   12935              : 
   12936        10803 :     case TLS_MODEL_INITIAL_EXEC:
   12937        10803 :       if (TARGET_64BIT)
   12938              :         {
   12939              :           /* Generate DImode references to avoid %fs:(%reg32)
   12940              :              problems and linker IE->LE relaxation bug.  */
   12941              :           tp_mode = DImode;
   12942              :           pic = NULL;
   12943              :           type = UNSPEC_GOTNTPOFF;
   12944              :         }
   12945          761 :       else if (flag_pic)
   12946              :         {
   12947          760 :           pic = pic_offset_table_rtx;
   12948          760 :           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
   12949              :         }
   12950            1 :       else if (!TARGET_ANY_GNU_TLS)
   12951              :         {
   12952            0 :           pic = gen_reg_rtx (Pmode);
   12953            0 :           emit_insn (gen_set_got (pic));
   12954            0 :           type = UNSPEC_GOTTPOFF;
   12955              :         }
   12956              :       else
   12957              :         {
   12958              :           pic = NULL;
   12959              :           type = UNSPEC_INDNTPOFF;
   12960              :         }
   12961              : 
   12962        10803 :       off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
   12963        10803 :       off = gen_rtx_CONST (tp_mode, off);
   12964        10803 :       if (pic)
   12965          760 :         off = gen_rtx_PLUS (tp_mode, pic, off);
   12966        10803 :       off = gen_const_mem (tp_mode, off);
   12967        10803 :       set_mem_alias_set (off, GOT_ALIAS_SET);
   12968              : 
   12969        10803 :       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12970              :         {
   12971        10803 :           base = get_thread_pointer (tp_mode,
   12972        10803 :                                      for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
   12973        10803 :           off = force_reg (tp_mode, off);
   12974        10803 :           dest = gen_rtx_PLUS (tp_mode, base, off);
   12975        11568 :           if (tp_mode != Pmode)
   12976            4 :             dest = convert_to_mode (Pmode, dest, 1);
   12977              :         }
   12978              :       else
   12979              :         {
   12980            0 :           base = get_thread_pointer (Pmode, true);
   12981            0 :           dest = gen_reg_rtx (Pmode);
   12982            0 :           emit_insn (gen_sub3_insn (dest, base, off));
   12983              :         }
   12984              :       break;
   12985              : 
   12986        13539 :     case TLS_MODEL_LOCAL_EXEC:
   12987        27846 :       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   12988              :                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12989              :                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
   12990        14307 :       off = gen_rtx_CONST (Pmode, off);
   12991              : 
   12992        13539 :       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12993              :         {
   12994        14307 :           base = get_thread_pointer (Pmode,
   12995        13539 :                                      for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
   12996        14307 :           return gen_rtx_PLUS (Pmode, base, off);
   12997              :         }
   12998              :       else
   12999              :         {
   13000            0 :           base = get_thread_pointer (Pmode, true);
   13001            0 :           dest = gen_reg_rtx (Pmode);
   13002            0 :           emit_insn (gen_sub3_insn (dest, base, off));
   13003              :         }
   13004            0 :       break;
   13005              : 
   13006            0 :     default:
   13007            0 :       gcc_unreachable ();
   13008              :     }
   13009              : 
   13010              :   return dest;
   13011              : }
   13012              : 
   13013              : /* Return true if the TLS address requires insn using integer registers.
   13014              :    It's used to prevent KMOV/VMOV in TLS code sequences which require integer
   13015              :    MOV instructions, refer to PR103275.
                        
                           MEM must satisfy MEM_P (this is asserted).  Every sub-rtx of the
                           address of MEM is examined: an UNSPEC_GOTNTPOFF makes the result
                           true unconditionally, an UNSPEC_TPOFF makes it true only when
                           !TARGET_64BIT.  If neither is found the result is false.  */
   13016              : bool
   13017     15242967 : ix86_gpr_tls_address_pattern_p (rtx mem)
   13018              : {
   13019     15242967 :   gcc_assert (MEM_P (mem));
   13020              : 
   13021     15242967 :   rtx addr = XEXP (mem, 0);
   13022     15242967 :   subrtx_var_iterator::array_type array;
                          /* Walk all sub-rtxes of the address looking for TLS UNSPECs.  */
   13023     53073126 :   FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
   13024              :     {
   13025     37837736 :       rtx op = *iter;
   13026     37837736 :       if (GET_CODE (op) == UNSPEC)
   13027       201427 :         switch (XINT (op, 1))
   13028              :           {
   13029              :           case UNSPEC_GOTNTPOFF:
   13030         7577 :             return true;
   13031            0 :           case UNSPEC_TPOFF:
                                    /* Only counts for 32-bit targets; otherwise keep scanning.  */
   13032            0 :             if (!TARGET_64BIT)
   13033              :               return true;
   13034              :             break;
   13035              :           default:
   13036              :             break;
   13037              :           }
   13038              :     }
   13039              : 
   13040     15235390 :   return false;
   13041     15242967 : }
   13042              : 
   13043              : /* Return true if OP refers to a TLS address.
                        
                           More precisely: return true if some MEM inside OP has an address
                           made of nested PLUS rtxes in which, following operand 0 downwards,
                           either operand of one of the PLUSes is an UNSPEC_TP, possibly
                           wrapped in a ZERO_EXTEND.  Once a MEM has been inspected its
                           sub-rtxes are not visited again by the outer walk.  */
   13044              : bool
   13045    232886856 : ix86_tls_address_pattern_p (rtx op)
   13046              : {
   13047    232886856 :   subrtx_var_iterator::array_type array;
   13048   1385380295 :   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
   13049              :     {
                              /* Note that this OP shadows the function parameter.  */
   13050   1152511478 :       rtx op = *iter;
   13051   1152511478 :       if (MEM_P (op))
   13052              :         {
   13053    105244636 :           rtx *x = &XEXP (op, 0);
                                  /* Descend through operand 0 of each nested PLUS.  */
   13054    166583251 :           while (GET_CODE (*x) == PLUS)
   13055              :             {
   13056              :               int i;
                                      /* Check both operands of this PLUS for the thread pointer.  */
   13057    184033907 :               for (i = 0; i < 2; i++)
   13058              :                 {
   13059    122695292 :                   rtx u = XEXP (*x, i);
   13060    122695292 :                   if (GET_CODE (u) == ZERO_EXTEND)
   13061       127893 :                     u = XEXP (u, 0);
   13062    122695292 :                   if (GET_CODE (u) == UNSPEC
   13063        18071 :                       && XINT (u, 1) == UNSPEC_TP)
   13064        18039 :                     return true;
   13065              :                 }
   13066     61338615 :               x = &XEXP (*x, 0);
   13067              :             }
   13068              : 
                                  /* The address has been handled above; do not walk into it.  */
   13069    105226597 :           iter.skip_subrtxes ();
   13070              :         }
   13071              :     }
   13072              : 
   13073    232868817 :   return false;
   13074    232886856 : }
   13075              : 
   13076              : /* Rewrite *LOC so that it refers to a default TLS address space.
                        
                           The sub-rtxes of *LOC are walked until a MEM is found whose address
                           contains, along operand 0 of nested PLUSes, a PLUS with an UNSPEC_TP
                           operand (possibly inside a ZERO_EXTEND).  That PLUS is replaced by
                           its other operand and the MEM is given the DEFAULT_TLS_SEG_REG
                           address space.  Processing stops at the first such PLUS, whether or
                           not the rewrite is actually performed.  */
   13077              : static void
   13078        18039 : ix86_rewrite_tls_address_1 (rtx *loc)
   13079              : {
   13080        18039 :   subrtx_ptr_iterator::array_type array;
   13081        53524 :   FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
   13082              :     {
                              /* Note that this LOC shadows the function parameter.  */
   13083        53524 :       rtx *loc = *iter;
   13084        53524 :       if (MEM_P (*loc))
   13085              :         {
                                  /* ADDR is a local copy of the address; X starts out pointing at
                                     that copy, so replacing the outermost PLUS only changes ADDR,
                                     while replacing a nested PLUS writes into the address rtx.  */
   13086        18226 :           rtx addr = XEXP (*loc, 0);
   13087        18226 :           rtx *x = &addr;
   13088        23062 :           while (GET_CODE (*x) == PLUS)
   13089              :             {
   13090              :               int i;
   13091        32570 :               for (i = 0; i < 2; i++)
   13092              :                 {
   13093        27734 :                   rtx u = XEXP (*x, i);
   13094        27734 :                   if (GET_CODE (u) == ZERO_EXTEND)
   13095           19 :                     u = XEXP (u, 0);
   13096        27734 :                   if (GET_CODE (u) == UNSPEC
   13097        18039 :                       && XINT (u, 1) == UNSPEC_TP)
   13098              :                     {
   13099              :                       /* NB: Since address override only applies to the
   13100              :                          (reg32) part in fs:(reg32), return if address
   13101              :                          override is used.  */
   13102        19666 :                       if (Pmode != word_mode
   13103        18039 :                           && REG_P (XEXP (*x, 1 - i)))
   13104        18039 :                         return;
   13105              : 
   13106        18037 :                       addr_space_t as = DEFAULT_TLS_SEG_REG;
   13107              : 
                                              /* Drop the thread pointer term, keeping the other
                                                 operand of the PLUS.  */
   13108        18037 :                       *x = XEXP (*x, 1 - i);
   13109              : 
   13110        18037 :                       *loc = replace_equiv_address_nv (*loc, addr, true);
   13111        18037 :                       set_mem_addr_space (*loc, as);
   13112        18037 :                       return;
   13113              :                     }
   13114              :                 }
   13115         4836 :               x = &XEXP (*x, 0);
   13116              :             }
   13117              : 
                                  /* No thread pointer in this MEM; do not walk into its address.  */
   13118          187 :           iter.skip_subrtxes ();
   13119              :         }
   13120              :     }
   13121        18039 : }
   13122              : 
   13123              : /* Rewrite instruction pattern involving TLS address
   13124              :    so that it refers to a default TLS address space.
                        
                           The rewrite is applied to the rtx returned by copy_insn (PATTERN),
                           and that rtx is the return value.  */
   13125              : rtx
   13126        18039 : ix86_rewrite_tls_address (rtx pattern)
   13127              : {
   13128        18039 :   pattern = copy_insn (pattern);
   13129        18039 :   ix86_rewrite_tls_address_1 (&pattern);
   13130        18039 :   return pattern;
   13131              : }
   13132              : 
   13133              : /* Try machine-dependent ways of modifying an illegitimate address
   13134              :    to be legitimate.  If we find one, return the new, valid address.
   13135              :    This macro is used in only one place: `memory_address' in explow.cc.
   13136              : 
   13137              :    OLDX is the address as it was before break_out_memory_refs was called.
   13138              :    In some cases it is useful to look at this to decide what needs to be done.
   13139              : 
   13140              :    It is always safe for this macro to do nothing.  It exists to recognize
   13141              :    opportunities to optimize the output.
   13142              : 
   13143              :    For the 80386, we handle X+REG by loading X into a register R and
   13144              :    using R+REG.  R will go in a general reg and indexing will be used.
   13145              :    However, if REG is a broken-out memory address or multiplication,
   13146              :    nothing needs to be done because REG can certainly go in a general reg.
   13147              : 
   13148              :    When -fpic is used, special handling is needed for symbolic references.
   13149              :    See comments by legitimize_pic_address in i386.cc for details.
                        
                           X is the address to legitimize and MODE is the mode handed to
                           ix86_legitimate_address_p when a rewritten address is checked.  The
                           second parameter (OLDX above) is unnamed and not used by this
                           implementation.  The return value is either a newly built address or
                           X itself, whose operands may have been modified in place.  */
   13150              : 
   13151              : static rtx
   13152       670789 : ix86_legitimize_address (rtx x, rtx, machine_mode mode)
   13153              : {
   13154       670789 :   bool changed = false;
   13155       670789 :   unsigned log;
   13156              : 
                          /* LOG first holds the TLS model of X (nonzero is treated as a TLS
                             symbol); further down it is reused as a shift count.  */
   13157       670789 :   log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
   13158       151109 :   if (log)
   13159        20725 :     return legitimize_tls_address (x, (enum tls_model) log, false);
                          /* Likewise for (const (plus (symbol_ref) (offset))) where the symbol
                             has a TLS model: legitimize the symbol and add the offset back.  */
   13160       650064 :   if (GET_CODE (x) == CONST
   13161          508 :       && GET_CODE (XEXP (x, 0)) == PLUS
   13162          508 :       && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
   13163       650572 :       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
   13164              :     {
   13165            4 :       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
   13166              :                                       (enum tls_model) log, false);
   13167            5 :       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
   13168              :     }
   13169              : 
   13170       650060 :   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
   13171              :     {
   13172              : #if TARGET_PECOFF
   13173              :       rtx tmp = legitimize_pe_coff_symbol (x, true);
   13174              :       if (tmp)
   13175              :         return tmp;
   13176              : #endif
   13177              :     }
   13178              : 
   13179       650060 :   if (flag_pic && SYMBOLIC_CONST (x))
   13180       130767 :     return legitimize_pic_address (x, 0);
   13181              : 
   13182              : #if TARGET_MACHO
   13183              :   if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
   13184              :     return machopic_indirect_data_reference (x, 0);
   13185              : #endif
   13186              : 
   13187              :   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
   13188       519293 :   if (GET_CODE (x) == ASHIFT
   13189            0 :       && CONST_INT_P (XEXP (x, 1))
   13190            0 :       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
   13191              :     {
   13192            0 :       changed = true;
   13193            0 :       log = INTVAL (XEXP (x, 1));
   13194            0 :       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
   13195              :                         GEN_INT (1 << log));
   13196              :     }
   13197              : 
   13198       519293 :   if (GET_CODE (x) == PLUS)
   13199              :     {
   13200              :       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
   13201              : 
   13202       182774 :       if (GET_CODE (XEXP (x, 0)) == ASHIFT
   13203          594 :           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   13204          594 :           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
   13205              :         {
   13206          594 :           changed = true;
   13207          594 :           log = INTVAL (XEXP (XEXP (x, 0), 1));
   13208         1738 :           XEXP (x, 0) = gen_rtx_MULT (Pmode,
   13209              :                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
   13210              :                                       GEN_INT (1 << log));
   13211              :         }
   13212              : 
   13213       182774 :       if (GET_CODE (XEXP (x, 1)) == ASHIFT
   13214            0 :           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   13215            0 :           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
   13216              :         {
   13217            0 :           changed = true;
   13218            0 :           log = INTVAL (XEXP (XEXP (x, 1), 1));
   13219            0 :           XEXP (x, 1) = gen_rtx_MULT (Pmode,
   13220              :                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
   13221              :                                       GEN_INT (1 << log));
   13222              :         }
   13223              : 
   13224              :       /* Put multiply first if it isn't already.  */
   13225       182774 :       if (GET_CODE (XEXP (x, 1)) == MULT)
   13226              :         {
   13227            0 :           std::swap (XEXP (x, 0), XEXP (x, 1));
   13228            0 :           changed = true;
   13229              :         }
   13230              : 
   13231              :       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
   13232              :          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
   13233              :          created by virtual register instantiation, register elimination, and
   13234              :          similar optimizations.  */
   13235       182774 :       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
   13236              :         {
   13237         9796 :           changed = true;
   13238        15404 :           x = gen_rtx_PLUS (Pmode,
   13239              :                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
   13240              :                                           XEXP (XEXP (x, 1), 0)),
   13241              :                             XEXP (XEXP (x, 1), 1));
   13242              :         }
   13243              : 
   13244              :       /* Canonicalize
   13245              :          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
   13246              :          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
   13247       172978 :       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
   13248       108753 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   13249        51368 :                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
   13250            0 :                && CONSTANT_P (XEXP (x, 1)))
   13251              :         {
   13252            0 :           rtx constant;
   13253            0 :           rtx other = NULL_RTX;
   13254              : 
                                  /* Pick whichever of the two constant terms is a CONST_INT as
                                     CONSTANT; the remaining one becomes OTHER.  */
   13255            0 :           if (CONST_INT_P (XEXP (x, 1)))
   13256              :             {
   13257            0 :               constant = XEXP (x, 1);
   13258            0 :               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
   13259              :             }
   13260            0 :           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
   13261              :             {
   13262              :               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
   13263              :               other = XEXP (x, 1);
   13264              :             }
   13265              :           else
                                    /* Neither term is a CONST_INT; X is left as it is.  */
   13266              :             constant = 0;
   13267              : 
   13268            0 :           if (constant)
   13269              :             {
   13270            0 :               changed = true;
   13271            0 :               x = gen_rtx_PLUS (Pmode,
   13272              :                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
   13273              :                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
   13274              :                                 plus_constant (Pmode, other,
   13275              :                                                INTVAL (constant)));
   13276              :             }
   13277              :         }
   13278              : 
                              /* If the canonicalized form is already legitimate, use it.  */
   13279       182774 :       if (changed && ix86_legitimate_address_p (mode, x, false))
   13280         9832 :         return x;
   13281              : 
                              /* Otherwise replace any remaining MULT operand by the result of
                                 copy_addr_to_reg.  */
   13282       172942 :       if (GET_CODE (XEXP (x, 0)) == MULT)
   13283              :         {
   13284        19715 :           changed = true;
   13285        19715 :           XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
   13286              :         }
   13287              : 
   13288       172942 :       if (GET_CODE (XEXP (x, 1)) == MULT)
   13289              :         {
   13290            0 :           changed = true;
   13291            0 :           XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
   13292              :         }
   13293              : 
                              /* A changed address with two register operands is returned
                                 as is.  */
   13294       172942 :       if (changed
   13295        19723 :           && REG_P (XEXP (x, 1))
   13296        16158 :           && REG_P (XEXP (x, 0)))
   13297              :         return x;
   13298              : 
   13299       156784 :       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
   13300              :         {
   13301         1833 :           changed = true;
   13302         1833 :           x = legitimize_pic_address (x, 0);
   13303              :         }
   13304              : 
   13305       156784 :       if (changed && ix86_legitimate_address_p (mode, x, false))
   13306         3843 :         return x;
   13307              : 
                              /* One operand is a register: compute the other operand into a
                                 new Pmode register TEMP and use TEMP in its place.  */
   13308       152941 :       if (REG_P (XEXP (x, 0)))
   13309              :         {
   13310        46475 :           rtx temp = gen_reg_rtx (Pmode);
   13311        43727 :           rtx val  = force_operand (XEXP (x, 1), temp);
   13312        43727 :           if (val != temp)
   13313              :             {
   13314        35375 :               val = convert_to_mode (Pmode, val, 1);
   13315        35084 :               emit_move_insn (temp, val);
   13316              :             }
   13317              : 
   13318        43727 :           XEXP (x, 1) = temp;
   13319        43727 :           return x;
   13320              :         }
   13321              : 
   13322       109214 :       else if (REG_P (XEXP (x, 1)))
   13323              :         {
   13324         3312 :           rtx temp = gen_reg_rtx (Pmode);
   13325         2662 :           rtx val  = force_operand (XEXP (x, 0), temp);
   13326         2662 :           if (val != temp)
   13327              :             {
   13328            0 :               val = convert_to_mode (Pmode, val, 1);
   13329            0 :               emit_move_insn (temp, val);
   13330              :             }
   13331              : 
   13332         2662 :           XEXP (x, 0) = temp;
   13333         2662 :           return x;
   13334              :         }
   13335              :     }
   13336              : 
   13337              :   return x;
   13338              : }
   13339              : 
   13340              : /* Print an integer constant expression in assembler syntax.  Addition
   13341              :    and subtraction are the only arithmetic that may appear in these
   13342              :    expressions.  FILE is the stdio stream to write to, X is the rtx, and
   13343              :    CODE is the operand print code from the output string.
                        
                           The function recurses on the operands of CONST, PLUS, MINUS and
                           UNSPEC.  For an UNSPEC the single operand is printed first and a
                           relocation suffix selected by the UNSPEC number is appended.  Rtx
                           codes and UNSPEC numbers that are not handled are reported through
                           output_operand_lossage.  */
   13344              : 
   13345              : static void
   13346      3694178 : output_pic_addr_const (FILE *file, rtx x, int code)
   13347              : {
   13348      3924063 :   char buf[256];
   13349              : 
   13350      3924063 :   switch (GET_CODE (x))
   13351              :     {
   13352            0 :     case PC:
   13353            0 :       gcc_assert (flag_pic);
   13354            0 :       putc ('.', file);
   13355            0 :       break;
   13356              : 
   13357       868830 :     case SYMBOL_REF:
   13358       868830 :       if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
   13359       868830 :         output_addr_const (file, x);
   13360              :       else
   13361              :         {
   13362              :           const char *name = XSTR (x, 0);
   13363              : 
   13364              :           /* Mark the decl as referenced so that cgraph will
   13365              :              output the function.  */
   13366              :           if (SYMBOL_REF_DECL (x))
   13367              :             mark_decl_referenced (SYMBOL_REF_DECL (x));
   13368              : 
   13369              : #if TARGET_MACHO
   13370              :           if (MACHOPIC_INDIRECT
   13371              :               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
   13372              :             name = machopic_indirection_name (x, /*stub_p=*/true);
   13373              : #endif
   13374              :           assemble_name (file, name);
   13375              :         }
                              /* With operand code 'P', append "@PLT" when ix86_call_use_plt_p
                                 says so (never for Mach-O or 64-bit PE-COFF).  */
   13376       868830 :       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
   13377       868830 :           && code == 'P' && ix86_call_use_plt_p (x))
   13378       396758 :         fputs ("@PLT", file);
   13379              :       break;
   13380              : 
   13381         2641 :     case LABEL_REF:
   13382         2641 :       x = XEXP (x, 0);
   13383              :       /* FALLTHRU */
   13384         2641 :     case CODE_LABEL:
   13385         2641 :       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
                              /* NOTE(review): this writes to asm_out_file rather than FILE,
                                 unlike every other case here -- confirm that is intended.  */
   13386         2641 :       assemble_name (asm_out_file, buf);
   13387         2641 :       break;
   13388              : 
   13389      2621533 :     CASE_CONST_SCALAR_INT:
   13390      2621533 :       output_addr_const (file, x);
   13391      2621533 :       break;
   13392              : 
   13393       210797 :     case CONST:
   13394              :       /* This used to output parentheses around the expression,
   13395              :          but that does not work on the 386 (either ATT or BSD assembler).  */
   13396       210797 :       output_pic_addr_const (file, XEXP (x, 0), code);
   13397       210797 :       break;
   13398              : 
   13399            0 :     case CONST_DOUBLE:
   13400              :       /* We can't handle floating point constants;
   13401              :          TARGET_PRINT_OPERAND must handle them.  */
   13402            0 :       output_operand_lossage ("floating constant misused");
   13403            0 :       break;
   13404              : 
   13405        19088 :     case PLUS:
   13406              :       /* Some assemblers need integer constants to appear first.  */
   13407        19088 :       if (CONST_INT_P (XEXP (x, 0)))
   13408              :         {
   13409            0 :           output_pic_addr_const (file, XEXP (x, 0), code);
   13410            0 :           putc ('+', file);
   13411            0 :           output_pic_addr_const (file, XEXP (x, 1), code);
   13412              :         }
   13413              :       else
   13414              :         {
                                  /* Operand 1 must then be the integer; print it first.  */
   13415        19088 :           gcc_assert (CONST_INT_P (XEXP (x, 1)));
   13416        19088 :           output_pic_addr_const (file, XEXP (x, 1), code);
   13417        19088 :           putc ('+', file);
   13418        19088 :           output_pic_addr_const (file, XEXP (x, 0), code);
   13419              :         }
   13420              :       break;
   13421              : 
   13422            0 :     case MINUS:
                              /* Except on Mach-O the difference is bracketed: "(...)" for the
                                 Intel dialect, "[...]" otherwise.  */
   13423            0 :       if (!TARGET_MACHO)
   13424            0 :         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
   13425            0 :       output_pic_addr_const (file, XEXP (x, 0), code);
   13426            0 :       putc ('-', file);
   13427            0 :       output_pic_addr_const (file, XEXP (x, 1), code);
   13428            0 :       if (!TARGET_MACHO)
   13429            0 :         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
   13430            0 :       break;
   13431              : 
   13432       201174 :     case UNSPEC:
                              /* Print the single wrapped operand, then the suffix that
                                 corresponds to the UNSPEC number.  */
   13433       201174 :       gcc_assert (XVECLEN (x, 0) == 1);
   13434       201174 :       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
   13435       201174 :       switch (XINT (x, 1))
   13436              :         {
   13437        43344 :         case UNSPEC_GOT:
   13438        43344 :           fputs ("@GOT", file);
   13439        43344 :           break;
   13440        77802 :         case UNSPEC_GOTOFF:
   13441        77802 :           fputs ("@GOTOFF", file);
   13442        77802 :           break;
   13443           36 :         case UNSPEC_PLTOFF:
   13444           36 :           fputs ("@PLTOFF", file);
   13445           36 :           break;
   13446            0 :         case UNSPEC_PCREL:
   13447            0 :           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13448              :                  "(%rip)" : "[rip]", file);
   13449            0 :           break;
   13450        75808 :         case UNSPEC_GOTPCREL:
   13451        75808 :           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13452              :                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
   13453        75808 :           break;
   13454            0 :         case UNSPEC_GOTTPOFF:
   13455              :           /* FIXME: This might be @TPOFF in Sun ld too.  */
   13456            0 :           fputs ("@gottpoff", file);
   13457            0 :           break;
   13458            0 :         case UNSPEC_TPOFF:
   13459            0 :           fputs ("@tpoff", file);
   13460            0 :           break;
   13461         1459 :         case UNSPEC_NTPOFF:
                                  /* The suffix differs between 64-bit and 32-bit targets.  */
   13462         1459 :           if (TARGET_64BIT)
   13463         1459 :             fputs ("@tpoff", file);
   13464              :           else
   13465            0 :             fputs ("@ntpoff", file);
   13466              :           break;
   13467          315 :         case UNSPEC_DTPOFF:
   13468          315 :           fputs ("@dtpoff", file);
   13469          315 :           break;
   13470         2410 :         case UNSPEC_GOTNTPOFF:
                                  /* For 64-bit targets the suffix includes a RIP-relative part.  */
   13471         2410 :           if (TARGET_64BIT)
   13472         2147 :             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13473              :                    "@gottpoff(%rip)": "@gottpoff[rip]", file);
   13474              :           else
   13475          263 :             fputs ("@gotntpoff", file);
   13476              :           break;
   13477            0 :         case UNSPEC_INDNTPOFF:
   13478            0 :           fputs ("@indntpoff", file);
   13479            0 :           break;
   13480            0 :         case UNSPEC_SECREL32:
   13481            0 :           fputs ("@secrel32", file);
   13482            0 :           break;
   13483              : #if TARGET_MACHO
   13484              :         case UNSPEC_MACHOPIC_OFFSET:
   13485              :           putc ('-', file);
   13486              :           machopic_output_function_base_name (file);
   13487              :           break;
   13488              : #endif
   13489            0 :         default:
   13490            0 :           output_operand_lossage ("invalid UNSPEC as operand");
   13491            0 :           break;
   13492              :         }
   13493              :        break;
   13494              : 
   13495            0 :     default:
   13496            0 :       output_operand_lossage ("invalid expression as operand");
   13497              :     }
   13498      3694178 : }
   13499              : 
   13500              : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   13501              :    We need to emit DTP-relative relocations.
                        
                           Writes ASM_LONG, then X, then "@secrel32" (if TARGET_WIN32_TLS) or
                           "@dtpoff" to FILE.  SIZE must be 4 or 8; for 8 the text ", 0" is
                           appended after the relocated value.  */
   13502              : 
   13503              : static void ATTRIBUTE_UNUSED
   13504          667 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
   13505              : {
   13506          667 :   fputs (ASM_LONG, file);
   13507          667 :   output_addr_const (file, x);
   13508              : #if TARGET_WIN32_TLS
   13509              :   fputs ("@secrel32", file);
   13510              : #else
   13511          667 :   fputs ("@dtpoff", file);
   13512              : #endif
   13513          667 :   switch (size)
   13514              :     {
   13515              :     case 4:
   13516              :       break;
   13517          548 :     case 8:
                              /* Add a second, zero operand to the ASM_LONG directive.  */
   13518          548 :       fputs (", 0", file);
   13519          548 :       break;
   13520            0 :     default:
   13521            0 :       gcc_unreachable ();
   13522              :    }
   13523          667 : }
   13524              : 
   13525              : /* Return true if X is a representation of the PIC register.  This copes
   13526              :    with calls from ix86_find_base_term, where the register might have
   13527              :    been replaced by a cselib value.
                        
                           Besides a cselib VALUE, X is accepted when it is an UNSPEC_SET_GOT,
                           or a REG that matches pic_offset_table_rtx (by REGNO, or by
                           ORIGINAL_REGNO for a hard register while pic_offset_table_rtx is
                           not a hard register).  When pic_offset_table_rtx is null a REG is
                           compared against PIC_OFFSET_TABLE_REGNUM instead.  */
   13528              : 
   13529              : static bool
   13530     26852849 : ix86_pic_register_p (rtx x)
   13531              : {
   13532     26852849 :   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
   13533       750305 :     return (pic_offset_table_rtx
   13534       750305 :             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
   13535     26102544 :   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
   13536              :     return true;
   13537     26098027 :   else if (!REG_P (x))
   13538              :     return false;
   13539     25493977 :   else if (pic_offset_table_rtx)
   13540              :     {
   13541     25474312 :       if (REGNO (x) == REGNO (pic_offset_table_rtx))
   13542              :         return true;
                              /* X is a hard register whose original register number is that of
                                 the (non-hard) PIC register.  */
   13543       403506 :       if (HARD_REGISTER_P (x)
   13544       382048 :           && !HARD_REGISTER_P (pic_offset_table_rtx)
   13545       785554 :           && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
   13546              :         return true;
   13547              :       return false;
   13548              :     }
   13549              :   else
   13550        19665 :     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
   13551              : }
   13552              : 
   13553              : /* Helper function for ix86_delegitimize_address.
   13554              :    Attempt to delegitimize TLS local-exec accesses.  */
   13555              : 
   13556              : static rtx
   13557   3493528169 : ix86_delegitimize_tls_address (rtx orig_x)
   13558              : {
   13559   3493528169 :   rtx x = orig_x, unspec;
   13560   3493528169 :   struct ix86_address addr;
   13561              : 
   13562   3493528169 :   if (!TARGET_TLS_DIRECT_SEG_REFS)  /* Every early exit below returns ORIG_X unchanged.  */
   13563              :     return orig_x;
   13564   3493528169 :   if (MEM_P (x))  /* Look through a MEM at its address.  */
   13565     42913412 :     x = XEXP (x, 0);
   13566   5017868512 :   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
   13567              :     return orig_x;
   13568   1680213201 :   if (ix86_decompose_address (x, &addr) == 0  /* Need a decomposable address on DEFAULT_TLS_SEG_REG with a CONST displacement.  */
   13569   1942520411 :       || addr.seg != DEFAULT_TLS_SEG_REG
   13570       268212 :       || addr.disp == NULL_RTX
   13571   1680429367 :       || GET_CODE (addr.disp) != CONST)
   13572              :     return orig_x;
   13573       111823 :   unspec = XEXP (addr.disp, 0);
   13574       111823 :   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))  /* Strip an optional CONST_INT offset around the unspec.  */
   13575        65331 :     unspec = XEXP (unspec, 0);
   13576       111823 :   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
   13577              :     return orig_x;
   13578       111760 :   x = XVECEXP (unspec, 0, 0);  /* The symbol is the unspec's first operand.  */
   13579       111760 :   gcc_assert (SYMBOL_REF_P (x));
   13580       111760 :   if (unspec != XEXP (addr.disp, 0))  /* Re-apply the constant offset stripped above.  */
   13581        86301 :     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
   13582       111760 :   if (addr.index)
   13583              :     {
   13584          185 :       rtx idx = addr.index;
   13585          185 :       if (addr.scale != 1)  /* Rebuild index * scale.  */
   13586          185 :         idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
   13587          185 :       x = gen_rtx_PLUS (Pmode, idx, x);
   13588              :     }
   13589       111760 :   if (addr.base)
   13590            2 :     x = gen_rtx_PLUS (Pmode, addr.base, x);
   13591       111760 :   if (MEM_P (orig_x))  /* Keep the MEM wrapper of ORIG_X around the new address.  */
   13592          198 :     x = replace_equiv_address_nv (orig_x, x);
   13593              :   return x;
   13594              : }
   13595              : 
   13596              : /* In the name of slightly smaller debug output, and to cater to
   13597              :    general assembler lossage, recognize PIC+GOTOFF and turn it back
   13598              :    into a direct symbol reference.
   13599              : 
   13600              :    On Darwin, this is necessary to avoid a crash, because Darwin
   13601              :    has a different PIC label for each routine but the DWARF debugging
   13602              :    information is not associated with any particular routine, so it's
   13603              :    necessary to remove references to the PIC label from RTL stored by
   13604              :    the DWARF output code.
   13605              : 
   13606              :    This helper is used in the normal ix86_delegitimize_address
   13607              :    entrypoint (e.g. used in the target delegitimization hook) and
   13608              :    in ix86_find_base_term.  As compile time memory optimization, we
   13609              :    avoid allocating rtxes that will not change anything on the outcome
   13610              :    of the callers (find_base_value and find_base_term).  */
   13611              : 
   13612              : static inline rtx
   13613   3518361084 : ix86_delegitimize_address_1 (rtx x, bool base_term_p)
   13614              : {
   13615   3518361084 :   rtx orig_x = delegitimize_mem_from_attrs (x);
   13616              :   /* addend is NULL or some rtx if x is something+GOTOFF where
   13617              :      something doesn't include the PIC register.  */
   13618   3518361084 :   rtx addend = NULL_RTX;
   13619              :   /* reg_addend is NULL or a multiple of some register.  */
   13620   3518361084 :   rtx reg_addend = NULL_RTX;
   13621              :   /* const_addend is NULL or a const_int.  */
   13622   3518361084 :   rtx const_addend = NULL_RTX;
   13623              :   /* This is the result, or NULL.  */
   13624   3518361084 :   rtx result = NULL_RTX;
   13625              : 
   13626   3518361084 :   x = orig_x;
   13627              : 
   13628   3518361084 :   if (MEM_P (x))  /* Work on the address of a MEM.  */
   13629     62115782 :     x = XEXP (x, 0);
   13630              : 
   13631   3518361084 :   if (TARGET_64BIT)
   13632              :     {
   13633    253696974 :       if (GET_CODE (x) == CONST  /* Case: (const (plus (unspec UNSPEC_PCREL) const_int)) in Pmode.  */
   13634      8666660 :           && GET_CODE (XEXP (x, 0)) == PLUS
   13635      6730827 :           && GET_MODE (XEXP (x, 0)) == Pmode
   13636      6730778 :           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   13637      6730778 :           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
   13638    253701108 :           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
   13639              :         {
   13640              :           /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
   13641              :              base.  A CONST can't be arg_pointer_rtx based.  */
   13642            0 :           if (base_term_p && MEM_P (orig_x))
   13643              :             return orig_x;
   13644            0 :           rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
   13645            0 :           x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
   13646            0 :           if (MEM_P (orig_x))
   13647            0 :             x = replace_equiv_address_nv (orig_x, x);
   13648            0 :           return x;
   13649              :         }
   13650              : 
   13651    253696974 :       if (GET_CODE (x) == CONST  /* Case: a bare UNSPEC_PCREL, or UNSPEC_GOTPCREL when ORIG_X is a MEM.  */
   13652      8666660 :           && GET_CODE (XEXP (x, 0)) == UNSPEC
   13653      1935882 :           && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
   13654       654674 :               || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
   13655      1281208 :           && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
   13656              :         {
   13657       295663 :           x = XVECEXP (XEXP (x, 0), 0, 0);  /* The unspec's first operand.  */
   13658       295663 :           if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))  /* A MEM of another mode gets a lowpart subreg, or ORIG_X if none exists.  */
   13659              :             {
   13660            9 :               x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
   13661            9 :               if (x == NULL_RTX)
   13662              :                 return orig_x;
   13663              :             }
   13664       295663 :           return x;
   13665              :         }
   13666              : 
   13667    253401311 :       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)  /* Other 64-bit code models only get the TLS check.  */
   13668    253399650 :         return ix86_delegitimize_tls_address (orig_x);
   13669              : 
   13670              :       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
   13671              :          and -mcmodel=medium -fpic.  */
   13672              :     }
   13673              : 
   13674   3264665771 :   if (GET_CODE (x) != PLUS  /* From here on X must be (plus something (const ...)).  */
   13675   1548877367 :       || GET_CODE (XEXP (x, 1)) != CONST)
   13676   3238362399 :     return ix86_delegitimize_tls_address (orig_x);
   13677              : 
   13678     26303372 :   if (ix86_pic_register_p (XEXP (x, 0)))
   13679              :     /* %ebx + GOT/GOTOFF */
   13680              :     ;
   13681      1275534 :   else if (GET_CODE (XEXP (x, 0)) == PLUS)
   13682              :     {
   13683              :       /* %ebx + %reg * scale + GOT/GOTOFF */
   13684       472513 :       reg_addend = XEXP (x, 0);
   13685       472513 :       if (ix86_pic_register_p (XEXP (reg_addend, 0)))  /* Keep whichever operand is not the PIC register.  */
   13686       395549 :         reg_addend = XEXP (reg_addend, 1);
   13687        76964 :       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
   13688        45509 :         reg_addend = XEXP (reg_addend, 0);
   13689              :       else
   13690              :         {
   13691        31455 :           reg_addend = NULL_RTX;  /* Neither operand is the PIC register: treat the whole PLUS as ADDEND.  */
   13692        31455 :           addend = XEXP (x, 0);
   13693              :         }
   13694              :     }
   13695              :   else
   13696              :     addend = XEXP (x, 0);
   13697              : 
   13698     26303372 :   x = XEXP (XEXP (x, 1), 0);  /* X is now the expression inside the CONST.  */
   13699     26303372 :   if (GET_CODE (x) == PLUS
   13700      1443934 :       && CONST_INT_P (XEXP (x, 1)))
   13701              :     {
   13702      1443934 :       const_addend = XEXP (x, 1);
   13703      1443934 :       x = XEXP (x, 0);
   13704              :     }
   13705              : 
   13706     26303372 :   if (GET_CODE (x) == UNSPEC  /* Accept UNSPEC_GOT, UNSPEC_GOTOFF or UNSPEC_PLTOFF under the conditions below.  */
   13707     25631942 :       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
   13708      6725235 :           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
   13709      1094694 :           || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
   13710            4 :               && !MEM_P (orig_x) && !addend)))
   13711     24537252 :     result = XVECEXP (x, 0, 0);  /* The unspec's first operand.  */
   13712              : 
   13713     24537252 :   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
   13714              :       && !MEM_P (orig_x))
   13715              :     result = XVECEXP (x, 0, 0);
   13716              : 
   13717     24537252 :   if (! result)  /* No recognized PIC form: fall back to the TLS check.  */
   13718      1766120 :     return ix86_delegitimize_tls_address (orig_x);
   13719              : 
   13720              :   /* For (PLUS something CONST_INT) both find_base_{value,term} just
   13721              :      recurse on the first operand.  */
   13722     24537252 :   if (const_addend && !base_term_p)
   13723       354438 :     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
   13724     24537252 :   if (reg_addend)
   13725       857804 :     result = gen_rtx_PLUS (Pmode, reg_addend, result);
   13726     24537252 :   if (addend)
   13727              :     {
   13728              :       /* If the rest of original X doesn't involve the PIC register, add
   13729              :          addend and subtract pic_offset_table_rtx.  This can happen e.g.
   13730              :          for code like:
   13731              :          leal (%ebx, %ecx, 4), %ecx
   13732              :          ...
   13733              :          movl foo@GOTOFF(%ecx), %edx
   13734              :          in which case we return (%ecx - %ebx) + foo
   13735              :          or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
   13736              :          and reload has completed.  Don't do the latter for debug,
   13737              :          as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
   13738       135807 :       if (pic_offset_table_rtx
   13739       135807 :           && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
   13740         2370 :         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
   13741              :                                                      pic_offset_table_rtx),
   13742              :                                result);
   13743       135017 :       else if (base_term_p  /* The GOT_SYMBOL_NAME form is built only for base-term queries.  */
   13744       128685 :                && pic_offset_table_rtx
   13745              :                && !TARGET_MACHO
   13746              :                && !TARGET_VXWORKS_VAROFF)
   13747              :         {
   13748       257370 :           rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
   13749       257370 :           tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
   13750       257370 :           result = gen_rtx_PLUS (Pmode, tmp, result);
   13751       128685 :         }
   13752              :       else
   13753              :         return orig_x;
   13754              :     }
   13755     49061755 :   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))  /* A non-Pmode MEM gets a lowpart subreg, or ORIG_X if none exists.  */
   13756              :     {
   13757            0 :       result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
   13758            0 :       if (result == NULL_RTX)
   13759              :         return orig_x;
   13760              :     }
   13761              :   return result;
   13762              : }
   13763              : 
   13764              : /* The normal entry point for the above helper, with BASE_TERM_P false.  */
   13765              : 
   13766              : static rtx
   13767    324376186 : ix86_delegitimize_address (rtx x)
   13768              : {
   13769    324376186 :   return ix86_delegitimize_address_1 (x, false);
   13770              : }
   13771              : 
   13772              : /* If X is a machine specific address (i.e. a symbol or label being
   13773              :    referenced as a displacement from the GOT implemented using an
   13774              :    UNSPEC), then return the base term.  Otherwise return X.  */
   13775              : 
   13776              : rtx
   13777   6676884574 : ix86_find_base_term (rtx x)
   13778              : {
   13779   6676884574 :   rtx term;
   13780              : 
   13781   6676884574 :   if (TARGET_64BIT)
   13782              :     {
   13783   3482899676 :       if (GET_CODE (x) != CONST)  /* 64-bit: only a CONST is examined; anything else is returned as is.  */
   13784              :         return x;
   13785     45070592 :       term = XEXP (x, 0);
   13786     45070592 :       if (GET_CODE (term) == PLUS  /* Skip an optional CONST_INT offset.  */
   13787     45055719 :           && CONST_INT_P (XEXP (term, 1)))
   13788     45055719 :         term = XEXP (term, 0);
   13789     45070592 :       if (GET_CODE (term) != UNSPEC  /* Only UNSPEC_GOTPCREL and UNSPEC_PCREL are recognized.  */
   13790        40579 :           || (XINT (term, 1) != UNSPEC_GOTPCREL
   13791        40579 :               && XINT (term, 1) != UNSPEC_PCREL))
   13792              :         return x;
   13793              : 
   13794            0 :       return XVECEXP (term, 0, 0);  /* The base term is the unspec's first operand.  */
   13795              :     }
   13796              : 
   13797   3193984898 :   return ix86_delegitimize_address_1 (x, true);  /* 32-bit: reuse the delegitimizer with BASE_TERM_P true.  */
   13798              : }
   13799              : 
   13800              : /* Return true if X shouldn't be emitted into the debug info.
   13801              :    Disallow UNSPECs other than @gotoff - we can't easily emit the
   13802              :    _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we must
   13803              :    not delegitimize @gotoff, but instead assemble it as @gotoff.
   13804              :    Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   13805              :    assembles that as _GLOBAL_OFFSET_TABLE_-. expression.  */
   13806              : 
   13807              : static bool
   13808      1879396 : ix86_const_not_ok_for_debug_p (rtx x)
   13809              : {
   13810      1879396 :   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)  /* Any UNSPEC except UNSPEC_GOTOFF is rejected.  */
   13811              :     return true;
   13812              : 
   13813      1879376 :   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)  /* So is a SYMBOL_REF named GOT_SYMBOL_NAME.  */
   13814            0 :     return true;
   13815              : 
   13816              :   return false;
   13817              : }
   13818              : 
   13819              : static void  /* Write to FILE the condition suffix string for comparison CODE in flags mode MODE.  */
   13820      7141600 : put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
   13821              :                     bool fp, FILE *file)
   13822              : {
   13823      7141600 :   const char *suffix;
   13824              : 
   13825      7141600 :   if (mode == CCFPmode)  /* CCFPmode is handled as CCmode after mapping CODE to its integer form.  */
   13826              :     {
   13827       564911 :       code = ix86_fp_compare_code_to_integer (code);
   13828       564911 :       mode = CCmode;
   13829              :     }
   13830      7141600 :   if (reverse)  /* REVERSE asks for the suffix of the reversed condition.  */
   13831       209123 :     code = reverse_condition (code);
   13832              : 
   13833      7141600 :   switch (code)
   13834              :     {
   13835      2772440 :     case EQ:
   13836      2772440 :       gcc_assert (mode != CCGZmode);
   13837      2772440 :       switch (mode)  /* The single-flag modes name their flag; every other mode prints "e".  */
   13838              :         {
   13839              :         case E_CCAmode:
   13840              :           suffix = "a";
   13841              :           break;
   13842              :         case E_CCCmode:
   13843        26316 :           suffix = "c";
   13844              :           break;
   13845              :         case E_CCOmode:
   13846      7141600 :           suffix = "o";
   13847              :           break;
   13848              :         case E_CCPmode:
   13849       233716 :           suffix = "p";
   13850              :           break;
   13851              :         case E_CCSmode:
   13852       121529 :           suffix = "s";
   13853              :           break;
   13854      2752732 :         default:
   13855      2752732 :           suffix = "e";
   13856      2752732 :           break;
   13857              :         }
   13858              :       break;
   13859      2318476 :     case NE:
   13860      2318476 :       gcc_assert (mode != CCGZmode);
   13861      2318476 :       switch (mode)  /* Same table as EQ with an "n" prefix.  */
   13862              :         {
   13863              :         case E_CCAmode:
   13864              :           suffix = "na";
   13865              :           break;
   13866              :         case E_CCCmode:
   13867        12039 :           suffix = "nc";
   13868              :           break;
   13869        10769 :         case E_CCOmode:
   13870        10769 :           suffix = "no";
   13871        10769 :           break;
   13872              :         case E_CCPmode:
   13873         4433 :           suffix = "np";
   13874              :           break;
   13875              :         case E_CCSmode:
   13876        50693 :           suffix = "ns";
   13877              :           break;
   13878      2305821 :         default:
   13879      2305821 :           suffix = "ne";
   13880      2305821 :           break;
   13881              :         }
   13882              :       break;
   13883       256062 :     case GT:
   13884       256062 :       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
   13885              :       suffix = "g";
   13886              :       break;
   13887       173737 :     case GTU:
   13888              :       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
   13889              :          Those same assemblers have the same but opposite lossage on cmov.  */
   13890       173737 :       if (mode == CCmode)
   13891       173799 :         suffix = fp ? "nbe" : "a";
   13892              :       else
   13893            0 :         gcc_unreachable ();
   13894              :       break;
   13895       236818 :     case LT:
   13896       236818 :       switch (mode)
   13897              :         {
   13898              :         case E_CCNOmode:
   13899              :         case E_CCGOCmode:
   13900              :           suffix = "s";
   13901              :           break;
   13902              : 
   13903              :         case E_CCmode:
   13904              :         case E_CCGCmode:
   13905              :         case E_CCGZmode:
   13906      7141600 :           suffix = "l";
   13907              :           break;
   13908              : 
   13909            0 :         default:
   13910            0 :           gcc_unreachable ();
   13911              :         }
   13912              :       break;
   13913       445407 :     case LTU:
   13914       445407 :       if (mode == CCmode || mode == CCGZmode)
   13915              :         suffix = "b";
   13916        24985 :       else if (mode == CCCmode)
   13917        26316 :         suffix = fp ? "b" : "c";  /* FP selects the "b" spelling over "c".  */
   13918              :       else
   13919            0 :         gcc_unreachable ();
   13920              :       break;
   13921       145364 :     case GE:
   13922       145364 :       switch (mode)
   13923              :         {
   13924              :         case E_CCNOmode:
   13925              :         case E_CCGOCmode:
   13926              :           suffix = "ns";
   13927              :           break;
   13928              : 
   13929              :         case E_CCmode:
   13930              :         case E_CCGCmode:
   13931              :         case E_CCGZmode:
   13932      7141600 :           suffix = "ge";
   13933              :           break;
   13934              : 
   13935            0 :         default:
   13936            0 :           gcc_unreachable ();
   13937              :         }
   13938              :       break;
   13939       189961 :     case GEU:
   13940       189961 :       if (mode == CCmode || mode == CCGZmode)
   13941              :         suffix = "nb";
   13942        10173 :       else if (mode == CCCmode)
   13943        12039 :         suffix = fp ? "nb" : "nc";  /* FP selects the "nb" spelling over "nc".  */
   13944              :       else
   13945            0 :         gcc_unreachable ();
   13946              :       break;
   13947       246640 :     case LE:
   13948       246640 :       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
   13949              :       suffix = "le";
   13950              :       break;
   13951       118544 :     case LEU:
   13952       118544 :       if (mode == CCmode)
   13953              :         suffix = "be";
   13954              :       else
   13955            0 :         gcc_unreachable ();
   13956              :       break;
   13957       233716 :     case UNORDERED:
   13958       233723 :       suffix = fp ? "u" : "p";  /* MODE is not consulted for UNORDERED and ORDERED.  */
   13959              :       break;
   13960         4435 :     case ORDERED:
   13961         4440 :       suffix = fp ? "nu" : "np";
   13962              :       break;
   13963            0 :     default:
   13964            0 :       gcc_unreachable ();
   13965              :     }
   13966      7141600 :   fputs (suffix, file);  /* The only output: the chosen suffix, with nothing before or after it.  */
   13967      7141600 : }
   13968              : 
   13969              : /* Print the name of register X to FILE based on its machine mode and number.
   13970              :    If CODE is 'w', pretend the mode is HImode.
   13971              :    If CODE is 'b', pretend the mode is QImode.
   13972              :    If CODE is 'k', pretend the mode is SImode.
   13973              :    If CODE is 'q', pretend the mode is DImode.
   13974              :    If CODE is 'x', pretend the mode is V4SFmode.
   13975              :    If CODE is 't', pretend the mode is V8SFmode.
   13976              :    If CODE is 'g', pretend the mode is V16SFmode.
   13977              :    If CODE is 'h', pretend the reg is the 'high' byte register.
   13978              :    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   13979              :    If CODE is 'd', duplicate the operand for AVX instruction.
   13980              :    If CODE is 'V', print naked full integer register name without %.
   13981              :  */
   13982              : 
   13983              : void
   13984    123594049 : print_reg (rtx x, int code, FILE *file)
   13985              : {
   13986    123594049 :   const char *reg;
   13987    123594049 :   int msize;
   13988    123594049 :   unsigned int regno;
   13989    123594049 :   bool duplicated;
   13990              : 
   13991    123594049 :   if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')  /* AT&T dialect prefixes '%' unless CODE is 'V'.  */
   13992    123591596 :     putc ('%', file);
   13993              : 
   13994    123594049 :   if (x == pc_rtx)  /* pc_rtx prints as "rip" and is asserted to be 64-bit only.  */
   13995              :     {
   13996      5746829 :       gcc_assert (TARGET_64BIT);
   13997      5746829 :       fputs ("rip", file);
   13998      5746829 :       return;
   13999              :     }
   14000              : 
   14001    117847220 :   if (code == 'y' && STACK_TOP_P (x))
   14002              :     {
   14003       289920 :       fputs ("st(0)", file);
   14004       289920 :       return;
   14005              :     }
   14006              : 
   14007    117557300 :   if (code == 'w')  /* Map CODE to an operand size in bytes; 'h' uses 0 to mark the high-byte register.  */
   14008              :     msize = 2;
   14009              :   else if (code == 'b')
   14010              :     msize = 1;
   14011              :   else if (code == 'k')
   14012              :     msize = 4;
   14013              :   else if (code == 'q')
   14014              :     msize = 8;
   14015              :   else if (code == 'h')
   14016              :     msize = 0;
   14017              :   else if (code == 'x')
   14018              :     msize = 16;
   14019              :   else if (code == 't')
   14020              :     msize = 32;
   14021              :   else if (code == 'g')
   14022              :     msize = 64;
   14023              :   else
   14024    200906834 :     msize = GET_MODE_SIZE (GET_MODE (x));  /* No size override: use the mode of X.  */
   14025              : 
   14026    117557300 :   regno = REGNO (x);
   14027              : 
   14028    117557300 :   if (regno == ARG_POINTER_REGNUM  /* These registers are reported via output_operand_lossage and not printed.  */
   14029    117557300 :       || regno == FRAME_POINTER_REGNUM
   14030    117557300 :       || regno == FPSR_REG)
   14031              :     {
   14032            0 :       output_operand_lossage
   14033            0 :         ("invalid use of register '%s'", reg_names[regno]);
   14034            0 :       return;
   14035              :     }
   14036    117557300 :   else if (regno == FLAGS_REG)
   14037              :     {
   14038            1 :       output_operand_lossage ("invalid use of asm flag output");
   14039            1 :       return;
   14040              :     }
   14041              : 
   14042    117557299 :   if (code == 'V')  /* 'V' forces the word_mode size for general registers and is an error otherwise.  */
   14043              :     {
   14044            1 :       if (GENERAL_REGNO_P (regno))
   14045            2 :         msize = GET_MODE_SIZE (word_mode);
   14046              :       else
   14047            0 :         error ("%<V%> modifier on non-integer register");
   14048              :     }
   14049              : 
   14050    117557299 :   duplicated = code == 'd' && TARGET_AVX;  /* Duplication happens only for 'd' with TARGET_AVX.  */
   14051              : 
   14052    117557299 :   switch (msize)
   14053              :     {
   14054     78101021 :     case 16:
   14055     78101021 :     case 12:
   14056     78101021 :     case 8:
   14057    146139931 :       if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
   14058            5 :         warning (0, "unsupported size for integer register");
   14059              :       /* FALLTHRU */
   14060    114123371 :     case 4:
   14061    114123371 :       if (LEGACY_INT_REGNO_P (regno))  /* Legacy integer registers get an 'r' or 'e' prefix before the hi_reg_name entry.  */
   14062    123395879 :         putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
   14063              :       /* FALLTHRU */
   14064    115013775 :     case 2:
   14065     22267992 :     normal:
   14066    115013775 :       reg = hi_reg_name[regno];
   14067    115013775 :       break;
   14068      2280182 :     case 1:
   14069      2280182 :       if (regno >= ARRAY_SIZE (qi_reg_name))  /* Registers beyond qi_reg_name fall back to hi_reg_name.  */
   14070       274566 :         goto normal;
   14071      2005616 :       if (!ANY_QI_REGNO_P (regno))
   14072            0 :         error ("unsupported size for integer register");
   14073      2005616 :       reg = qi_reg_name[regno];
   14074      2005616 :       break;
   14075        27049 :     case 0:
   14076        27049 :       if (regno >= ARRAY_SIZE (qi_high_reg_name))
   14077            0 :         goto normal;
   14078        27049 :       reg = qi_high_reg_name[regno];
   14079        27049 :       break;
   14080       510859 :     case 32:
   14081       510859 :     case 64:
   14082       510859 :       if (SSE_REGNO_P (regno))
   14083              :         {
   14084       510859 :           gcc_assert (!duplicated);
   14085       715875 :           putc (msize == 32 ? 'y' : 'z', file);  /* Print 'y' or 'z', then the hi_reg_name entry minus its first character.  */
   14086       510859 :           reg = hi_reg_name[regno] + 1;
   14087       510859 :           break;
   14088              :         }
   14089            0 :       goto normal;
   14090            0 :     default:
   14091            0 :       gcc_unreachable ();
   14092              :     }
   14093              : 
   14094    117557299 :   fputs (reg, file);
   14095              : 
   14096              :   /* Irritatingly, AMD extended registers use
   14097              :      different naming convention: "r%d[bwd]"  */
   14098    117557299 :   if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))  /* This path returns before the duplication code below.  */
   14099              :     {
   14100     10477374 :       gcc_assert (TARGET_64BIT);
   14101     10477374 :       switch (msize)
   14102              :         {
   14103            0 :           case 0:
   14104            0 :             error ("extended registers have no high halves");
   14105            0 :             break;
   14106       182530 :           case 1:
   14107       182530 :             putc ('b', file);
   14108       182530 :             break;
   14109        28302 :           case 2:
   14110        28302 :             putc ('w', file);
   14111        28302 :             break;
   14112      2542511 :           case 4:
   14113      2542511 :             putc ('d', file);
   14114      2542511 :             break;
   14115              :           case 8:
   14116              :             /* no suffix */
   14117              :             break;
   14118            0 :           default:
   14119            0 :             error ("unsupported operand size for extended register");
   14120            0 :             break;
   14121              :         }
   14122     10477374 :       return;
   14123              :     }
   14124              : 
   14125    107079925 :   if (duplicated)  /* Print ", " and the register name again, with '%' in the AT&T dialect.  */
   14126              :     {
   14127        16877 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14128        16856 :         fprintf (file, ", %%%s", reg);
   14129              :       else
   14130           21 :         fprintf (file, ", %s", reg);
   14131              :     }
   14132              : }
   14133              : 
   14134              : /* Meaning of CODE:
   14135              :    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   14136              :    C -- print opcode suffix for set/cmov insn.
   14137              :    c -- like C, but print reversed condition
   14138              :    F,f -- likewise, but for floating-point.
   14139              :    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   14140              :         otherwise nothing
   14141              :    R -- print embedded rounding and sae.
   14142              :    r -- print only sae.
   14143              :    z -- print the opcode suffix for the size of the current operand.
   14144              :    Z -- likewise, with special suffixes for x87 instructions.
   14145              :    * -- print a star (in certain assembler syntax)
   14146              :    A -- print an absolute memory reference.
   14147              :    E -- print address with DImode register names if TARGET_64BIT.
   14148              :    w -- print the operand as if it's a "word" (HImode) even if it isn't.
   14149              :    s -- print a shift double count, followed by the assemblers argument
   14150              :         delimiter.
   14151              :    b -- print the QImode name of the register for the indicated operand.
   14152              :         %b0 would print %al if operands[0] is reg 0.
   14153              :    w --  likewise, print the HImode name of the register.
   14154              :    k --  likewise, print the SImode name of the register.
   14155              :    q --  likewise, print the DImode name of the register.
   14156              :    x --  likewise, print the V4SFmode name of the register.
   14157              :    t --  likewise, print the V8SFmode name of the register.
   14158              :    g --  likewise, print the V16SFmode name of the register.
   14159              :    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   14160              :    y -- print "st(0)" instead of "st" as a register.
   14161              :    d -- print duplicated register operand for AVX instruction.
   14162              :    D -- print condition for SSE cmp instruction.
   14163              :    P -- if PIC, print an @PLT suffix.  For -fno-plt, load function
   14164              :         address from GOT.
   14165              :    p -- print raw symbol name.
   14166              :    X -- don't print any sort of PIC '@' suffix for a symbol.
   14167              :    & -- print some in-use local-dynamic symbol name.
   14168              :    H -- print a memory address offset by 8; used for sse high-parts
   14169              :    Y -- print condition for XOP pcom* instruction.
   14170              :    V -- print naked full integer register name without %.
   14171              :    v -- print segment override prefix
   14172              :    + -- print a branch hint as 'cs' or 'ds' prefix
   14173              :    ; -- print a semicolon (after prefixes due to bug in older gas).
   14174              :    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   14175              :    ^ -- print addr32 prefix if Pmode != word_mode
   14176              :    M -- print addr32 prefix for TARGET_X32 with VSIB address.
   14177              :    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   14178              :    N -- print maskz ("{z}") if the operand is constant 0.
   14179              :    G -- print embedded flag for ccmp/ctest.
   14180              :  */
   14181              : 
   14182              : void
   14183    176999509 : ix86_print_operand (FILE *file, rtx x, int code)
   14184              : {
   14185    177189503 :   if (code)
   14186              :     {
   14187     62151842 :       switch (code)
   14188              :         {
   14189       189990 :         case 'A':
   14190       189990 :           switch (ASSEMBLER_DIALECT)
   14191              :             {
   14192       189990 :             case ASM_ATT:
   14193       189990 :               putc ('*', file);
   14194       189990 :               break;
   14195              : 
   14196            0 :             case ASM_INTEL:
   14197              :               /* Intel syntax. For absolute addresses, registers should not
   14198              :                  be surrounded by brackets.  */
   14199            0 :               if (!REG_P (x))
   14200              :                 {
   14201            0 :                   putc ('[', file);
   14202            0 :                   ix86_print_operand (file, x, 0);
   14203            0 :                   putc (']', file);
   14204            0 :                   return;
   14205              :                 }
   14206              :               break;
   14207              : 
   14208            0 :             default:
   14209            0 :               gcc_unreachable ();
   14210              :             }
   14211              : 
   14212       189990 :           ix86_print_operand (file, x, 0);
   14213       189990 :           return;
   14214              : 
   14215      3557388 :         case 'E':
   14216              :           /* Wrap address in an UNSPEC to declare special handling.  */
   14217      3557388 :           if (TARGET_64BIT)
   14218      3072316 :             x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
   14219              : 
   14220      3557388 :           output_address (VOIDmode, x);
   14221      3557388 :           return;
   14222              : 
   14223            0 :         case 'L':
   14224            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14225            0 :             putc ('l', file);
   14226            0 :           return;
   14227              : 
   14228            0 :         case 'W':
   14229            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14230            0 :             putc ('w', file);
   14231            0 :           return;
   14232              : 
   14233            0 :         case 'B':
   14234            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14235            0 :             putc ('b', file);
   14236            0 :           return;
   14237              : 
   14238            0 :         case 'Q':
   14239            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14240            0 :             putc ('l', file);
   14241            0 :           return;
   14242              : 
   14243            0 :         case 'S':
   14244            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14245            0 :             putc ('s', file);
   14246            0 :           return;
   14247              : 
   14248            0 :         case 'T':
   14249            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14250            0 :             putc ('t', file);
   14251            0 :           return;
   14252              : 
   14253              :         case 'O':
   14254              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14255              :           if (ASSEMBLER_DIALECT != ASM_ATT)
   14256              :             return;
   14257              : 
   14258              :           switch (GET_MODE_SIZE (GET_MODE (x)))
   14259              :             {
   14260              :             case 2:
   14261              :               putc ('w', file);
   14262              :               break;
   14263              : 
   14264              :             case 4:
   14265              :               putc ('l', file);
   14266              :               break;
   14267              : 
   14268              :             case 8:
   14269              :               putc ('q', file);
   14270              :               break;
   14271              : 
   14272              :             default:
   14273              :               output_operand_lossage ("invalid operand size for operand "
   14274              :                                       "code 'O'");
   14275              :               return;
   14276              :             }
   14277              : 
   14278              :           putc ('.', file);
   14279              : #endif
   14280              :           return;
   14281              : 
   14282        38031 :         case 'z':
   14283        38031 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14284              :             {
   14285              :               /* Opcodes don't get size suffixes if using Intel opcodes.  */
   14286        38029 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   14287              :                 return;
   14288              : 
   14289        76058 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14290              :                 {
   14291            6 :                 case 1:
   14292            6 :                   putc ('b', file);
   14293            6 :                   return;
   14294              : 
   14295            6 :                 case 2:
   14296            6 :                   putc ('w', file);
   14297            6 :                   return;
   14298              : 
   14299        37534 :                 case 4:
   14300        37534 :                   putc ('l', file);
   14301        37534 :                   return;
   14302              : 
   14303          483 :                 case 8:
   14304          483 :                   putc ('q', file);
   14305          483 :                   return;
   14306              : 
   14307            0 :                 default:
   14308            0 :                   output_operand_lossage ("invalid operand size for operand "
   14309              :                                           "code 'z'");
   14310            0 :                   return;
   14311              :                 }
   14312              :             }
   14313              : 
   14314            2 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14315              :             {
   14316            1 :               if (this_is_asm_operands)
   14317            1 :                 warning_for_asm (this_is_asm_operands,
   14318              :                                  "non-integer operand used with operand code %<z%>");
   14319              :               else
   14320            0 :                 warning (0, "non-integer operand used with operand code %<z%>");
   14321              :             }
   14322              :           /* FALLTHRU */
   14323              : 
   14324       378667 :         case 'Z':
   14325              :           /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
   14326       378667 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14327              :             return;
   14328              : 
   14329       378667 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14330              :             {
   14331        29314 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14332              :                 {
   14333         3525 :                 case 2:
   14334              : #ifdef HAVE_AS_IX86_FILDS
   14335         3525 :                   putc ('s', file);
   14336              : #endif
   14337         3525 :                   return;
   14338              : 
   14339         3941 :                 case 4:
   14340         3941 :                   putc ('l', file);
   14341         3941 :                   return;
   14342              : 
   14343         7191 :                 case 8:
   14344              : #ifdef HAVE_AS_IX86_FILDQ
   14345         7191 :                   putc ('q', file);
   14346              : #else
   14347              :                   fputs ("ll", file);
   14348              : #endif
   14349         7191 :                   return;
   14350              : 
   14351              :                 default:
   14352              :                   break;
   14353              :                 }
   14354              :             }
   14355       364010 :           else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14356              :             {
   14357              :               /* 387 opcodes don't get size suffixes
   14358              :                  if the operands are registers.  */
   14359       364008 :               if (STACK_REG_P (x))
   14360              :                 return;
   14361              : 
   14362       683488 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14363              :                 {
   14364        23317 :                 case 4:
   14365        23317 :                   putc ('s', file);
   14366        23317 :                   return;
   14367              : 
   14368        32727 :                 case 8:
   14369        32727 :                   putc ('l', file);
   14370        32727 :                   return;
   14371              : 
   14372       285698 :                 case 12:
   14373       285698 :                 case 16:
   14374       285698 :                   putc ('t', file);
   14375       285698 :                   return;
   14376              : 
   14377              :                 default:
   14378              :                   break;
   14379              :                 }
   14380              :             }
   14381              :           else
   14382              :             {
   14383            2 :               output_operand_lossage ("invalid operand type used with "
   14384              :                                       "operand code '%c'", code);
   14385            2 :               return;
   14386              :             }
   14387              : 
   14388            2 :           output_operand_lossage ("invalid operand size for operand code '%c'",
   14389              :                                   code);
   14390            2 :           return;
   14391              : 
   14392              :         case 'd':
   14393              :         case 'b':
   14394              :         case 'w':
   14395              :         case 'k':
   14396              :         case 'q':
   14397              :         case 'h':
   14398              :         case 't':
   14399              :         case 'g':
   14400              :         case 'y':
   14401              :         case 'x':
   14402              :         case 'X':
   14403              :         case 'P':
   14404              :         case 'p':
   14405              :         case 'V':
   14406              :           break;
   14407              : 
   14408            0 :         case 's':
   14409            0 :           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
   14410              :             {
   14411            0 :               ix86_print_operand (file, x, 0);
   14412            0 :               fputs (", ", file);
   14413              :             }
   14414            0 :           return;
   14415              : 
   14416          494 :         case 'Y':
   14417          494 :           switch (GET_CODE (x))
   14418              :             {
   14419          182 :             case NE:
   14420          182 :               fputs ("neq", file);
   14421          182 :               break;
   14422           32 :             case EQ:
   14423           32 :               fputs ("eq", file);
   14424           32 :               break;
   14425           64 :             case GE:
   14426           64 :             case GEU:
   14427           64 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
   14428           64 :               break;
   14429           40 :             case GT:
   14430           40 :             case GTU:
   14431           40 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
   14432           40 :               break;
   14433           64 :             case LE:
   14434           64 :             case LEU:
   14435           64 :               fputs ("le", file);
   14436           64 :               break;
   14437          112 :             case LT:
   14438          112 :             case LTU:
   14439          112 :               fputs ("lt", file);
   14440          112 :               break;
   14441            0 :             case UNORDERED:
   14442            0 :               fputs ("unord", file);
   14443            0 :               break;
   14444            0 :             case ORDERED:
   14445            0 :               fputs ("ord", file);
   14446            0 :               break;
   14447            0 :             case UNEQ:
   14448            0 :               fputs ("ueq", file);
   14449            0 :               break;
   14450            0 :             case UNGE:
   14451            0 :               fputs ("nlt", file);
   14452            0 :               break;
   14453            0 :             case UNGT:
   14454            0 :               fputs ("nle", file);
   14455            0 :               break;
   14456            0 :             case UNLE:
   14457            0 :               fputs ("ule", file);
   14458            0 :               break;
   14459            0 :             case UNLT:
   14460            0 :               fputs ("ult", file);
   14461            0 :               break;
   14462            0 :             case LTGT:
   14463            0 :               fputs ("une", file);
   14464            0 :               break;
   14465            0 :             default:
   14466            0 :               output_operand_lossage ("operand is not a condition code, "
   14467              :                                       "invalid operand code 'Y'");
   14468            0 :               return;
   14469              :             }
   14470          494 :           return;
   14471              : 
   14472         9327 :         case 'D':
   14473              :           /* Little bit of braindamage here.  The SSE compare instructions
   14474              :              use completely different names for the comparisons than the
   14475              :              fp conditional moves.  */
   14476         9327 :           switch (GET_CODE (x))
   14477              :             {
   14478            3 :             case UNEQ:
   14479            3 :               if (TARGET_AVX)
   14480              :                 {
   14481            3 :                   fputs ("eq_us", file);
   14482            3 :                   break;
   14483              :                 }
   14484              :              /* FALLTHRU */
   14485         4635 :             case EQ:
   14486         4635 :               fputs ("eq", file);
   14487         4635 :               break;
   14488            0 :             case UNLT:
   14489            0 :               if (TARGET_AVX)
   14490              :                 {
   14491            0 :                   fputs ("nge", file);
   14492            0 :                   break;
   14493              :                 }
   14494              :              /* FALLTHRU */
   14495         1628 :             case LT:
   14496         1628 :               fputs ("lt", file);
   14497         1628 :               break;
   14498            0 :             case UNLE:
   14499            0 :               if (TARGET_AVX)
   14500              :                 {
   14501            0 :                   fputs ("ngt", file);
   14502            0 :                   break;
   14503              :                 }
   14504              :              /* FALLTHRU */
   14505          795 :             case LE:
   14506          795 :               fputs ("le", file);
   14507          795 :               break;
   14508           95 :             case UNORDERED:
   14509           95 :               fputs ("unord", file);
   14510           95 :               break;
   14511           24 :             case LTGT:
   14512           24 :               if (TARGET_AVX)
   14513              :                 {
   14514           24 :                   fputs ("neq_oq", file);
   14515           24 :                   break;
   14516              :                 }
   14517              :              /* FALLTHRU */
   14518          893 :             case NE:
   14519          893 :               fputs ("neq", file);
   14520          893 :               break;
   14521            0 :             case GE:
   14522            0 :               if (TARGET_AVX)
   14523              :                 {
   14524            0 :                   fputs ("ge", file);
   14525            0 :                   break;
   14526              :                 }
   14527              :              /* FALLTHRU */
   14528          403 :             case UNGE:
   14529          403 :               fputs ("nlt", file);
   14530          403 :               break;
   14531            0 :             case GT:
   14532            0 :               if (TARGET_AVX)
   14533              :                 {
   14534            0 :                   fputs ("gt", file);
   14535            0 :                   break;
   14536              :                 }
   14537              :              /* FALLTHRU */
   14538          768 :             case UNGT:
   14539          768 :               fputs ("nle", file);
   14540          768 :               break;
   14541           83 :             case ORDERED:
   14542           83 :               fputs ("ord", file);
   14543           83 :               break;
   14544            0 :             default:
   14545            0 :               output_operand_lossage ("operand is not a condition code, "
   14546              :                                       "invalid operand code 'D'");
   14547            0 :               return;
   14548              :             }
   14549         9327 :           return;
   14550              : 
   14551      7141600 :         case 'F':
   14552      7141600 :         case 'f':
   14553              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14554              :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14555              :             putc ('.', file);
   14556              :           gcc_fallthrough ();
   14557              : #endif
   14558              : 
   14559      7141600 :         case 'C':
   14560      7141600 :         case 'c':
   14561      7141600 :           if (!COMPARISON_P (x))
   14562              :             {
   14563            0 :               output_operand_lossage ("operand is not a condition code, "
   14564              :                                       "invalid operand code '%c'", code);
   14565            0 :               return;
   14566              :             }
   14567      7141600 :           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
   14568      7141600 :                               code == 'c' || code == 'f',
   14569      7141600 :                               code == 'F' || code == 'f',
   14570              :                               file);
   14571      7141600 :           return;
   14572              : 
   14573           21 :         case 'G':
   14574           21 :           {
   14575           21 :             int dfv = INTVAL (x);
   14576           21 :             const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
   14577           21 :             fputs (dfv_suffix, file);
   14578              :           }
   14579           21 :           return;
   14580              : 
   14581         1434 :         case 'H':
   14582         1434 :           if (!offsettable_memref_p (x))
   14583              :             {
   14584            1 :               output_operand_lossage ("operand is not an offsettable memory "
   14585              :                                       "reference, invalid operand code 'H'");
   14586            1 :               return;
   14587              :             }
   14588              :           /* It doesn't actually matter what mode we use here, as we're
   14589              :              only going to use this for printing.  */
   14590         1433 :           x = adjust_address_nv (x, DImode, 8);
   14591              :           /* Output 'qword ptr' for intel assembler dialect.  */
   14592         1433 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14593            0 :             code = 'q';
   14594              :           break;
   14595              : 
   14596        75584 :         case 'K':
   14597        75584 :           if (!CONST_INT_P (x))
   14598              :             {
   14599            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14600              :                                       "operand code 'K'");
   14601            1 :               return;
   14602              :             }
   14603              : 
   14604        75583 :           if (INTVAL (x) & IX86_HLE_ACQUIRE)
   14605              : #ifdef HAVE_AS_IX86_HLE
   14606           22 :             fputs ("xacquire ", file);
   14607              : #else
   14608              :             fputs ("\n" ASM_BYTE "0xf2\n\t", file);
   14609              : #endif
   14610        75561 :           else if (INTVAL (x) & IX86_HLE_RELEASE)
   14611              : #ifdef HAVE_AS_IX86_HLE
   14612           24 :             fputs ("xrelease ", file);
   14613              : #else
   14614              :             fputs ("\n" ASM_BYTE "0xf3\n\t", file);
   14615              : #endif
   14616              :           /* We do not want to print value of the operand.  */
   14617        75583 :           return;
   14618              : 
   14619        43036 :         case 'N':
   14620        43036 :           if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
   14621        15485 :             fputs ("{z}", file);
   14622        43036 :           return;
   14623              : 
   14624         4008 :         case 'r':
   14625         4008 :           if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
   14626              :             {
   14627            2 :               output_operand_lossage ("operand is not a specific integer, "
   14628              :                                       "invalid operand code 'r'");
   14629            2 :               return;
   14630              :             }
   14631              : 
   14632         4006 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14633            1 :             fputs (", ", file);
   14634              : 
   14635         4006 :           fputs ("{sae}", file);
   14636              : 
   14637         4006 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14638         4005 :             fputs (", ", file);
   14639              : 
   14640         4006 :           return;
   14641              : 
   14642         5988 :         case 'R':
   14643         5988 :           if (!CONST_INT_P (x))
   14644              :             {
   14645            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14646              :                                       "operand code 'R'");
   14647            1 :               return;
   14648              :             }
   14649              : 
   14650         5987 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14651            6 :             fputs (", ", file);
   14652              : 
   14653         5987 :           switch (INTVAL (x))
   14654              :             {
   14655         5172 :             case ROUND_NEAREST_INT | ROUND_SAE:
   14656         5172 :               fputs ("{rn-sae}", file);
   14657         5172 :               break;
   14658          637 :             case ROUND_NEG_INF | ROUND_SAE:
   14659          637 :               fputs ("{rd-sae}", file);
   14660          637 :               break;
   14661           56 :             case ROUND_POS_INF | ROUND_SAE:
   14662           56 :               fputs ("{ru-sae}", file);
   14663           56 :               break;
   14664          121 :             case ROUND_ZERO | ROUND_SAE:
   14665          121 :               fputs ("{rz-sae}", file);
   14666          121 :               break;
   14667            1 :             default:
   14668            1 :               output_operand_lossage ("operand is not a specific integer, "
   14669              :                                       "invalid operand code 'R'");
   14670              :             }
   14671              : 
   14672         5987 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14673         5981 :             fputs (", ", file);
   14674              : 
   14675         5987 :           return;
   14676              : 
   14677        10522 :         case 'v':
   14678        10522 :           if (MEM_P (x))
   14679              :             {
   14680        10641 :               switch (MEM_ADDR_SPACE (x))
   14681              :                 {
   14682              :                 case ADDR_SPACE_GENERIC:
   14683              :                   break;
   14684            0 :                 case ADDR_SPACE_SEG_FS:
   14685            0 :                   fputs ("fs ", file);
   14686            0 :                   break;
   14687            0 :                 case ADDR_SPACE_SEG_GS:
   14688            0 :                   fputs ("gs ", file);
   14689            0 :                   break;
   14690            0 :                 default:
   14691            0 :                   gcc_unreachable ();
   14692              :                 }
   14693              :             }
   14694              :           else
   14695            0 :             output_operand_lossage ("operand is not a memory reference, "
   14696              :                                     "invalid operand code 'v'");
   14697        10522 :           return;
   14698              : 
   14699            0 :         case '*':
   14700            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14701            0 :             putc ('*', file);
   14702            0 :           return;
   14703              : 
   14704          202 :         case '&':
   14705          202 :           {
   14706          202 :             const char *name = get_some_local_dynamic_name ();
   14707          202 :             if (name == NULL)
   14708            1 :               output_operand_lossage ("'%%&' used without any "
   14709              :                                       "local dynamic TLS references");
   14710              :             else
   14711          201 :               assemble_name (file, name);
   14712          202 :             return;
   14713              :           }
   14714              : 
   14715      6492266 :         case '+':
   14716      6492266 :           {
   14717      6492266 :             rtx x;
   14718              : 
   14719      6492266 :             if (!optimize
   14720      5076378 :                 || optimize_function_for_size_p (cfun)
   14721     11380393 :                 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
   14722      4888127 :                     && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
   14723      6492266 :               return;
   14724              : 
   14725            0 :             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
   14726            0 :             if (x)
   14727              :               {
   14728            0 :                 int pred_val = profile_probability::from_reg_br_prob_note
   14729            0 :                                  (XINT (x, 0)).to_reg_br_prob_base ();
   14730              : 
   14731            0 :                 bool taken = pred_val > REG_BR_PROB_BASE / 2;
   14732              :                 /* We use 3e (DS) prefix for taken branches and
   14733              :                    2e (CS) prefix for not taken branches.  */
   14734            0 :                 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
   14735            0 :                   fputs ("ds ; ", file);
   14736            0 :                 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
   14737            0 :                   fputs ("cs ; ", file);
   14738              :               }
   14739            0 :             return;
   14740              :           }
   14741              : 
   14742              :         case ';':
   14743              : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
   14744              :           putc (';', file);
   14745              : #endif
   14746              :           return;
   14747              : 
   14748         3391 :         case '~':
   14749         3391 :           putc (TARGET_AVX2 ? 'i' : 'f', file);
   14750         3391 :           return;
   14751              : 
   14752         1675 :         case 'M':
   14753         1675 :           if (TARGET_X32)
   14754              :             {
   14755              :               /* NB: 32-bit indices in VSIB address are sign-extended
   14756              :                  to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
   14757              :                  sign-extended to 0xfffffffff7fa3010 which is invalid
   14758              :                  address.  Add addr32 prefix if there is no base
   14759              :                  register nor symbol.  */
   14760           40 :               bool ok;
   14761           40 :               struct ix86_address parts;
   14762           40 :               ok = ix86_decompose_address (x, &parts);
   14763           40 :               gcc_assert (ok && parts.index == NULL_RTX);
   14764           40 :               if (parts.base == NULL_RTX
   14765           40 :                   && (parts.disp == NULL_RTX
   14766           34 :                       || !symbolic_operand (parts.disp,
   14767           34 :                                             GET_MODE (parts.disp))))
   14768           34 :                 fputs ("addr32 ", file);
   14769              :             }
   14770         1675 :           return;
   14771              : 
   14772        22277 :         case '^':
   14773        25470 :           if (Pmode != word_mode)
   14774            0 :             fputs ("addr32 ", file);
   14775        22277 :           return;
   14776              : 
   14777     14849028 :         case '!':
   14778     14849028 :           if (ix86_notrack_prefixed_insn_p (current_output_insn))
   14779         3776 :             fputs ("notrack ", file);
   14780     14849028 :           return;
   14781              : 
   14782            1 :         default:
   14783            1 :           output_operand_lossage ("invalid operand code '%c'", code);
   14784              :         }
   14785              :     }
   14786              : 
   14787    143964090 :   if (REG_P (x))
   14788     85745866 :     print_reg (x, code, file);
   14789              : 
   14790     58218224 :   else if (MEM_P (x))
   14791              :     {
   14792     33360734 :       rtx addr = XEXP (x, 0);
   14793              : 
   14794              :       /* No `byte ptr' prefix for call instructions ... */
   14795     33360734 :       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
   14796              :         {
   14797          321 :           machine_mode mode = GET_MODE (x);
   14798          321 :           const char *size;
   14799              : 
   14800              :           /* Check for explicit size override codes.  */
   14801          321 :           if (code == 'b')
   14802              :             size = "BYTE";
   14803              :           else if (code == 'w')
   14804              :             size = "WORD";
   14805              :           else if (code == 'k')
   14806              :             size = "DWORD";
   14807              :           else if (code == 'q')
   14808              :             size = "QWORD";
   14809              :           else if (code == 'x')
   14810              :             size = "XMMWORD";
   14811              :           else if (code == 't')
   14812              :             size = "YMMWORD";
   14813              :           else if (code == 'g')
   14814              :             size = "ZMMWORD";
   14815          235 :           else if (mode == BLKmode)
   14816              :             /* ... or BLKmode operands, when not overridden.  */
   14817              :             size = NULL;
   14818              :           else
   14819          466 :             switch (GET_MODE_SIZE (mode))
   14820              :               {
   14821              :               case 1: size = "BYTE"; break;
   14822              :               case 2: size = "WORD"; break;
   14823              :               case 4: size = "DWORD"; break;
   14824              :               case 8: size = "QWORD"; break;
   14825              :               case 12: size = "TBYTE"; break;
   14826            7 :               case 16:
   14827            7 :                 if (mode == XFmode)
   14828              :                   size = "TBYTE";
   14829              :                 else
   14830              :                   size = "XMMWORD";
   14831              :                 break;
   14832              :               case 32: size = "YMMWORD"; break;
   14833              :               case 64: size = "ZMMWORD"; break;
   14834            0 :               default:
   14835            0 :                 gcc_unreachable ();
   14836              :               }
   14837              :           if (size)
   14838              :             {
   14839          319 :               fputs (size, file);
   14840          319 :               fputs (" PTR ", file);
   14841              :             }
   14842              :         }
   14843              : 
   14844     33360734 :       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   14845            0 :         output_operand_lossage ("invalid constraints for operand");
   14846              :       else
   14847     33360734 :         ix86_print_operand_address_as
   14848     34032351 :           (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
   14849              :     }
   14850              : 
   14851     24857490 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
   14852              :     {
   14853          762 :       long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   14854          762 :                                REAL_MODE_FORMAT (HFmode));
   14855          762 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14856          762 :         putc ('$', file);
   14857          762 :       fprintf (file, "0x%04x", (unsigned int) l);
   14858          762 :     }
   14859              : 
   14860     24856728 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
   14861              :     {
   14862        20624 :       long l;
   14863              : 
   14864        20624 :       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14865              : 
   14866        20624 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14867        20624 :         putc ('$', file);
   14868              :       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
   14869        20624 :       if (code == 'q')
   14870          327 :         fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
   14871              :                  (unsigned long long) (int) l);
   14872              :       else
   14873        20297 :         fprintf (file, "0x%08x", (unsigned int) l);
   14874              :     }
   14875              : 
   14876     24836104 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
   14877              :     {
   14878         3278 :       long l[2];
   14879              : 
   14880         3278 :       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14881              : 
   14882         3278 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14883         3278 :         putc ('$', file);
   14884         3278 :       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
   14885         3278 :     }
   14886              : 
   14887              :   /* These float cases don't actually occur as immediate operands.  */
   14888     24832826 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
   14889              :     {
   14890            0 :       char dstr[30];
   14891              : 
   14892            0 :       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
   14893            0 :       fputs (dstr, file);
   14894            0 :     }
   14895              : 
   14896              :   /* Print bcst_mem_operand.  */
   14897     24832826 :   else if (GET_CODE (x) == VEC_DUPLICATE)
   14898              :     {
   14899          313 :       machine_mode vmode = GET_MODE (x);
   14900              :       /* Must be bcst_memory_operand.  */
   14901          313 :       gcc_assert (bcst_mem_operand (x, vmode));
   14902              : 
   14903          313 :       rtx mem = XEXP (x,0);
   14904          313 :       ix86_print_operand (file, mem, 0);
   14905              : 
   14906          313 :       switch (vmode)
   14907              :         {
   14908           28 :         case E_V2DImode:
   14909           28 :         case E_V2DFmode:
   14910           28 :           fputs ("{1to2}", file);
   14911           28 :           break;
   14912           74 :         case E_V4SImode:
   14913           74 :         case E_V4SFmode:
   14914           74 :         case E_V4DImode:
   14915           74 :         case E_V4DFmode:
   14916           74 :           fputs ("{1to4}", file);
   14917           74 :           break;
   14918           93 :         case E_V8SImode:
   14919           93 :         case E_V8SFmode:
   14920           93 :         case E_V8DFmode:
   14921           93 :         case E_V8DImode:
   14922           93 :         case E_V8HFmode:
   14923           93 :           fputs ("{1to8}", file);
   14924           93 :           break;
   14925          110 :         case E_V16SFmode:
   14926          110 :         case E_V16SImode:
   14927          110 :         case E_V16HFmode:
   14928          110 :           fputs ("{1to16}", file);
   14929          110 :           break;
   14930            8 :         case E_V32HFmode:
   14931            8 :           fputs ("{1to32}", file);
   14932            8 :           break;
   14933            0 :         default:
   14934            0 :           gcc_unreachable ();
   14935              :         }
   14936              :     }
   14937              : 
   14938              :   else
   14939              :     {
   14940              :       /* We have patterns that allow zero sets of memory, for instance.
   14941              :          In 64-bit mode, we should probably support all 8-byte vectors,
   14942              :          since we can in fact encode that into an immediate.  */
   14943     24832513 :       if (CONST_VECTOR_P (x))
   14944              :         {
   14945         3264 :           if (x != CONST0_RTX (GET_MODE (x)))
   14946            2 :             output_operand_lossage ("invalid vector immediate");
   14947         3264 :           x = const0_rtx;
   14948              :         }
   14949              : 
   14950     24832513 :       if (code == 'P')
   14951              :         {
   14952      5941241 :           if (ix86_force_load_from_GOT_p (x, true))
   14953              :             {
   14954              :               /* For inline assembly statement, load function address
   14955              :                  from GOT with 'P' operand modifier to avoid PLT.  */
   14956            4 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   14957              :                                   (TARGET_64BIT
   14958              :                                    ? UNSPEC_GOTPCREL
   14959              :                                    : UNSPEC_GOT));
   14960            4 :               x = gen_rtx_CONST (Pmode, x);
   14961            4 :               x = gen_const_mem (Pmode, x);
   14962            4 :               ix86_print_operand (file, x, 'A');
   14963            4 :               return;
   14964              :             }
   14965              :         }
   14966     18891272 :       else if (code != 'p')
   14967              :         {
   14968     18891163 :           if (CONST_INT_P (x))
   14969              :             {
   14970     15605835 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14971     15605607 :                 putc ('$', file);
   14972              :             }
   14973      3285328 :           else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
   14974         9390 :                    || LABEL_REF_P (x))
   14975              :             {
   14976      3285326 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14977      3285302 :                 putc ('$', file);
   14978              :               else
   14979           24 :                 fputs ("OFFSET FLAT:", file);
   14980              :             }
   14981              :         }
   14982     24832509 :       if (CONST_INT_P (x))
   14983     15605921 :         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
   14984      9226588 :       else if (flag_pic || MACHOPIC_INDIRECT)
   14985       529823 :         output_pic_addr_const (file, x, code);
   14986              :       else
   14987      8696765 :         output_addr_const (file, x);
   14988              :     }
   14989              : }
   14990              : /* Return true if CODE is one of the punctuation operand codes '*', '+', '&', ';', '~', '^' or '!'.  */
   14991              : static bool
   14992     21447618 : ix86_print_operand_punct_valid_p (unsigned char code)
   14993              : {
   14994     21447618 :   return (code == '*' || code == '+' || code == '&' || code == ';'
   14995     14871305 :           || code == '~' || code == '^' || code == '!');
   14996              : }
   14997              : 
   14998              : /* Print to FILE a memory operand whose address is ADDR, using address space AS.  */
   14999              : /* If RAW, omit the segment override and the shorter RIP-relative form.  */
   15000              : static void
   15001     36920388 : ix86_print_operand_address_as (FILE *file, rtx addr,
   15002              :                                addr_space_t as, bool raw)
   15003              : {
   15004     36920388 :   struct ix86_address parts;
   15005     36920388 :   rtx base, index, disp;
   15006     36920388 :   int scale;
   15007     36920388 :   int ok;
   15008     36920388 :   bool vsib = false;
   15009     36920388 :   int code = 0;
   15010              : /* Unwrap VSIB and LEA address UNSPECs so that the wrapped address is what gets decomposed.  */
   15011     36920388 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
   15012              :     {
   15013         1675 :       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
   15014         1675 :       gcc_assert (parts.index == NULL_RTX);
   15015         1675 :       parts.index = XVECEXP (addr, 0, 1);
   15016         1675 :       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
   15017         1675 :       addr = XVECEXP (addr, 0, 0);
   15018         1675 :       vsib = true;
   15019              :     }
   15020     36918713 :   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
   15021              :     {
   15022      3072316 :       gcc_assert (TARGET_64BIT);
   15023      3072316 :       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
   15024      3072316 :       code = 'q';
   15025              :     }
   15026              :   else
   15027     33846397 :     ok = ix86_decompose_address (addr, &parts);
   15028              : 
   15029     36920388 :   gcc_assert (ok);
   15030              : 
   15031     36920388 :   base = parts.base;
   15032     36920388 :   index = parts.index;
   15033     36920388 :   disp = parts.disp;
   15034     36920388 :   scale = parts.scale;
   15035              : /* A generic AS defers to the segment found while decomposing ADDR.  */
   15036     36920388 :   if (ADDR_SPACE_GENERIC_P (as))
   15037     36638603 :     as = parts.seg;
   15038              :   else
   15039       281785 :     gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
   15040              : /* Emit the fs:/gs: override (with a leading '%' in AT&T syntax) unless RAW.  */
   15041     36920388 :   if (!ADDR_SPACE_GENERIC_P (as) && !raw)
   15042              :     {
   15043       281800 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   15044       281798 :         putc ('%', file);
   15045              : 
   15046       281800 :       switch (as)
   15047              :         {
   15048       182111 :         case ADDR_SPACE_SEG_FS:
   15049       182111 :           fputs ("fs:", file);
   15050       182111 :           break;
   15051        99689 :         case ADDR_SPACE_SEG_GS:
   15052        99689 :           fputs ("gs:", file);
   15053        99689 :           break;
   15054            0 :         default:
   15055            0 :           gcc_unreachable ();
   15056              :         }
   15057              :     }
   15058              : 
   15059              :   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
   15060     36920388 :   if (TARGET_64BIT && !base && !index && !raw)
   15061              :     {
   15062      6008308 :       rtx symbol = disp;
   15063              : 
   15064      6008308 :       if (GET_CODE (disp) == CONST
   15065      2184738 :           && GET_CODE (XEXP (disp, 0)) == PLUS
   15066      2099597 :           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
   15067      2099597 :         symbol = XEXP (XEXP (disp, 0), 0);
   15068              : 
   15069      6008308 :       if (LABEL_REF_P (symbol)
   15070      6008308 :           || (SYMBOL_REF_P (symbol)
   15071      5746959 :               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
   15072      5746829 :         base = pc_rtx;
   15073              :     }
   15074              : 
   15075     36920388 :   if (!base && !index)
   15076              :     {
   15077              :       /* Displacement only requires special attention.  */
   15078       601038 :       if (CONST_INT_P (disp))
   15079              :         {
   15080       269320 :           if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
   15081            0 :             fputs ("ds:", file);
   15082       269320 :           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
   15083              :         }
   15084              :       /* Load the external function address via the GOT slot to avoid PLT.  */
   15085       331718 :       else if (GET_CODE (disp) == CONST
   15086       113439 :                && GET_CODE (XEXP (disp, 0)) == UNSPEC
   15087        85379 :                && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
   15088         9571 :                    || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
   15089       407526 :                && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   15090           24 :         output_pic_addr_const (file, disp, 0);
   15091       331694 :       else if (flag_pic)
   15092       114810 :         output_pic_addr_const (file, disp, 0);
   15093              :       else
   15094       216884 :         output_addr_const (file, disp);
   15095              :     }
   15096              :   else
   15097              :     {
   15098              :       /* Print SImode register names to force addr32 prefix.  */
   15099     36319350 :       if (SImode_address_operand (addr, VOIDmode))
   15100              :         {
   15101           37 :           if (flag_checking)
   15102              :             {
   15103           37 :               gcc_assert (TARGET_64BIT);
   15104           37 :               switch (GET_CODE (addr))
   15105              :                 {
   15106            0 :                 case SUBREG:
   15107            0 :                   gcc_assert (GET_MODE (addr) == SImode);
   15108            0 :                   gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
   15109              :                   break;
   15110           37 :                 case ZERO_EXTEND:
   15111           37 :                 case AND:
   15112           37 :                   gcc_assert (GET_MODE (addr) == DImode);
   15113              :                   break;
   15114            0 :                 default:
   15115            0 :                   gcc_unreachable ();
   15116              :                 }
   15117              :             }
   15118           37 :           gcc_assert (!code);
   15119              :           code = 'k';
   15120              :         }
   15121     36319313 :       else if (code == 0
   15122     33248629 :                && TARGET_X32
   15123          482 :                && disp
   15124          410 :                && CONST_INT_P (disp)
   15125          311 :                && INTVAL (disp) < -16*1024*1024)
   15126              :         {
   15127              :           /* X32 runs in 64-bit mode, where displacement, DISP, in
   15128              :              address DISP(%r64), is encoded as 32-bit immediate sign-
   15129              :              extended from 32-bit to 64-bit.  For -0x40000300(%r64),
   15130              :              address is %r64 + 0xffffffffbffffd00.  When %r64 <
   15131              :              0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
   15132              :              which is invalid for x32.  The correct address is %r64
   15133              :              - 0x40000300 == 0xf7ffdd64.  To properly encode
   15134              :              -0x40000300(%r64) for x32, we zero-extend negative
   15135              :              displacement by forcing addr32 prefix which truncates
   15136              :              0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
   15137              :              zero-extend all negative displacements, including -1(%rsp).
   15138              :              However, for small negative displacements, sign-extension
   15139              :              won't cause overflow.  We only zero-extend negative
   15140              :              displacements if they are < -16*1024*1024, which is also used
   15141              :              to check legitimate address displacements for PIC.  */
   15142           38 :           code = 'k';
   15143              :         }
   15144              : 
   15145              :       /* Since the upper 32 bits of RSP are always zero for x32,
   15146              :          we can encode %esp as %rsp to avoid 0x67 prefix if
   15147              :          there is no index register.  */
   15148          976 :       if (TARGET_X32 && Pmode == SImode
   15149     36319754 :           && !index && base && REG_P (base) && REGNO (base) == SP_REG)
   15150              :         code = 'q';
   15151              : /* AT&T syntax: DISP(BASE,INDEX,SCALE); SCALE is omitted when it is 1 and the address is not VSIB.  */
   15152     36319350 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   15153              :         {
   15154     36318980 :           if (disp)
   15155              :             {
   15156     32251830 :               if (flag_pic)
   15157      2829259 :                 output_pic_addr_const (file, disp, 0);
   15158     29422571 :               else if (LABEL_REF_P (disp))
   15159         5157 :                 output_asm_label (disp);
   15160              :               else
   15161     29417414 :                 output_addr_const (file, disp);
   15162              :             }
   15163              : 
   15164     36318980 :           putc ('(', file);
   15165     36318980 :           if (base)
   15166     35901870 :             print_reg (base, code, file);
   15167     36318980 :           if (index)
   15168              :             {
   15169      1945890 :               putc (',', file);
   15170      3890153 :               print_reg (index, vsib ? 0 : code, file);
   15171      1945890 :               if (scale != 1 || vsib)
   15172      1031319 :                 fprintf (file, ",%d", scale);
   15173              :             }
   15174     36318980 :           putc (')', file);
   15175              :         }
   15176              :       else
   15177              :         {
   15178          370 :           rtx offset = NULL_RTX;
   15179              : /* Intel syntax: DISP[BASE+OFFSET+INDEX*SCALE]; a constant offset split from DISP is printed inside the brackets.  */
   15180          370 :           if (disp)
   15181              :             {
   15182              :               /* Pull out the offset of a symbol; print any symbol itself.  */
   15183          290 :               if (GET_CODE (disp) == CONST
   15184           18 :                   && GET_CODE (XEXP (disp, 0)) == PLUS
   15185           18 :                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
   15186              :                 {
   15187           18 :                   offset = XEXP (XEXP (disp, 0), 1);
   15188           18 :                   disp = gen_rtx_CONST (VOIDmode,
   15189              :                                         XEXP (XEXP (disp, 0), 0));
   15190              :                 }
   15191              : 
   15192          290 :               if (flag_pic)
   15193            0 :                 output_pic_addr_const (file, disp, 0);
   15194          290 :               else if (LABEL_REF_P (disp))
   15195            0 :                 output_asm_label (disp);
   15196          290 :               else if (CONST_INT_P (disp))
   15197              :                 offset = disp;
   15198              :               else
   15199          123 :                 output_addr_const (file, disp);
   15200              :             }
   15201              : 
   15202          370 :           putc ('[', file);
   15203          370 :           if (base)
   15204              :             {
   15205          329 :               print_reg (base, code, file);
   15206          329 :               if (offset)
   15207              :                 {
   15208          185 :                   if (INTVAL (offset) >= 0)
   15209           20 :                     putc ('+', file);
   15210          185 :                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   15211              :                 }
   15212              :             }
   15213           41 :           else if (offset)
   15214            0 :             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   15215              :           else
   15216           41 :             putc ('0', file);
   15217              : 
   15218          370 :           if (index)
   15219              :             {
   15220           94 :               putc ('+', file);
   15221          140 :               print_reg (index, vsib ? 0 : code, file);
   15222           94 :               if (scale != 1 || vsib)
   15223           92 :                 fprintf (file, "*%d", scale);
   15224              :             }
   15225          370 :           putc (']', file);
   15226              :         }
   15227              :     }
   15228     36920388 : }
   15229              : /* Print address ADDR to FILE in the generic address space; the mode argument is unused.  */
   15230              : static void
   15231      3559655 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
   15232              : {
   15233      3559655 :   if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   15234            1 :     output_operand_lossage ("invalid constraints for operand");
   15235              :   else
   15236      3559654 :     ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
   15237      3559655 : }
   15238              : 
   15239              : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
   15240              : /* Print the first operand of UNSPEC X to FILE with the suffix for its kind; return false if X is not handled.  */
   15241              : static bool
   15242        15368 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
   15243              : {
   15244        15368 :   rtx op;
   15245              : 
   15246        15368 :   if (GET_CODE (x) != UNSPEC)
   15247              :     return false;
   15248              : /* Every handled case below starts by printing OP, the UNSPEC's first operand.  */
   15249        15368 :   op = XVECEXP (x, 0, 0);
   15250        15368 :   switch (XINT (x, 1))
   15251              :     {
   15252         1358 :     case UNSPEC_GOTOFF:
   15253         1358 :       output_addr_const (file, op);
   15254         1358 :       fputs ("@gotoff", file);
   15255         1358 :       break;
   15256            0 :     case UNSPEC_GOTTPOFF:
   15257            0 :       output_addr_const (file, op);
   15258              :       /* FIXME: This might be @TPOFF in Sun ld.  */
   15259            0 :       fputs ("@gottpoff", file);
   15260            0 :       break;
   15261            0 :     case UNSPEC_TPOFF:
   15262            0 :       output_addr_const (file, op);
   15263            0 :       fputs ("@tpoff", file);
   15264            0 :       break;
   15265        10914 :     case UNSPEC_NTPOFF:
   15266        10914 :       output_addr_const (file, op);
   15267        10914 :       if (TARGET_64BIT)
   15268        10168 :         fputs ("@tpoff", file);
   15269              :       else
   15270          746 :         fputs ("@ntpoff", file);
   15271              :       break;
   15272            0 :     case UNSPEC_DTPOFF:
   15273            0 :       output_addr_const (file, op);
   15274            0 :       fputs ("@dtpoff", file);
   15275            0 :       break;
   15276         3095 :     case UNSPEC_GOTNTPOFF:
   15277         3095 :       output_addr_const (file, op);
   15278         3095 :       if (TARGET_64BIT)
   15279         3095 :         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   15280              :                "@gottpoff(%rip)" : "@gottpoff[rip]", file);
   15281              :       else
   15282            0 :         fputs ("@gotntpoff", file);
   15283              :       break;
   15284            1 :     case UNSPEC_INDNTPOFF:
   15285            1 :       output_addr_const (file, op);
   15286            1 :       fputs ("@indntpoff", file);
   15287            1 :       break;
   15288            0 :     case UNSPEC_SECREL32:
   15289            0 :       output_addr_const (file, op);
   15290            0 :       fputs ("@secrel32", file);
   15291            0 :       break;
   15292              : #if TARGET_MACHO
   15293              :     case UNSPEC_MACHOPIC_OFFSET:
   15294              :       output_addr_const (file, op);
   15295              :       putc ('-', file);
   15296              :       machopic_output_function_base_name (file);
   15297              :       break;
   15298              : #endif
   15299              : 
   15300              :     default:
   15301              :       return false;
   15302              :     }
   15303              : 
   15304              :   return true;
   15305              : }
   15306              : 
   15307              : 
   15308              : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
   15309              :    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   15310              :    is the expression of the binary operation.  The output may either be
   15311              :    emitted here, or returned to the caller, like all output_* functions.
   15312              : 
   15313              :    There is no guarantee that the operands are the same mode, as they
   15314              :    might be within FLOAT or FLOAT_EXTEND expressions.  */
   15315              : 
   15316              : #ifndef SYSV386_COMPAT
   15317              : /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   15318              :    wants to fix the assemblers because that causes incompatibility
   15319              :    with gcc.  No-one wants to fix gcc because that causes
   15320              :    incompatibility with assemblers...  You can use the option of
   15321              :    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
   15322              : #define SYSV386_COMPAT 1 /* Default, used only when not already defined.  */
   15323              : #endif /* !SYSV386_COMPAT */
   15324              : 
   15325              : const char *
   15326       606614 : output_387_binary_op (rtx_insn *insn, rtx *operands)
   15327              : {
   15328       606614 :   static char buf[40];
   15329       606614 :   const char *p;
   15330       606614 :   bool is_sse
   15331       606614 :     = (SSE_REG_P (operands[0])
   15332       661833 :        || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
   15333              : 
   15334        55219 :   if (is_sse)
   15335              :     p = "%v";
   15336        55219 :   else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
   15337        55212 :            || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
   15338              :     p = "fi";
   15339              :   else
   15340       606614 :     p = "f";
   15341              : 
   15342       606614 :   strcpy (buf, p);
   15343              : 
   15344       606614 :   switch (GET_CODE (operands[3]))
   15345              :     {
   15346              :     case PLUS:
   15347              :       p = "add"; break;
   15348              :     case MINUS:
   15349              :       p = "sub"; break;
   15350        94531 :     case MULT:
   15351        94531 :       p = "mul"; break;
   15352        27657 :     case DIV:
   15353        27657 :       p = "div"; break;
   15354            0 :     default:
   15355            0 :       gcc_unreachable ();
   15356              :     }
   15357              : 
   15358       606614 :   strcat (buf, p);
   15359              : 
   15360       606614 :   if (is_sse)
   15361              :    {
   15362       551395 :      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
   15363       551395 :      strcat (buf, p);
   15364              : 
   15365       551395 :      if (TARGET_AVX)
   15366              :        p = "\t{%2, %1, %0|%0, %1, %2}";
   15367              :      else
   15368       534937 :        p = "\t{%2, %0|%0, %2}";
   15369              : 
   15370       551395 :      strcat (buf, p);
   15371       551395 :      return buf;
   15372              :    }
   15373              : 
   15374              :   /* Even if we do not want to check the inputs, this documents input
   15375              :      constraints, which helps in understanding the following code.  */
   15376        55219 :   if (flag_checking)
   15377              :     {
   15378        55218 :       if (STACK_REG_P (operands[0])
   15379        55218 :           && ((REG_P (operands[1])
   15380        53638 :                && REGNO (operands[0]) == REGNO (operands[1])
   15381        49647 :                && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
   15382         5571 :               || (REG_P (operands[2])
   15383         5571 :                   && REGNO (operands[0]) == REGNO (operands[2])
   15384         5571 :                   && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
   15385       110436 :           && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
   15386              :         ; /* ok */
   15387              :       else
   15388            0 :         gcc_unreachable ();
   15389              :     }
   15390              : 
   15391        55219 :   switch (GET_CODE (operands[3]))
   15392              :     {
   15393        40407 :     case MULT:
   15394        40407 :     case PLUS:
   15395        40407 :       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
   15396         1989 :         std::swap (operands[1], operands[2]);
   15397              : 
   15398              :       /* know operands[0] == operands[1].  */
   15399              : 
   15400        40407 :       if (MEM_P (operands[2]))
   15401              :         {
   15402              :           p = "%Z2\t%2";
   15403              :           break;
   15404              :         }
   15405              : 
   15406        36048 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15407              :         {
   15408        21075 :           if (STACK_TOP_P (operands[0]))
   15409              :             /* How is it that we are storing to a dead operand[2]?
   15410              :                Well, presumably operands[1] is dead too.  We can't
   15411              :                store the result to st(0) as st(0) gets popped on this
   15412              :                instruction.  Instead store to operands[2] (which I
   15413              :                think has to be st(1)).  st(1) will be popped later.
   15414              :                gcc <= 2.8.1 didn't have this check and generated
   15415              :                assembly code that the Unixware assembler rejected.  */
   15416              :             p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
   15417              :           else
   15418              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15419              :           break;
   15420              :         }
   15421              : 
   15422        14973 :       if (STACK_TOP_P (operands[0]))
   15423              :         p = "\t{%y2, %0|%0, %y2}";    /* st(0) = st(0) op st(r2) */
   15424              :       else
   15425              :         p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
   15426              :       break;
   15427              : 
   15428        14812 :     case MINUS:
   15429        14812 :     case DIV:
   15430        14812 :       if (MEM_P (operands[1]))
   15431              :         {
   15432              :           p = "r%Z1\t%1";
   15433              :           break;
   15434              :         }
   15435              : 
   15436        14376 :       if (MEM_P (operands[2]))
   15437              :         {
   15438              :           p = "%Z2\t%2";
   15439              :           break;
   15440              :         }
   15441              : 
   15442        12764 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15443              :         {
   15444              : #if SYSV386_COMPAT
   15445              :           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
   15446              :              derived assemblers, confusingly reverse the direction of
   15447              :              the operation for fsub{r} and fdiv{r} when the
   15448              :              destination register is not st(0).  The Intel assembler
   15449              :              doesn't have this brain damage.  Read !SYSV386_COMPAT to
   15450              :              figure out what the hardware really does.  */
   15451         6175 :           if (STACK_TOP_P (operands[0]))
   15452              :             p = "{p\t%0, %2|rp\t%2, %0}";
   15453              :           else
   15454              :             p = "{rp\t%2, %0|p\t%0, %2}";
   15455              : #else
   15456              :           if (STACK_TOP_P (operands[0]))
   15457              :             /* As above for fmul/fadd, we can't store to st(0).  */
   15458              :             p = "rp\t{%0, %2|%2, %0}";        /* st(1) = st(0) op st(1); pop */
   15459              :           else
   15460              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15461              : #endif
   15462              :           break;
   15463              :         }
   15464              : 
   15465         6589 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   15466              :         {
   15467              : #if SYSV386_COMPAT
   15468         3076 :           if (STACK_TOP_P (operands[0]))
   15469              :             p = "{rp\t%0, %1|p\t%1, %0}";
   15470              :           else
   15471              :             p = "{p\t%1, %0|rp\t%0, %1}";
   15472              : #else
   15473              :           if (STACK_TOP_P (operands[0]))
   15474              :             p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
   15475              :           else
   15476              :             p = "rp\t{%1, %0|%0, %1}";        /* st(r2) = st(0) op st(r2); pop */
   15477              : #endif
   15478              :           break;
   15479              :         }
   15480              : 
   15481         3513 :       if (STACK_TOP_P (operands[0]))
   15482              :         {
   15483         2670 :           if (STACK_TOP_P (operands[1]))
   15484              :             p = "\t{%y2, %0|%0, %y2}";        /* st(0) = st(0) op st(r2) */
   15485              :           else
   15486              :             p = "r\t{%y1, %0|%0, %y1}";       /* st(0) = st(r1) op st(0) */
   15487              :           break;
   15488              :         }
   15489          843 :       else if (STACK_TOP_P (operands[1]))
   15490              :         {
   15491              : #if SYSV386_COMPAT
   15492              :           p = "{\t%1, %0|r\t%0, %1}";
   15493              : #else
   15494              :           p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */
   15495              : #endif
   15496              :         }
   15497              :       else
   15498              :         {
   15499              : #if SYSV386_COMPAT
   15500              :           p = "{r\t%2, %0|\t%0, %2}";
   15501              : #else
   15502              :           p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */
   15503              : #endif
   15504              :         }
   15505              :       break;
   15506              : 
   15507            0 :     default:
   15508            0 :       gcc_unreachable ();
   15509              :     }
   15510              : 
   15511        55219 :   strcat (buf, p);
   15512        55219 :   return buf;
   15513              : }
   15514              : 
   15515              : /* Return needed mode for entity in optimize_mode_switching pass.  */
   15516              : 
   15517              : static int
   15518         1656 : ix86_dirflag_mode_needed (rtx_insn *insn)
   15519              : {
   15520         1656 :   if (CALL_P (insn))
   15521              :     {
   15522          339 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15523              :         return X86_DIRFLAG_ANY;
   15524              :       else
   15525              :         /* No need to emit CLD in interrupt handler for TARGET_CLD.  */
   15526          339 :         return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
   15527              :     }
   15528              : 
   15529         1317 :   if (recog_memoized (insn) < 0)
   15530              :     return X86_DIRFLAG_ANY;
   15531              : 
   15532         1315 :   if (get_attr_type (insn) == TYPE_STR)
   15533              :     {
   15534              :       /* Emit cld instruction if stringops are used in the function.  */
   15535            1 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15536            0 :         return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
   15537              :       else
   15538              :         return X86_DIRFLAG_RESET;
   15539              :     }
   15540              : 
   15541              :   return X86_DIRFLAG_ANY;
   15542              : }
   15543              : 
   15544              : /* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */
   15545              : 
   15546              : static bool
   15547      2211789 : ix86_check_avx_upper_register (const_rtx exp)
   15548              : {
   15549              :   /* construct_container may return a parallel with expr_list
   15550              :      which contains the real reg and mode.  */
   15551      2211789 :   subrtx_iterator::array_type array;
   15552      8461493 :   FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
   15553              :     {
   15554      6410803 :       const_rtx x = *iter;
   15555      2576503 :       if (SSE_REG_P (x)
   15556       832760 :           && !EXT_REX_SSE_REG_P (x)
   15557      8063395 :           && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
   15558       161099 :         return true;
   15559              :     }
   15560              : 
   15561      2050690 :   return false;
   15562      2211789 : }
   15563              : 
   15564              : /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
   15565              : 
   15566              : static void
   15567        51779 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
   15568              : {
   15569        51779 :   if (SSE_REG_P (dest)
   15570        12859 :       && !EXT_REX_SSE_REG_P (dest)
   15571        77497 :       && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15572              :     {
   15573          760 :       bool *used = (bool *) data;
   15574          760 :       *used = true;
   15575              :     }
   15576        51779 : }
   15577              : 
   15578              : /* Return needed mode for entity in optimize_mode_switching pass.  */
   15579              : 
   15580              : static int
   15581      2065601 : ix86_avx_u128_mode_needed (rtx_insn *insn)
   15582              : {
   15583      2065601 :   if (DEBUG_INSN_P (insn))
   15584              :     return AVX_U128_ANY;
   15585              : 
   15586      2065601 :   if (CALL_P (insn))
   15587              :     {
   15588        49568 :       rtx link;
   15589              : 
   15590              :       /* Needed mode is set to AVX_U128_CLEAN if there are
   15591              :          no 256bit or 512bit modes used in function arguments. */
   15592        49568 :       for (link = CALL_INSN_FUNCTION_USAGE (insn);
   15593       134768 :            link;
   15594        85200 :            link = XEXP (link, 1))
   15595              :         {
   15596        86252 :           if (GET_CODE (XEXP (link, 0)) == USE)
   15597              :             {
   15598        84842 :               rtx arg = XEXP (XEXP (link, 0), 0);
   15599              : 
   15600        84842 :               if (ix86_check_avx_upper_register (arg))
   15601              :                 return AVX_U128_DIRTY;
   15602              :             }
   15603              :         }
   15604              : 
   15605              :       /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
   15606              :          nor 512bit registers used in the function return register.  */
   15607        48516 :       bool avx_upper_reg_found = false;
   15608        48516 :       note_stores (insn, ix86_check_avx_upper_stores,
   15609              :                    &avx_upper_reg_found);
   15610        48516 :       if (avx_upper_reg_found)
   15611              :         return AVX_U128_DIRTY;
   15612              : 
   15613              :       /* If the function is known to preserve some SSE registers,
   15614              :          RA and previous passes can legitimately rely on that for
   15615              :          modes wider than 256 bits.  It's only safe to issue a
   15616              :          vzeroupper if all SSE registers are clobbered.  */
   15617        48332 :       const function_abi &abi = insn_callee_abi (insn);
   15618        48332 :       if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
   15619              :           /* It should be safe to issue a vzeroupper before a sibling call.
   15620              :              Also, there is no mode_exit for a sibling call, so a vzeroupper
   15621              :              could otherwise be missing for it.  */
   15622        48332 :           || !(SIBLING_CALL_P (insn)
   15623        47048 :                || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
   15624        47048 :                                          abi.mode_clobbers (V4DImode))))
   15625         8436 :         return AVX_U128_ANY;
   15626              : 
   15627        39896 :       return AVX_U128_CLEAN;
   15628              :     }
   15629              : 
   15630      2016033 :   rtx set = single_set (insn);
   15631      2016033 :   if (set)
   15632              :     {
   15633      1943069 :       rtx dest = SET_DEST (set);
   15634      1943069 :       rtx src = SET_SRC (set);
   15635      1461680 :       if (SSE_REG_P (dest)
   15636       552260 :           && !EXT_REX_SSE_REG_P (dest)
   15637      3035347 :           && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15638              :         {
   15639              :           /* This is a YMM/ZMM load.  Return AVX_U128_DIRTY if the
   15640              :              source isn't zero.  */
   15641       168567 :           if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
   15642              :             return AVX_U128_DIRTY;
   15643              :           else
   15644              :             return AVX_U128_ANY;
   15645              :         }
   15646              :       else
   15647              :         {
   15648      1774502 :           if (ix86_check_avx_upper_register (src))
   15649              :             return AVX_U128_DIRTY;
   15650              :         }
   15651              : 
   15652              :       /* This isn't a YMM/ZMM load/store.  */
   15653              :       return AVX_U128_ANY;
   15654              :     }
   15655              : 
   15656              :   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
   15657              :      Hardware changes state only when a 256bit register is written to,
   15658              :      but we need to prevent the compiler from moving the optimal insertion
   15659              :      point above an eventual read from a 256bit or 512bit register.  */
   15660        72964 :   if (ix86_check_avx_upper_register (PATTERN (insn)))
   15661              :     return AVX_U128_DIRTY;
   15662              : 
   15663              :   return AVX_U128_ANY;
   15664              : }
   15665              : 
   15666              : /* Return mode that i387 must be switched into
   15667              :    prior to the execution of insn.  */
   15668              : 
   15669              : static int
   15670       417050 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
   15671              : {
   15672       417050 :   enum attr_i387_cw mode;
   15673              : 
   15674              :   /* The mode UNINITIALIZED is used to store the control word after a
   15675              :      function call or ASM pattern.  The mode ANY specifies that the
   15676              :      function has no requirements on the control word and makes no
   15677              :      changes to the bits we are interested in.  */
   15678              : 
   15679       417050 :   if (CALL_P (insn)
   15680       417050 :       || (NONJUMP_INSN_P (insn)
   15681       341245 :           && (asm_noperands (PATTERN (insn)) >= 0
   15682       341192 :               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
   15683        14637 :     return I387_CW_UNINITIALIZED;
   15684              : 
   15685       402413 :   if (recog_memoized (insn) < 0)
   15686              :     return I387_CW_ANY;
   15687              : 
   15688       401471 :   mode = get_attr_i387_cw (insn);
   15689              : 
   15690       401471 :   switch (entity)
   15691              :     {
   15692            0 :     case I387_ROUNDEVEN:
   15693            0 :       if (mode == I387_CW_ROUNDEVEN)
   15694              :         return mode;
   15695              :       break;
   15696              : 
   15697       396824 :     case I387_TRUNC:
   15698       396824 :       if (mode == I387_CW_TRUNC)
   15699              :         return mode;
   15700              :       break;
   15701              : 
   15702         3617 :     case I387_FLOOR:
   15703         3617 :       if (mode == I387_CW_FLOOR)
   15704              :         return mode;
   15705              :       break;
   15706              : 
   15707         1030 :     case I387_CEIL:
   15708         1030 :       if (mode == I387_CW_CEIL)
   15709              :         return mode;
   15710              :       break;
   15711              : 
   15712            0 :     default:
   15713            0 :       gcc_unreachable ();
   15714              :     }
   15715              : 
   15716              :   return I387_CW_ANY;
   15717              : }
   15718              : 
   15719              : /* Return mode that entity must be switched into
   15720              :    prior to the execution of insn.  */
   15721              : 
   15722              : static int
   15723      2484307 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
   15724              : {
   15725      2484307 :   switch (entity)
   15726              :     {
   15727         1656 :     case X86_DIRFLAG:
   15728         1656 :       return ix86_dirflag_mode_needed (insn);
   15729      2065601 :     case AVX_U128:
   15730      2065601 :       return ix86_avx_u128_mode_needed (insn);
   15731       417050 :     case I387_ROUNDEVEN:
   15732       417050 :     case I387_TRUNC:
   15733       417050 :     case I387_FLOOR:
   15734       417050 :     case I387_CEIL:
   15735       417050 :       return ix86_i387_mode_needed (entity, insn);
   15736            0 :     default:
   15737            0 :       gcc_unreachable ();
   15738              :     }
   15739              :   return 0;
   15740              : }
   15741              : 
   15742              : /* Calculate mode of upper 128bit AVX registers after the insn.  */
   15743              : 
   15744              : static int
   15745      2065601 : ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
   15746              : {
   15747      2065601 :   rtx pat = PATTERN (insn);
   15748              : 
   15749      2065601 :   if (vzeroupper_pattern (pat, VOIDmode)
   15750      2065601 :       || vzeroall_pattern (pat, VOIDmode))
   15751          175 :     return AVX_U128_CLEAN;
   15752              : 
   15753              :   /* We know that state is clean after CALL insn if there are no
   15754              :      256bit or 512bit registers used in the function return register. */
   15755      2065426 :   if (CALL_P (insn))
   15756              :     {
   15757        49522 :       bool avx_upper_reg_found = false;
   15758        49522 :       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
   15759              : 
   15760        49522 :       if (avx_upper_reg_found)
   15761              :         return AVX_U128_DIRTY;
   15762              : 
   15763              :       /* If the function doesn't clobber any SSE registers or only clobbers
   15764              :          the 128-bit part, then vzeroupper isn't issued before the function
   15765              :          exit.  The status is not CLEAN but ANY after the function.  */
   15766        48946 :       const function_abi &abi = insn_callee_abi (insn);
   15767        48946 :       if (!(SIBLING_CALL_P (insn)
   15768        47667 :             || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
   15769        47667 :                                       abi.mode_clobbers (V4DImode))))
   15770         8732 :         return AVX_U128_ANY;
   15771              : 
   15772        40214 :       return  AVX_U128_CLEAN;
   15773              :     }
   15774              : 
   15775              :   /* Otherwise, return current mode.  Remember that if insn
   15776              :      references AVX 256bit or 512bit registers, the mode was already
   15777              :      changed to DIRTY from MODE_NEEDED.  */
   15778              :   return mode;
   15779              : }
   15780              : 
   15781              : /* Return the mode that an insn results in.  */
   15782              : 
   15783              : static int
   15784      2483462 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
   15785              : {
   15786      2483462 :   switch (entity)
   15787              :     {
   15788              :     case X86_DIRFLAG:
   15789              :       return mode;
   15790      2065601 :     case AVX_U128:
   15791      2065601 :       return ix86_avx_u128_mode_after (mode, insn);
   15792              :     case I387_ROUNDEVEN:
   15793              :     case I387_TRUNC:
   15794              :     case I387_FLOOR:
   15795              :     case I387_CEIL:
   15796              :       return mode;
   15797            0 :     default:
   15798            0 :       gcc_unreachable ();
   15799              :     }
   15800              : }
   15801              : 
   15802              : static int
   15803          120 : ix86_dirflag_mode_entry (void)
   15804              : {
   15805              :   /* For TARGET_CLD or in the interrupt handler we can't assume
   15806              :      direction flag state at function entry.  */
   15807          120 :   if (TARGET_CLD
   15808          118 :       || cfun->machine->func_type != TYPE_NORMAL)
   15809          120 :     return X86_DIRFLAG_ANY;
   15810              : 
   15811              :   return X86_DIRFLAG_RESET;
   15812              : }
   15813              : 
   15814              : static int
   15815       122982 : ix86_avx_u128_mode_entry (void)
   15816              : {
   15817       122982 :   tree arg;
   15818              : 
   15819              :   /* Entry mode is set to AVX_U128_DIRTY if there are
   15820              :      256bit or 512bit modes used in function arguments.  */
   15821       310471 :   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
   15822       187489 :        arg = TREE_CHAIN (arg))
   15823              :     {
   15824       221457 :       rtx incoming = DECL_INCOMING_RTL (arg);
   15825              : 
   15826       221457 :       if (incoming && ix86_check_avx_upper_register (incoming))
   15827              :         return AVX_U128_DIRTY;
   15828              :     }
   15829              : 
   15830              :   return AVX_U128_CLEAN;
   15831              : }
   15832              : 
   15833              : /* Return a mode that ENTITY is assumed to be
   15834              :    switched to at function entry.  */
   15835              : 
   15836              : static int
   15837        75825 : ix86_mode_entry (int entity)
   15838              : {
   15839        75825 :   switch (entity)
   15840              :     {
   15841          120 :     case X86_DIRFLAG:
   15842          120 :       return ix86_dirflag_mode_entry ();
   15843        74570 :     case AVX_U128:
   15844        74570 :       return ix86_avx_u128_mode_entry ();
   15845              :     case I387_ROUNDEVEN:
   15846              :     case I387_TRUNC:
   15847              :     case I387_FLOOR:
   15848              :     case I387_CEIL:
   15849              :       return I387_CW_ANY;
   15850            0 :     default:
   15851            0 :       gcc_unreachable ();
   15852              :     }
   15853              : }
   15854              : 
   15855              : static int
   15856        73321 : ix86_avx_u128_mode_exit (void)
   15857              : {
   15858        73321 :   rtx reg = crtl->return_rtx;
   15859              : 
   15860              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
   15861              :      or 512bit modes used in the function return register.  */
   15862        73321 :   if (reg && ix86_check_avx_upper_register (reg))
   15863              :     return AVX_U128_DIRTY;
   15864              : 
   15865              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
   15866              :      modes used in function arguments, otherwise return AVX_U128_CLEAN.
   15867              :    */
   15868        48412 :   return ix86_avx_u128_mode_entry ();
   15869              : }
   15870              : 
   15871              : /* Return a mode that ENTITY is assumed to be
   15872              :    switched to at function exit.  */
   15873              : 
   15874              : static int
   15875        74431 : ix86_mode_exit (int entity)
   15876              : {
   15877        74431 :   switch (entity)
   15878              :     {
   15879              :     case X86_DIRFLAG:
   15880              :       return X86_DIRFLAG_ANY;
   15881        73321 :     case AVX_U128:
   15882        73321 :       return ix86_avx_u128_mode_exit ();
   15883         1076 :     case I387_ROUNDEVEN:
   15884         1076 :     case I387_TRUNC:
   15885         1076 :     case I387_FLOOR:
   15886         1076 :     case I387_CEIL:
   15887         1076 :       return I387_CW_ANY;
   15888            0 :     default:
   15889            0 :       gcc_unreachable ();
   15890              :     }
   15891              : }
   15892              : 
   15893              : static int
   15894      2175346 : ix86_mode_priority (int, int n)
   15895              : {
   15896      2175346 :   return n;
   15897              : }
   15898              : 
   15899              : /* Output code to initialize control word copies used by trunc?f?i and
   15900              :    rounding patterns.  The current control word is saved in SLOT_CW_STORED,
   15901              :    while a copy with rounding bits set for MODE goes in MODE's own slot.  */
   15902              : 
   15903              : static void
   15904         3296 : emit_i387_cw_initialization (int mode)
   15905              : {
   15906         3296 :   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
   15907         3296 :   rtx new_mode;
   15908              : 
   15909         3296 :   enum ix86_stack_slot slot;
   15910              : 
   15911         3296 :   rtx reg = gen_reg_rtx (HImode);
   15912              : 
   15913         3296 :   emit_insn (gen_x86_fnstcw_1 (stored_mode));
   15914         3296 :   emit_move_insn (reg, copy_rtx (stored_mode));
   15915              : 
   15916         3296 :   switch (mode)
   15917              :     {
   15918            0 :     case I387_CW_ROUNDEVEN:
   15919              :       /* round to nearest */
   15920            0 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15921            0 :       slot = SLOT_CW_ROUNDEVEN;
   15922            0 :       break;
   15923              : 
   15924         3100 :     case I387_CW_TRUNC:
   15925              :       /* round toward zero (truncate) */
   15926         3100 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
   15927         3100 :       slot = SLOT_CW_TRUNC;
   15928         3100 :       break;
   15929              : 
   15930          137 :     case I387_CW_FLOOR:
   15931              :       /* round down toward -oo */
   15932          137 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15933          137 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
   15934          137 :       slot = SLOT_CW_FLOOR;
   15935          137 :       break;
   15936              : 
   15937           59 :     case I387_CW_CEIL:
   15938              :       /* round up toward +oo */
   15939           59 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15940           59 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
   15941           59 :       slot = SLOT_CW_CEIL;
   15942           59 :       break;
   15943              : 
   15944            0 :     default:
   15945            0 :       gcc_unreachable ();
   15946              :     }
   15947              : 
   15948         3296 :   gcc_assert (slot < MAX_386_STACK_LOCALS);
   15949              : 
   15950         3296 :   new_mode = assign_386_stack_local (HImode, slot);
   15951         3296 :   emit_move_insn (new_mode, reg);
   15952         3296 : }
   15953              : 
   15954              : /* Generate one or more insns to set ENTITY to MODE.  */
   15955              : 
   15956              : static void
   15957        51590 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
   15958              :                     HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
   15959              : {
   15960        51590 :   switch (entity)
   15961              :     {
   15962          265 :     case X86_DIRFLAG:
   15963          265 :       if (mode == X86_DIRFLAG_RESET)
   15964          265 :         emit_insn (gen_cld ());
   15965              :       break;
   15966        43149 :     case AVX_U128:
   15967        43149 :       if (mode == AVX_U128_CLEAN)
   15968        21843 :         ix86_expand_avx_vzeroupper ();
   15969              :       break;
   15970         8176 :     case I387_ROUNDEVEN:
   15971         8176 :     case I387_TRUNC:
   15972         8176 :     case I387_FLOOR:
   15973         8176 :     case I387_CEIL:
   15974         8176 :       if (mode != I387_CW_ANY
   15975         8176 :           && mode != I387_CW_UNINITIALIZED)
   15976         3296 :         emit_i387_cw_initialization (mode);
   15977              :       break;
   15978            0 :     default:
   15979            0 :       gcc_unreachable ();
   15980              :     }
   15981        51590 : }
   15982              : 
   15983              : /* Output code for INSN to convert a float to a signed int.  OPERANDS
   15984              :    are the insn operands.  The output may be [HSD]Imode and the input
   15985              :    operand may be [SDX]Fmode.  */
   15986              : 
   15987              : const char *
   15988         7437 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
   15989              : {
   15990         7437 :   bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   15991         7437 :   bool dimode_p = GET_MODE (operands[0]) == DImode;
   15992         7437 :   int round_mode = get_attr_i387_cw (insn);
   15993              : 
   15994         7437 :   static char buf[40];
   15995         7437 :   const char *p;
   15996              : 
   15997              :   /* Jump through a hoop or two for DImode, since the hardware has no
   15998              :      non-popping instruction.  We used to do this a different way, but
   15999              :      that was somewhat fragile and broke with post-reload splitters.  */
   16000         7437 :   if ((dimode_p || fisttp) && !stack_top_dies)
   16001           25 :     output_asm_insn ("fld\t%y1", operands);
   16002              : 
   16003         7437 :   gcc_assert (STACK_TOP_P (operands[1]));
   16004         7437 :   gcc_assert (MEM_P (operands[0]));
   16005         7437 :   gcc_assert (GET_MODE (operands[1]) != TFmode);
   16006              : 
   16007         7437 :   if (fisttp)
   16008              :     return "fisttp%Z0\t%0";
   16009              : 
   16010         7436 :   strcpy (buf, "fist");
   16011              : 
   16012         7436 :   if (round_mode != I387_CW_ANY)
   16013         7392 :     output_asm_insn ("fldcw\t%3", operands);
   16014              : 
   16015         7436 :   p = "p%Z0\t%0";
   16016         7436 :   strcat (buf, p + !(stack_top_dies || dimode_p));
   16017              : 
   16018         7436 :   output_asm_insn (buf, operands);
   16019              : 
   16020         7436 :   if (round_mode != I387_CW_ANY)
   16021         7392 :     output_asm_insn ("fldcw\t%2", operands);
   16022              : 
   16023              :   return "";
   16024              : }
   16025              : 
   16026              : /* Output code for x87 ffreep insn.  The OPNO argument, which may only
   16027              :    have the values zero or one, indicates the ffreep insn's operand
   16028              :    from the OPERANDS array.  */
   16029              : 
   16030              : static const char *
   16031       275113 : output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
   16032              : {
   16033            0 :   if (TARGET_USE_FFREEP)
   16034              : #ifdef HAVE_AS_IX86_FFREEP
   16035            0 :     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
   16036              : #else
   16037              :     {
   16038              :       static char retval[32];
   16039              :       int regno = REGNO (operands[opno]);
   16040              : 
   16041              :       gcc_assert (STACK_REGNO_P (regno));
   16042              : 
   16043              :       regno -= FIRST_STACK_REG;
   16044              : 
   16045              :       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
   16046              :       return retval;
   16047              :     }
   16048              : #endif
   16049              : 
   16050            0 :   return opno ? "fstp\t%y1" : "fstp\t%y0";
   16051              : }
   16052              : 
   16053              : 
   16054              : /* Output code for INSN to compare OPERANDS.  EFLAGS_P is true when fcomi
   16055              :    should be used.  UNORDERED_P is true when fucom should be used.  */
   16056              : 
   16057              : const char *
   16058       107670 : output_fp_compare (rtx_insn *insn, rtx *operands,
   16059              :                    bool eflags_p, bool unordered_p)
   16060              : {
   16061       107670 :   rtx *xops = eflags_p ? &operands[0] : &operands[1];
   16062       107670 :   bool stack_top_dies;
   16063              : 
   16064       107670 :   static char buf[40];
   16065       107670 :   const char *p;
   16066              : 
   16067       107670 :   gcc_assert (STACK_TOP_P (xops[0]));
   16068              : 
   16069       107670 :   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   16070              : 
   16071       107670 :   if (eflags_p)
   16072              :     {
   16073       107670 :       p = unordered_p ? "fucomi" : "fcomi";
   16074       107670 :       strcpy (buf, p);
   16075              : 
   16076       107670 :       p = "p\t{%y1, %0|%0, %y1}";
   16077       107670 :       strcat (buf, p + !stack_top_dies);
   16078              : 
   16079       107670 :       return buf;
   16080              :     }
   16081              : 
   16082            0 :   if (STACK_REG_P (xops[1])
   16083            0 :       && stack_top_dies
   16084            0 :       && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
   16085              :     {
   16086            0 :       gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
   16087              : 
   16088              :       /* If the top of the 387 stack dies, and the other operand
   16089              :          is also a stack register that dies, then this must be a
   16090              :          `fcompp' float compare.  */
   16091            0 :       p = unordered_p ? "fucompp" : "fcompp";
   16092            0 :       strcpy (buf, p);
   16093              :     }
   16094            0 :   else if (const0_operand (xops[1], VOIDmode))
   16095              :     {
   16096            0 :       gcc_assert (!unordered_p);
   16097            0 :       strcpy (buf, "ftst");
   16098              :     }
   16099              :   else
   16100              :     {
   16101            0 :       if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
   16102              :         {
   16103            0 :           gcc_assert (!unordered_p);
   16104              :           p = "ficom";
   16105              :         }
   16106              :       else
   16107            0 :         p = unordered_p ? "fucom" : "fcom";
   16108              : 
   16109            0 :       strcpy (buf, p);
   16110              : 
   16111            0 :       p = "p%Z2\t%y2";
   16112            0 :       strcat (buf, p + !stack_top_dies);
   16113              :     }
   16114              : 
   16115            0 :   output_asm_insn (buf, operands);
   16116            0 :   return "fnstsw\t%0";
   16117              : }
   16118              : 
   16119              : void
   16120       112693 : ix86_output_addr_vec_elt (FILE *file, int value)
   16121              : {
   16122       112693 :   const char *directive = ASM_LONG;
   16123              : 
   16124              : #ifdef ASM_QUAD
   16125       112693 :   if (TARGET_LP64)
   16126       101051 :     directive = ASM_QUAD;
   16127              : #else
   16128              :   gcc_assert (!TARGET_64BIT);
   16129              : #endif
   16130              : 
   16131       112693 :   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
   16132       112693 : }
   16133              : 
   16134              : void
   16135        25762 : ix86_output_addr_diff_elt (FILE *file, int value, int rel)
   16136              : {
   16137        25762 :   const char *directive = ASM_LONG;
   16138              : 
   16139              : #ifdef ASM_QUAD
   16140        38563 :   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
   16141              :     directive = ASM_QUAD;
   16142              : #else
   16143              :   gcc_assert (!TARGET_64BIT);
   16144              : #endif
   16145              :   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
   16146        25762 :   if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
   16147        12801 :     fprintf (file, "%s%s%d-%s%d\n",
   16148              :              directive, LPREFIX, value, LPREFIX, rel);
   16149              : #if TARGET_MACHO
   16150              :   else if (TARGET_MACHO)
   16151              :     {
   16152              :       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
   16153              :       machopic_output_function_base_name (file);
   16154              :       putc ('\n', file);
   16155              :     }
   16156              : #endif
   16157        12961 :   else if (HAVE_AS_GOTOFF_IN_DATA)
   16158        12961 :     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
   16159              :   else
   16160              :     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
   16161              :                  GOT_SYMBOL_NAME, LPREFIX, value);
   16162        25762 : }
   16163              : 
   16164              : #define LEA_MAX_STALL (3)
   16165              : #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
   16166              : 
   16167              : /* Increase given DISTANCE in half-cycles according to
   16168              :    dependencies between PREV and NEXT instructions.
   16169              :    Add 1 half-cycle if there is no dependency and
   16170              :    go to next cycle if there is some dependency.  */
   16171              : 
   16172              : static unsigned int
   16173         2129 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
   16174              : {
   16175         2129 :   df_ref def, use;
   16176              : 
   16177         2129 :   if (!prev || !next)
   16178          748 :     return distance + (distance & 1) + 2;
   16179              : 
   16180         1381 :   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
   16181          226 :     return distance + 1;
   16182              : 
   16183         1920 :   FOR_EACH_INSN_USE (use, next)
   16184         2448 :     FOR_EACH_INSN_DEF (def, prev)
   16185         1683 :       if (!DF_REF_IS_ARTIFICIAL (def)
   16186         1683 :           && DF_REF_REGNO (use) == DF_REF_REGNO (def))
   16187          735 :         return distance + (distance & 1) + 2;
   16188              : 
   16189          420 :   return distance + 1;
   16190              : }
   16191              : 
   16192              : /* Function checks if instruction INSN defines register number
   16193              :    REGNO1 or REGNO2.  */
   16194              : 
   16195              : bool
   16196         2073 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
   16197              :                   rtx_insn *insn)
   16198              : {
   16199         2073 :   df_ref def;
   16200              : 
   16201         3739 :   FOR_EACH_INSN_DEF (def, insn)
   16202         2070 :     if (DF_REF_REG_DEF_P (def)
   16203         2070 :         && !DF_REF_IS_ARTIFICIAL (def)
   16204         2070 :         && (regno1 == DF_REF_REGNO (def)
   16205         1682 :             || regno2 == DF_REF_REGNO (def)))
   16206              :       return true;
   16207              : 
   16208              :   return false;
   16209              : }
   16210              : 
   16211              : /* Function checks if instruction INSN uses register number
   16212              :    REGNO as a part of address expression.  */
   16213              : 
   16214              : static bool
   16215         1182 : insn_uses_reg_mem (unsigned int regno, rtx insn)
   16216              : {
   16217         1182 :   df_ref use;
   16218              : 
   16219         2475 :   FOR_EACH_INSN_USE (use, insn)
   16220         1384 :     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
   16221              :       return true;
   16222              : 
   16223              :   return false;
   16224              : }
   16225              : 
   16226              : /* Search backward for non-agu definition of register number REGNO1
   16227              :    or register number REGNO2 in basic block starting from instruction
   16228              :    START up to head of basic block or instruction INSN.
   16229              : 
   16230              :    Function puts true value into *FOUND var if definition was found
   16231              :    and false otherwise.
   16232              : 
   16233              :    Distance in half-cycles between START and found instruction or head
   16234              :    of BB is added to DISTANCE and returned.  */
   16235              : 
   16236              : static int
   16237          624 : distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
   16238              :                                rtx_insn *insn, int distance,
   16239              :                                rtx_insn *start, bool *found)
   16240              : {
   16241          624 :   basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
   16242          624 :   rtx_insn *prev = start;
   16243          624 :   rtx_insn *next = NULL;
   16244              : 
   16245          624 :   *found = false;
   16246              : 
   16247          624 :   while (prev
   16248         1861 :          && prev != insn
   16249         1861 :          && distance < LEA_SEARCH_THRESHOLD)
   16250              :     {
   16251         1660 :       if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
   16252              :         {
   16253          947 :           distance = increase_distance (prev, next, distance);
   16254          947 :           if (insn_defines_reg (regno1, regno2, prev))
   16255              :             {
   16256          243 :               if (recog_memoized (prev) < 0
   16257          243 :                   || get_attr_type (prev) != TYPE_LEA)
   16258              :                 {
   16259          200 :                   *found = true;
   16260          200 :                   return distance;
   16261              :                 }
   16262              :             }
   16263              : 
   16264              :           next = prev;
   16265              :         }
   16266         1460 :       if (prev == BB_HEAD (bb))
   16267              :         break;
   16268              : 
   16269         1237 :       prev = PREV_INSN (prev);
   16270              :     }
   16271              : 
   16272              :   return distance;
   16273              : }
   16274              : 
   16275              : /* Search backward for non-agu definition of register number REGNO1
   16276              :    or register number REGNO2 in INSN's basic block until
   16277              :    1. Pass LEA_SEARCH_THRESHOLD half-cycles, or
   16278              :    2. Reach neighbor BBs boundary, or
   16279              :    3. Reach non-agu definition.
   16280              :    Returns the distance between the non-agu definition point and INSN.
   16281              :    If no definition point, returns -1.  */
   16282              : 
   16283              : static int
   16284          429 : distance_non_agu_define (unsigned int regno1, unsigned int regno2,
   16285              :                          rtx_insn *insn)
   16286              : {
   16287          429 :   basic_block bb = BLOCK_FOR_INSN (insn);
   16288          429 :   int distance = 0;
   16289          429 :   bool found = false;
   16290              : 
   16291          429 :   if (insn != BB_HEAD (bb))
   16292          429 :     distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
   16293              :                                               distance, PREV_INSN (insn),
   16294              :                                               &found);
   16295              : 
   16296          429 :   if (!found && distance < LEA_SEARCH_THRESHOLD)
   16297              :     {
   16298          167 :       edge e;
   16299          167 :       edge_iterator ei;
   16300          167 :       bool simple_loop = false;
   16301              : 
   16302          336 :       FOR_EACH_EDGE (e, ei, bb->preds)
   16303          206 :         if (e->src == bb)
   16304              :           {
   16305              :             simple_loop = true;
   16306              :             break;
   16307              :           }
   16308              : 
   16309          167 :       if (simple_loop)
   16310           37 :         distance = distance_non_agu_define_in_bb (regno1, regno2,
   16311              :                                                   insn, distance,
   16312           37 :                                                   BB_END (bb), &found);
   16313              :       else
   16314              :         {
   16315          130 :           int shortest_dist = -1;
   16316          130 :           bool found_in_bb = false;
   16317              : 
   16318          288 :           FOR_EACH_EDGE (e, ei, bb->preds)
   16319              :             {
   16320          158 :               int bb_dist
   16321          316 :                 = distance_non_agu_define_in_bb (regno1, regno2,
   16322              :                                                  insn, distance,
   16323          158 :                                                  BB_END (e->src),
   16324              :                                                  &found_in_bb);
   16325          158 :               if (found_in_bb)
   16326              :                 {
   16327           24 :                   if (shortest_dist < 0)
   16328              :                     shortest_dist = bb_dist;
   16329            0 :                   else if (bb_dist > 0)
   16330            0 :                     shortest_dist = MIN (bb_dist, shortest_dist);
   16331              : 
   16332           24 :                   found = true;
   16333              :                 }
   16334              :             }
   16335              : 
   16336          130 :           distance = shortest_dist;
   16337              :         }
   16338              :     }
   16339              : 
   16340          429 :   if (!found)
   16341              :     return -1;
   16342              : 
   16343          200 :   return distance >> 1;
   16344              : }
   16345              : 
   16346              : /* Return the distance in half-cycles between INSN and the next
   16347              :    insn that uses register number REGNO in memory address added
   16348              :    to DISTANCE.  Return -1 if REGNO is set.
   16349              : 
   16350              :    Put true value into *FOUND if register usage was found and
   16351              :    false otherwise.
   16352              :    Put true value into *REDEFINED if register redefinition was
   16353              :    found and false otherwise.  */
   16354              : 
   16355              : static int
   16356          767 : distance_agu_use_in_bb (unsigned int regno,
   16357              :                         rtx_insn *insn, int distance, rtx_insn *start,
   16358              :                         bool *found, bool *redefined)
   16359              : {
   16360          767 :   basic_block bb = NULL;
   16361          767 :   rtx_insn *next = start;
   16362          767 :   rtx_insn *prev = NULL;
   16363              : 
   16364          767 :   *found = false;
   16365          767 :   *redefined = false;
   16366              : 
   16367          767 :   if (start != NULL_RTX)
   16368              :     {
   16369          750 :       bb = BLOCK_FOR_INSN (start);
   16370          750 :       if (start != BB_HEAD (bb))
   16371              :         /* If insn and start belong to the same bb, set prev to insn,
   16372              :            so the call to increase_distance will increase the distance
   16373              :            between insns by 1.  */
   16374          412 :         prev = insn;
   16375              :     }
   16376              : 
   16377         2566 :   while (next
   16378         2566 :          && next != insn
   16379         2566 :          && distance < LEA_SEARCH_THRESHOLD)
   16380              :     {
   16381         2378 :       if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
   16382              :         {
   16383         1182 :           distance = increase_distance(prev, next, distance);
   16384         1182 :           if (insn_uses_reg_mem (regno, next))
   16385              :             {
   16386              :               /* Return DISTANCE if REGNO is used in memory
   16387              :                  address in NEXT.  */
   16388           91 :               *found = true;
   16389           91 :               return distance;
   16390              :             }
   16391              : 
   16392         1091 :           if (insn_defines_reg (regno, INVALID_REGNUM, next))
   16393              :             {
   16394              :               /* Return -1 if REGNO is set in NEXT.  */
   16395          156 :               *redefined = true;
   16396          156 :               return -1;
   16397              :             }
   16398              : 
   16399              :           prev = next;
   16400              :         }
   16401              : 
   16402         2131 :       if (next == BB_END (bb))
   16403              :         break;
   16404              : 
   16405         1799 :       next = NEXT_INSN (next);
   16406              :     }
   16407              : 
   16408              :   return distance;
   16409              : }
   16410              : 
   16411              : /* Return the distance between INSN and the next insn that uses
   16412              :    register number REGNO0 in memory address.  Return -1 if no such
   16413              :    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
   16414              : 
   16415              : static int
   16416          429 : distance_agu_use (unsigned int regno0, rtx_insn *insn)
   16417              : {
   16418          429 :   basic_block bb = BLOCK_FOR_INSN (insn);
   16419          429 :   int distance = 0;
   16420          429 :   bool found = false;
   16421          429 :   bool redefined = false;
   16422              : 
   16423          429 :   if (insn != BB_END (bb))
   16424          412 :     distance = distance_agu_use_in_bb (regno0, insn, distance,
   16425              :                                        NEXT_INSN (insn),
   16426              :                                        &found, &redefined);
   16427              : 
   16428          429 :   if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
   16429              :     {
   16430          250 :       edge e;
   16431          250 :       edge_iterator ei;
   16432          250 :       bool simple_loop = false;
   16433              : 
   16434          535 :       FOR_EACH_EDGE (e, ei, bb->succs)
   16435          355 :         if (e->dest == bb)
   16436              :           {
   16437              :             simple_loop = true;
   16438              :             break;
   16439              :           }
   16440              : 
   16441          250 :       if (simple_loop)
   16442           70 :         distance = distance_agu_use_in_bb (regno0, insn,
   16443              :                                            distance, BB_HEAD (bb),
   16444              :                                            &found, &redefined);
   16445              :       else
   16446              :         {
   16447          180 :           int shortest_dist = -1;
   16448          180 :           bool found_in_bb = false;
   16449          180 :           bool redefined_in_bb = false;
   16450              : 
   16451          465 :           FOR_EACH_EDGE (e, ei, bb->succs)
   16452              :             {
   16453          285 :               int bb_dist
   16454          570 :                 = distance_agu_use_in_bb (regno0, insn,
   16455          285 :                                           distance, BB_HEAD (e->dest),
   16456              :                                           &found_in_bb, &redefined_in_bb);
   16457          285 :               if (found_in_bb)
   16458              :                 {
   16459           17 :                   if (shortest_dist < 0)
   16460              :                     shortest_dist = bb_dist;
   16461            2 :                   else if (bb_dist > 0)
   16462            2 :                     shortest_dist = MIN (bb_dist, shortest_dist);
   16463              : 
   16464           17 :                   found = true;
   16465              :                 }
   16466              :             }
   16467              : 
   16468          180 :           distance = shortest_dist;
   16469              :         }
   16470              :     }
   16471              : 
   16472          429 :   if (!found || redefined)
   16473              :     return -1;
   16474              : 
   16475           89 :   return distance >> 1;
   16476              : }
   16477              : 
   16478              : /* Define this macro to tune LEA priority vs ADD; it takes effect when
   16479              :    there is a dilemma of choosing LEA or ADD:
   16480              :    Negative value: ADD is more preferred than LEA
   16481              :    Zero: Neutral
   16482              :    Positive value: LEA is more preferred than ADD.  */
   16483              : #define IX86_LEA_PRIORITY 0
   16484              : 
   16485              : /* Return true if usage of lea INSN has performance advantage
   16486              :    over a sequence of instructions.  Instructions sequence has
   16487              :    SPLIT_COST cycles higher latency than lea latency.  */
   16488              : 
   16489              : static bool
   16490         1629 : ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
   16491              :                       unsigned int regno2, int split_cost, bool has_scale)
   16492              : {
   16493         1629 :   int dist_define, dist_use;
   16494              : 
   16495              :   /* For Atom processors newer than Bonnell, if using a 2-source or
   16496              :      3-source LEA for non-destructive destination purposes, or due to
   16497              :      wanting ability to use SCALE, the use of LEA is justified.  */
   16498         1629 :   if (!TARGET_CPU_P (BONNELL))
   16499              :     {
   16500         1200 :       if (has_scale)
   16501              :         return true;
   16502         1181 :       if (split_cost < 1)
   16503              :         return false;
   16504          406 :       if (regno0 == regno1 || regno0 == regno2)
   16505              :         return false;
   16506              :       return true;
   16507              :     }
   16508              : 
   16509              :   /* Remember recog_data content.  */
   16510          429 :   struct recog_data_d recog_data_save = recog_data;
   16511              : 
   16512          429 :   dist_define = distance_non_agu_define (regno1, regno2, insn);
   16513          429 :   dist_use = distance_agu_use (regno0, insn);
   16514              : 
   16515              :   /* distance_non_agu_define can call get_attr_type which can call
   16516              :      recog_memoized, restore recog_data back to previous content.  */
   16517          429 :   recog_data = recog_data_save;
   16518              : 
   16519          429 :   if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
   16520              :     {
   16521              :       /* If there is no non AGU operand definition, no AGU
   16522              :          operand usage and split cost is 0 then both lea
   16523              :          and non lea variants have same priority.  Currently
   16524              :          we prefer lea for 64 bit code and non lea on 32 bit
   16525              :          code.  */
   16526          232 :       if (dist_use < 0 && split_cost == 0)
   16527           98 :         return TARGET_64BIT || IX86_LEA_PRIORITY;
   16528              :       else
   16529              :         return true;
   16530              :     }
   16531              : 
   16532              :   /* With longer definitions distance lea is more preferable.
   16533              :      Here we change it to take into account splitting cost and
   16534              :      lea priority.  */
   16535          197 :   dist_define += split_cost + IX86_LEA_PRIORITY;
   16536              : 
   16537              :   /* If there is no use in memory address then we just check
   16538              :      that split cost exceeds AGU stall.  */
   16539          197 :   if (dist_use < 0)
   16540          193 :     return dist_define > LEA_MAX_STALL;
   16541              : 
   16542              :   /* If this insn has both backward non-agu dependence and forward
   16543              :      agu dependence, the one with shorter distance takes effect.  */
   16544            4 :   return dist_define >= dist_use;
   16545              : }
   16546              : 
   16547              : /* Return true if we need to split op0 = op1 + op2 into a sequence of
   16548              :    move and add to avoid AGU stalls.  */
   16549              : 
   16550              : bool
   16551      9142047 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
   16552              : {
   16553      9142047 :   unsigned int regno0, regno1, regno2;
   16554              : 
   16555              :   /* Check if we need to optimize.  */
   16556      9142047 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16557      9141232 :     return false;
   16558              : 
   16559          815 :   regno0 = true_regnum (operands[0]);
   16560          815 :   regno1 = true_regnum (operands[1]);
   16561          815 :   regno2 = true_regnum (operands[2]);
   16562              : 
   16563              :   /* We need to split only adds with non destructive
   16564              :      destination operand.  */
   16565          815 :   if (regno0 == regno1 || regno0 == regno2)
   16566              :     return false;
   16567              :   else
   16568          245 :     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
   16569              : }
   16570              : 
   16571              : /* Return true if we should emit lea instruction instead of mov
   16572              :    instruction.  */
   16573              : 
   16574              : bool
   16575     29560590 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
   16576              : {
   16577     29560590 :   unsigned int regno0, regno1;
   16578              : 
   16579              :   /* Check if we need to optimize.  */
   16580     29560590 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16581     29558304 :     return false;
   16582              : 
   16583              :   /* Use lea for reg to reg moves only.  */
   16584         2286 :   if (!REG_P (operands[0]) || !REG_P (operands[1]))
   16585              :     return false;
   16586              : 
   16587          464 :   regno0 = true_regnum (operands[0]);
   16588          464 :   regno1 = true_regnum (operands[1]);
   16589              : 
   16590          464 :   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
   16591              : }
   16592              : 
   16593              : /* Return true if we need to split the lea INSN into a sequence of
   16594              :    instructions to avoid AGU stalls during peephole2.  OPERANDS[0] is
                           the destination and OPERANDS[1] is the address being computed;
                           the address is decomposed into base, index, scale and
                           displacement to decide whether a split is worthwhile.  */
   16595              : 
   16596              : bool
   16597     11166010 : ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
   16598              : {
   16599     11166010 :   unsigned int regno0, regno1, regno2;
   16600     11166010 :   int split_cost;
   16601     11166010 :   struct ix86_address parts;
   16602     11166010 :   int ok;
   16603              : 
   16604              :   /* The "at least two components" test below might not catch simple
   16605              :      move or zero extension insns if parts.base is non-NULL and parts.disp
   16606              :      is const0_rtx as the only components in the address, e.g. if the
   16607              :      register is %rbp or %r13.  As this test is much cheaper and moves or
   16608              :      zero extensions are the common case, do this check first.  */
   16609     11166010 :   if (REG_P (operands[1])
   16610     11166010 :       || (SImode_address_operand (operands[1], VOIDmode)
   16611       148384 :           && REG_P (XEXP (operands[1], 0))))
   16612      4091817 :     return false;
   16613              : 
   16614      7074193 :   ok = ix86_decompose_address (operands[1], &parts);
   16615      7074193 :   gcc_assert (ok);
   16616              : 
   16617              :   /* There should be at least two components in the address.  */
   16618      7074193 :   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
   16619      7074193 :       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
   16620              :     return false;
   16621              : 
   16622              :   /* We should not split into add if a non-legitimate PIC
   16623              :      operand is used as the displacement.  */
   16624      2684263 :   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
   16625              :     return false;
   16626              : 
                          /* regno1 and regno2 stay INVALID_REGNUM when the address has no
                             base or no index, respectively.  */
   16627      2634261 :   regno0 = true_regnum (operands[0]) ;
   16628      2634261 :   regno1 = INVALID_REGNUM;
   16629      2634261 :   regno2 = INVALID_REGNUM;
   16630              : 
   16631      2634261 :   if (parts.base)
   16632      2559485 :     regno1 = true_regnum (parts.base);
   16633      2634261 :   if (parts.index)
   16634       486018 :     regno2 = true_regnum (parts.index);
   16635              : 
   16636              :   /* Use add for a = a + b and a = b + a since it is faster and shorter
   16637              :      than lea for most processors.  For the processors like BONNELL, if
   16638              :      the destination register of LEA holds an actual address which will
   16639              :      be used soon, LEA is better and otherwise ADD is better.  */
   16640      2634261 :   if (!TARGET_CPU_P (BONNELL)
   16641      2634132 :       && parts.scale == 1
   16642      2389693 :       && (!parts.disp || parts.disp == const0_rtx)
   16643       176900 :       && (regno0 == regno1 || regno0 == regno2))
   16644              :     return true;
   16645              : 
   16646              :   /* Split with -Oz if the encoding requires fewer bytes.  */
   16647      2628376 :   if (optimize_size > 1
   16648           27 :       && parts.scale > 1
   16649            4 :       && !parts.base
   16650            4 :       && (!parts.disp || parts.disp == const0_rtx))
   16651              :     return true;
   16652              : 
   16653              :   /* Check we need to optimize.  */
   16654      2628372 :   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
   16655      2628031 :     return false;
   16656              : 
   16657          341 :   split_cost = 0;
   16658              : 
   16659              :   /* Compute how many cycles we will add to execution time
   16660              :      if we split the lea into a sequence of instructions.  */
   16661          341 :   if (parts.base || parts.index)
   16662              :     {
   16663              :       /* Have to use mov instruction if non-destructive
   16664              :          destination form is used.  */
   16665          341 :       if (regno1 != regno0 && regno2 != regno0)
   16666          266 :         split_cost += 1;
   16667              : 
   16668              :       /* Have to add index to base if both exist.  */
   16669          341 :       if (parts.base && parts.index)
   16670           54 :         split_cost += 1;
   16671              : 
   16672              :       /* Have to use shift and adds if scale is 2 or greater.  */
   16673          341 :       if (parts.scale > 1)
   16674              :         {
   16675           29 :           if (regno0 != regno1)
   16676           23 :             split_cost += 1;
   16677            6 :           else if (regno2 == regno0)
   16678            0 :             split_cost += 4;
   16679              :           else
   16680            6 :             split_cost += parts.scale;
   16681              :         }
   16682              : 
   16683              :       /* Have to use add instruction with immediate if
   16684              :          disp is non zero.  */
   16685          341 :       if (parts.disp && parts.disp != const0_rtx)
   16686          280 :         split_cost += 1;
   16687              : 
   16688              :       /* Subtract the price of lea.  */
   16689          341 :       split_cost -= 1;
   16690              :     }
   16691              : 
                          /* Split exactly when the lea does not outperform the split
                             sequence at the computed cost.  */
   16692          341 :   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
   16693          341 :                                 parts.scale > 1);
   16694              : }
   16695              : 
   16696              : /* Return true if it is ok to optimize an ADD operation to LEA
   16697              :    operation to avoid flag register consumption.  For most processors,
   16698              :    ADD is faster than LEA.  For the processors like BONNELL, if the
   16699              :    destination register of LEA holds an actual address which will be
   16700              :    used soon, LEA is better and otherwise ADD is better.
                           OPERANDS[0] is the destination of INSN and OPERANDS[1] and
                           OPERANDS[2] are the two values being added.  */
   16701              : 
   16702              : bool
   16703      9199993 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
   16704              : {
   16705      9199993 :   unsigned int regno0 = true_regnum (operands[0]);
   16706      9199993 :   unsigned int regno1 = true_regnum (operands[1]);
   16707      9199993 :   unsigned int regno2 = true_regnum (operands[2]);
   16708              : 
   16709              :   /* If a = b + c, (a!=b && a!=c), must use lea form. */
   16710      9199993 :   if (regno0 != regno1 && regno0 != regno2)
   16711              :     return true;
   16712              : 
                          /* From here on the destination matches one of the sources, so
                             LEA is only chosen when AGU tuning is on and we are not
                             optimizing for size.  */
   16713      7163545 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16714      7162966 :     return false;
   16715              : 
   16716          579 :   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
   16717              : }
   16718              : 
   16719              : /* Return true if destination reg of SET_BODY is shift count of
   16720              :    USE_BODY.  Either body may be a SET or a PARALLEL; a PARALLEL is
                           handled by recursing on each of its elements, and any other
                           code yields false.  */
   16721              : 
   16722              : static bool
   16723           89 : ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
   16724              : {
   16725           89 :   rtx set_dest;
   16726           89 :   rtx shift_rtx;
   16727           89 :   int i;
   16728              : 
   16729              :   /* Retrieve destination of SET_BODY.  */
   16730           89 :   switch (GET_CODE (set_body))
   16731              :     {
   16732           73 :     case SET:
   16733           73 :       set_dest = SET_DEST (set_body);
   16734           73 :       if (!set_dest || !REG_P (set_dest))
   16735              :         return false;
   16736           72 :       break;
   16737            8 :     case PARALLEL:
                            /* True as soon as one element of the PARALLEL sets the
                               shift count; otherwise fall through and return false.  */
   16738           24 :       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
   16739           16 :         if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
   16740              :                                           use_body))
   16741              :           return true;
   16742              :       /* FALLTHROUGH */
   16743              :     default:
   16744              :       return false;
   16745              :     }
   16746              : 
   16747              :   /* Retrieve shift count of USE_BODY.  */
   16748           72 :   switch (GET_CODE (use_body))
   16749              :     {
   16750           24 :     case SET:
   16751           24 :       shift_rtx = XEXP (use_body, 1);
   16752           24 :       break;
   16753           24 :     case PARALLEL:
                            /* Likewise, check each element of the using PARALLEL
                               against SET_BODY.  */
   16754           72 :       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
   16755           48 :         if (ix86_dep_by_shift_count_body (set_body,
   16756           48 :                                           XVECEXP (use_body, 0, i)))
   16757              :           return true;
   16758              :       /* FALLTHROUGH */
   16759              :     default:
   16760              :       return false;
   16761              :     }
   16762              : 
                          /* Only shifts and rotates have a count operand to compare.  */
   16763           24 :   if (shift_rtx
   16764           24 :       && (GET_CODE (shift_rtx) == ASHIFT
   16765           21 :           || GET_CODE (shift_rtx) == LSHIFTRT
   16766            5 :           || GET_CODE (shift_rtx) == ASHIFTRT
   16767            0 :           || GET_CODE (shift_rtx) == ROTATE
   16768            0 :           || GET_CODE (shift_rtx) == ROTATERT))
   16769              :     {
   16770           24 :       rtx shift_count = XEXP (shift_rtx, 1);
   16771              : 
   16772              :       /* Return true if shift count is dest of SET_BODY.  */
   16773           24 :       if (REG_P (shift_count))
   16774              :         {
   16775              :           /* Add check since it can be invoked before register
   16776              :              allocation in pre-reload schedule.  */
   16777            0 :           if (reload_completed
   16778            0 :               && true_regnum (set_dest) == true_regnum (shift_count))
   16779              :             return true;
   16780            0 :           else if (REGNO(set_dest) == REGNO(shift_count))
   16781              :             return true;
   16782              :         }
   16783              :     }
   16784              : 
   16785              :   return false;
   16786              : }
   16787              : 
   16788              : /* Return true if destination reg of SET_INSN is shift count of
   16789              :    USE_INSN.  This simply applies ix86_dep_by_shift_count_body to the
                           patterns of the two insns.  */
   16790              : 
   16791              : bool
   16792           25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
   16793              : {
   16794           25 :   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
   16795           25 :                                        PATTERN (use_insn));
   16796              : }
   16797              : 
   16798              : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   16799              :    are ok, keeping in mind the possible movddup alternative.  HIGH
                           selects which source a memory destination must match:
                           OPERANDS[2] when HIGH is true, OPERANDS[1] otherwise.  */
   16800              : 
   16801              : bool
   16802        92334 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
   16803              : {
                          /* A memory destination is only ok if it is the same rtx as the
                             source selected by HIGH.  */
   16804        92334 :   if (MEM_P (operands[0]))
   16805         2025 :     return rtx_equal_p (operands[0], operands[1 + high]);
                          /* The two sources must not both be in memory.  */
   16806        90309 :   if (MEM_P (operands[1]) && MEM_P (operands[2]))
   16807         1009 :     return false;
   16808              :   return true;
   16809              : }
   16810              : 
   16811              : /* A subroutine of ix86_build_signbit_mask.  Build a CONST_VECTOR of
   16812              :    mode MODE whose element 0 is VALUE.  If VECT is true, then replicate
   16813              :    the value for all elements of the vector register; otherwise the
                           remaining elements are zero in the element mode.  Vector modes
                           with integer elements require VECT to be true, and any mode not
                           listed below is rejected with gcc_unreachable.  */
   16814              : 
   16815              : rtx
   16816        74736 : ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
   16817              : {
   16818        74736 :   int i, n_elt;
   16819        74736 :   rtvec v;
   16820        74736 :   machine_mode scalar_mode;
   16821              : 
   16822        74736 :   switch (mode)
   16823              :     {
   16824         1280 :     case E_V64QImode:
   16825         1280 :     case E_V32QImode:
   16826         1280 :     case E_V16QImode:
   16827         1280 :     case E_V32HImode:
   16828         1280 :     case E_V16HImode:
   16829         1280 :     case E_V8HImode:
   16830         1280 :     case E_V16SImode:
   16831         1280 :     case E_V8SImode:
   16832         1280 :     case E_V4SImode:
   16833         1280 :     case E_V2SImode:
   16834         1280 :     case E_V8DImode:
   16835         1280 :     case E_V4DImode:
   16836         1280 :     case E_V2DImode:
                            /* The modes listed above are only accepted when the value
                               is replicated to every element.  */
   16837         1280 :       gcc_assert (vect);
   16838              :       /* FALLTHRU */
   16839        74736 :     case E_V2HFmode:
   16840        74736 :     case E_V4HFmode:
   16841        74736 :     case E_V8HFmode:
   16842        74736 :     case E_V16HFmode:
   16843        74736 :     case E_V32HFmode:
   16844        74736 :     case E_V16SFmode:
   16845        74736 :     case E_V8SFmode:
   16846        74736 :     case E_V4SFmode:
   16847        74736 :     case E_V2SFmode:
   16848        74736 :     case E_V8DFmode:
   16849        74736 :     case E_V4DFmode:
   16850        74736 :     case E_V2DFmode:
   16851        74736 :     case E_V32BFmode:
   16852        74736 :     case E_V16BFmode:
   16853        74736 :     case E_V8BFmode:
   16854        74736 :     case E_V4BFmode:
   16855        74736 :     case E_V2BFmode:
   16856        74736 :       n_elt = GET_MODE_NUNITS (mode);
   16857        74736 :       v = rtvec_alloc (n_elt);
   16858        74736 :       scalar_mode = GET_MODE_INNER (mode);
   16859              : 
                            /* Element 0 always holds VALUE; the others hold VALUE or
                               zero depending on VECT.  */
   16860        74736 :       RTVEC_ELT (v, 0) = value;
   16861              : 
   16862       231856 :       for (i = 1; i < n_elt; ++i)
   16863       157120 :         RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
   16864              : 
   16865        74736 :       return gen_rtx_CONST_VECTOR (mode, v);
   16866              : 
   16867            0 :     default:
   16868            0 :       gcc_unreachable ();
   16869              :     }
   16870              : }
   16871              : 
   16872              : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   16873              :    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   16874              :    for an SSE register.  If VECT is true, then replicate the mask for
   16875              :    all elements of the vector register.  If INVERT is true, then create
   16876              :    a mask excluding the sign bit.  The mask is returned in a register:
                           a scalar register for TImode and TFmode, a vector register of
                           mode MODE otherwise.  */
   16877              : 
   16878              : rtx
   16879        76117 : ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
   16880              : {
   16881        76117 :   machine_mode vec_mode, imode;
   16882        76117 :   wide_int w;
   16883        76117 :   rtx mask, v;
   16884              : 
                          /* Pick the integer mode (IMODE) in which the mask constant is
                             first built, and the vector mode of the result, if any.  */
   16885        76117 :   switch (mode)
   16886              :     {
   16887              :     case E_V2HFmode:
   16888              :     case E_V4HFmode:
   16889              :     case E_V8HFmode:
   16890              :     case E_V16HFmode:
   16891              :     case E_V32HFmode:
   16892              :     case E_V32BFmode:
   16893              :     case E_V16BFmode:
   16894              :     case E_V8BFmode:
   16895              :     case E_V4BFmode:
   16896              :     case E_V2BFmode:
   16897              :       vec_mode = mode;
   16898              :       imode = HImode;
   16899              :       break;
   16900              : 
   16901        34132 :     case E_V16SImode:
   16902        34132 :     case E_V16SFmode:
   16903        34132 :     case E_V8SImode:
   16904        34132 :     case E_V4SImode:
   16905        34132 :     case E_V8SFmode:
   16906        34132 :     case E_V4SFmode:
   16907        34132 :     case E_V2SFmode:
   16908        34132 :     case E_V2SImode:
   16909        34132 :       vec_mode = mode;
   16910        34132 :       imode = SImode;
   16911        34132 :       break;
   16912              : 
   16913        39126 :     case E_V8DImode:
   16914        39126 :     case E_V4DImode:
   16915        39126 :     case E_V2DImode:
   16916        39126 :     case E_V8DFmode:
   16917        39126 :     case E_V4DFmode:
   16918        39126 :     case E_V2DFmode:
   16919        39126 :       vec_mode = mode;
   16920        39126 :       imode = DImode;
   16921        39126 :       break;
   16922              : 
   16923         2360 :     case E_TImode:
   16924         2360 :     case E_TFmode:
                            /* Scalar case: VOIDmode marks that no vector is built.  */
   16925         2360 :       vec_mode = VOIDmode;
   16926         2360 :       imode = TImode;
   16927         2360 :       break;
   16928              : 
   16929            0 :     default:
   16930            0 :       gcc_unreachable ();
   16931              :     }
   16932              : 
                          /* W has only the most significant bit of the element mode set;
                             with INVERT it becomes the complement of that.  */
   16933        76117 :   machine_mode inner_mode = GET_MODE_INNER (mode);
   16934       152234 :   w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
   16935       152234 :                            GET_MODE_BITSIZE (inner_mode));
   16936        76117 :   if (invert)
   16937        39835 :     w = wi::bit_not (w);
   16938              : 
   16939              :   /* Force this value into the low part of a fp vector constant.  */
   16940        76117 :   mask = immed_wide_int_const (w, imode);
   16941        76117 :   mask = gen_lowpart (inner_mode, mask);
   16942              : 
   16943        76117 :   if (vec_mode == VOIDmode)
   16944         2360 :     return force_reg (inner_mode, mask);
   16945              : 
   16946        73757 :   v = ix86_build_const_vector (vec_mode, vect, mask);
   16947        73757 :   return force_reg (vec_mode, v);
   16948        76117 : }
   16949              : 
   16950              : /* Return HOST_WIDE_INT for const vector OP in MODE.  The elements of
                           OP are packed into one integer, element I being inserted at bit
                           offset I times the element bit size.  MODE must not be wider
                           than UNITS_PER_WORD and must be one of the modes handled
                           below.  */
   16951              : 
   16952              : HOST_WIDE_INT
   16953       136383 : ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
   16954              : {
   16955       288980 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   16956            0 :     gcc_unreachable ();
   16957              : 
   16958       136383 :   int nunits = GET_MODE_NUNITS (mode);
   16959       272766 :   wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
   16960       136383 :   machine_mode innermode = GET_MODE_INNER (mode);
   16961       136383 :   unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
   16962              : 
   16963       136383 :   switch (mode)
   16964              :     {
                          /* Integer elements: take each element's INTVAL.  */
   16965              :     case E_V2QImode:
   16966              :     case E_V4QImode:
   16967              :     case E_V2HImode:
   16968              :     case E_V8QImode:
   16969              :     case E_V4HImode:
   16970              :     case E_V2SImode:
   16971       468471 :       for (int i = 0; i < nunits; ++i)
   16972              :         {
   16973       336646 :           int v = INTVAL (XVECEXP (op, 0, i));
   16974       336646 :           wide_int wv = wi::shwi (v, innermode_bits);
   16975       336646 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16976       336646 :         }
   16977              :       break;
                          /* Single-element vectors: the element's value is the result.  */
   16978           88 :     case E_V1SImode:
   16979           88 :     case E_V1DImode:
   16980           88 :       op = CONST_VECTOR_ELT (op, 0);
   16981           88 :       return INTVAL (op);
                          /* Floating-point elements: convert each CONST_DOUBLE with
                             real_to_target in the element mode's format first.  */
   16982              :     case E_V2HFmode:
   16983              :     case E_V2BFmode:
   16984              :     case E_V4HFmode:
   16985              :     case E_V4BFmode:
   16986              :     case E_V2SFmode:
   16987        13432 :       for (int i = 0; i < nunits; ++i)
   16988              :         {
   16989         8962 :           rtx x = XVECEXP (op, 0, i);
   16990         8962 :           int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   16991         8962 :                                   REAL_MODE_FORMAT (innermode));
   16992         8962 :           wide_int wv = wi::shwi (v, innermode_bits);
   16993         8962 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16994         8962 :         }
   16995              :       break;
   16996            0 :     default:
   16997            0 :       gcc_unreachable ();
   16998              :     }
   16999              : 
   17000       136295 :   return val.to_shwi ();
   17001       136383 : }
   17002              : 
                        /* Return the X86_CC* constant corresponding to comparison CODE,
                           or -1 if CODE is not one of the ten comparison codes handled
                           below.  */
   17003           32 : int ix86_get_flags_cc (rtx_code code)
   17004              : {
   17005           32 :   switch (code)
   17006              :     {
   17007              :       case NE: return X86_CCNE;
   17008              :       case EQ: return X86_CCE;
   17009              :       case GE: return X86_CCNL;
   17010              :       case GT: return X86_CCNLE;
   17011              :       case LE: return X86_CCLE;
   17012              :       case LT: return X86_CCL;
   17013              :       case GEU: return X86_CCNB;
   17014              :       case GTU: return X86_CCNBE;
   17015              :       case LEU: return X86_CCBE;
   17016              :       case LTU: return X86_CCB;
   17017              :       default: return -1;
   17018              :     }
   17019              : }
   17020              : 
   17021              : /* Return TRUE or FALSE depending on whether the first SET in INSN
   17022              :    has source and destination with matching CC modes, and that the
   17023              :    CC mode is at least as constrained as REQ_MODE.  The SET must have
                           a COMPARE as its source; this is asserted.  */
   17024              : 
   17025              : bool
   17026     53906303 : ix86_match_ccmode (rtx insn, machine_mode req_mode)
   17027              : {
   17028     53906303 :   rtx set;
   17029     53906303 :   machine_mode set_mode;
   17030              : 
                          /* For a PARALLEL only its first element is examined.  */
   17031     53906303 :   set = PATTERN (insn);
   17032     53906303 :   if (GET_CODE (set) == PARALLEL)
   17033       496796 :     set = XVECEXP (set, 0, 0);
   17034     53906303 :   gcc_assert (GET_CODE (set) == SET);
   17035     53906303 :   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
   17036              : 
   17037     53906303 :   set_mode = GET_MODE (SET_DEST (set));
   17038     53906303 :   switch (set_mode)
   17039              :     {
   17040      1418412 :     case E_CCNOmode:
                            /* CCNOmode satisfies a CCNOmode request, and a CCmode
                               request only for a comparison against zero.  */
   17041      1418412 :       if (req_mode != CCNOmode
   17042        97669 :           && (req_mode != CCmode
   17043            0 :               || XEXP (SET_SRC (set), 1) != const0_rtx))
   17044              :         return false;
   17045              :       break;
                          /* The next cases cascade: each one rejects some requested
                             modes and then falls through to the checks of the following
                             case.  */
   17046      5576464 :     case E_CCmode:
   17047      5576464 :       if (req_mode == CCGCmode)
   17048              :         return false;
   17049              :       /* FALLTHRU */
   17050      9178195 :     case E_CCGCmode:
   17051      9178195 :       if (req_mode == CCGOCmode || req_mode == CCNOmode)
   17052              :         return false;
   17053              :       /* FALLTHRU */
   17054     10234808 :     case E_CCGOCmode:
   17055     10234808 :       if (req_mode == CCZmode)
   17056              :         return false;
   17057              :       /* FALLTHRU */
   17058              :     case E_CCZmode:
   17059              :       break;
   17060              : 
                          /* The remaining modes only match a request for exactly the
                             same mode.  */
   17061            0 :     case E_CCGZmode:
   17062              : 
   17063            0 :     case E_CCAmode:
   17064            0 :     case E_CCCmode:
   17065            0 :     case E_CCOmode:
   17066            0 :     case E_CCPmode:
   17067            0 :     case E_CCSmode:
   17068            0 :       if (set_mode != req_mode)
   17069              :         return false;
   17070              :       break;
   17071              : 
   17072            0 :     default:
   17073            0 :       gcc_unreachable ();
   17074              :     }
   17075              : 
                          /* Finally the COMPARE itself must carry the destination's
                             mode.  */
   17076     53800017 :   return GET_MODE (SET_SRC (set)) == set_mode;
   17077              : }
   17078              : 
                        /* Return the CC mode to use when comparing OP0 with OP1 using
                           comparison CODE.  Scalar floating-point operands always get
                           CCFPmode (decimal float modes are asserted not to occur); for
                           everything else the mode is selected from CODE and, for some
                           codes, from the form of OP0 and OP1.  */
   17079              : machine_mode
   17080     13622090 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
   17081              : {
   17082     13622090 :   machine_mode mode = GET_MODE (op0);
   17083              : 
   17084     13622090 :   if (SCALAR_FLOAT_MODE_P (mode))
   17085              :     {
   17086       143988 :       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
   17087              :       return CCFPmode;
   17088              :     }
   17089              : 
   17090     13478102 :   switch (code)
   17091              :     {
   17092              :       /* Only zero flag is needed.  */
   17093              :     case EQ:                    /* ZF=1 */
   17094              :     case NE:                    /* ZF=0 */
   17095              :       return CCZmode;
   17096              :       /* Codes needing carry flag.  */
   17097       986831 :     case GEU:                   /* CF=0 */
   17098       986831 :     case LTU:                   /* CF=1 */
   17099       986831 :       rtx geu;
   17100              :       /* Detect overflow checks.  They need just the carry flag.  */
   17101       986831 :       if (GET_CODE (op0) == PLUS
   17102       986831 :           && (rtx_equal_p (op1, XEXP (op0, 0))
   17103       129238 :               || rtx_equal_p (op1, XEXP (op0, 1))))
   17104        17412 :         return CCCmode;
   17105              :       /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
   17106              :          Match LTU of op0
   17107              :          (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   17108              :          and op1
   17109              :          (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
   17110              :          where CC_CCC is either CC or CCC.  */
   17111       969419 :       else if (code == LTU
   17112       372247 :                && GET_CODE (op0) == NEG
   17113           18 :                && GET_CODE (geu = XEXP (op0, 0)) == GEU
   17114            0 :                && REG_P (XEXP (geu, 0))
   17115            0 :                && (GET_MODE (XEXP (geu, 0)) == CCCmode
   17116            0 :                    || GET_MODE (XEXP (geu, 0)) == CCmode)
   17117            0 :                && REGNO (XEXP (geu, 0)) == FLAGS_REG
   17118            0 :                && XEXP (geu, 1) == const0_rtx
   17119            0 :                && GET_CODE (op1) == LTU
   17120            0 :                && REG_P (XEXP (op1, 0))
   17121            0 :                && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   17122            0 :                && REGNO (XEXP (op1, 0)) == FLAGS_REG
   17123       969419 :                && XEXP (op1, 1) == const0_rtx)
   17124              :         return CCCmode;
   17125              :       /* Similarly for *x86_cmc pattern.
   17126              :          Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   17127              :          and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
   17128              :          It is sufficient to test that the operand modes are CCCmode.  */
   17129       969419 :       else if (code == LTU
   17130       372247 :                && GET_CODE (op0) == NEG
   17131           18 :                && GET_CODE (XEXP (op0, 0)) == LTU
   17132            0 :                && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   17133            0 :                && GET_CODE (op1) == GEU
   17134            0 :                && GET_MODE (XEXP (op1, 0)) == CCCmode)
   17135              :         return CCCmode;
   17136              :       /* Similarly for the comparison of addcarry/subborrow pattern.  */
   17137       372247 :       else if (code == LTU
   17138       372247 :                && GET_CODE (op0) == ZERO_EXTEND
   17139        15254 :                && GET_CODE (op1) == PLUS
   17140        11004 :                && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
   17141        11004 :                && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
   17142              :         return CCCmode;
   17143              :       else
   17144       958415 :         return CCmode;
   17145              :     case GTU:                   /* CF=0 & ZF=0 */
   17146              :     case LEU:                   /* CF=1 | ZF=1 */
   17147              :       return CCmode;
   17148              :       /* Codes possibly doable only with sign flag when
   17149              :          comparing against zero.  */
   17150       784194 :     case GE:                    /* SF=OF   or   SF=0 */
   17151       784194 :     case LT:                    /* SF<>OF  or   SF=1 */
   17152       784194 :       if (op1 == const0_rtx)
   17153              :         return CCGOCmode;
   17154              :       else
   17155              :         /* For other cases Carry flag is not required.  */
   17156       443127 :         return CCGCmode;
   17157              :       /* Codes doable only with sign flag when comparing
   17158              :          against zero, but we miss jump instruction for it
   17159              :          so we need to use relational tests against overflow
   17160              :          that thus needs to be zero.  */
   17161       900356 :     case GT:                    /* ZF=0 & SF=OF */
   17162       900356 :     case LE:                    /* ZF=1 | SF<>OF */
   17163       900356 :       if (op1 == const0_rtx)
   17164              :         return CCNOmode;
   17165              :       else
   17166       597759 :         return CCGCmode;
   17167              :     default:
   17168              :       /* CCmode should be used in all other cases.  */
   17169              :       return CCmode;
   17170              :     }
   17171              : }
   17172              : 
   17173              : /* Return TRUE or FALSE depending on whether the ptest instruction
   17174              :    INSN has source and destination with suitable matching CC modes.
                           The pattern of INSN must be a SET whose source is an
                           UNSPEC_PTEST unspec; this is asserted.  */
   17175              : 
   17176              : bool
   17177        91619 : ix86_match_ptest_ccmode (rtx insn)
   17178              : {
   17179        91619 :   rtx set, src;
   17180        91619 :   machine_mode set_mode;
   17181              : 
   17182        91619 :   set = PATTERN (insn);
   17183        91619 :   gcc_assert (GET_CODE (set) == SET);
   17184        91619 :   src = SET_SRC (set);
   17185        91619 :   gcc_assert (GET_CODE (src) == UNSPEC
   17186              :               && XINT (src, 1) == UNSPEC_PTEST);
   17187              : 
                          /* Only CCZmode, CCCmode and CCmode are acceptable, and the
                             destination must carry the same mode as the source.  */
   17188        91619 :   set_mode = GET_MODE (src);
   17189        91619 :   if (set_mode != CCZmode
   17190              :       && set_mode != CCCmode
   17191              :       && set_mode != CCmode)
   17192              :     return false;
   17193        91619 :   return GET_MODE (SET_DEST (set)) == set_mode;
   17194              : }
   17195              : 
   17196              : /* Return the fixed registers used for condition codes.  *P1 is set
                           to FLAGS_REG and *P2 to INVALID_REGNUM; the function always
                           returns true.  */
   17197              : 
   17198              : static bool
   17199     18731686 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
   17200              : {
   17201     18731686 :   *p1 = FLAGS_REG;
   17202     18731686 :   *p2 = INVALID_REGNUM;
   17203     18731686 :   return true;
   17204              : }
   17205              : 
   17206              : /* If two condition code modes are compatible, return a condition code
   17207              :    mode which is compatible with both.  Otherwise, return
   17208              :    VOIDmode.  */
   17209              : 
   17210              : static machine_mode
   17211        30435 : ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
   17212              : {
   17213        30435 :   if (m1 == m2)
   17214              :     return m1;
   17215              : 
   17216        29663 :   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
   17217              :     return VOIDmode;
   17218              : 
   17219        29663 :   if ((m1 == CCGCmode && m2 == CCGOCmode)
   17220        29663 :       || (m1 == CCGOCmode && m2 == CCGCmode))
   17221              :     return CCGCmode;
   17222              : 
   17223        29663 :   if ((m1 == CCNOmode && m2 == CCGOCmode)
   17224        29482 :       || (m1 == CCGOCmode && m2 == CCNOmode))
   17225              :     return CCNOmode;
   17226              : 
   17227        29371 :   if (m1 == CCZmode
   17228        15667 :       && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
   17229              :     return m2;
   17230        16910 :   else if (m2 == CCZmode
   17231        13448 :            && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
   17232              :     return m1;
   17233              : 
   17234         6777 :   switch (m1)
   17235              :     {
   17236            0 :     default:
   17237            0 :       gcc_unreachable ();
   17238              : 
   17239         6777 :     case E_CCmode:
   17240         6777 :     case E_CCGCmode:
   17241         6777 :     case E_CCGOCmode:
   17242         6777 :     case E_CCNOmode:
   17243         6777 :     case E_CCAmode:
   17244         6777 :     case E_CCCmode:
   17245         6777 :     case E_CCOmode:
   17246         6777 :     case E_CCPmode:
   17247         6777 :     case E_CCSmode:
   17248         6777 :     case E_CCZmode:
   17249         6777 :       switch (m2)
   17250              :         {
   17251              :         default:
   17252              :           return VOIDmode;
   17253              : 
   17254              :         case E_CCmode:
   17255              :         case E_CCGCmode:
   17256              :         case E_CCGOCmode:
   17257              :         case E_CCNOmode:
   17258              :         case E_CCAmode:
   17259              :         case E_CCCmode:
   17260              :         case E_CCOmode:
   17261              :         case E_CCPmode:
   17262              :         case E_CCSmode:
   17263              :         case E_CCZmode:
   17264              :           return CCmode;
   17265              :         }
   17266              : 
   17267              :     case E_CCFPmode:
   17268              :       /* These are only compatible with themselves, which we already
   17269              :          checked above.  */
   17270              :       return VOIDmode;
   17271              :     }
   17272              : }
   17273              : 
   17274              : /* Return strategy to use for floating-point.  We assume that fcomi is always
   17275              :    preferrable where available, since that is also true when looking at size
   17276              :    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
   17277              : 
   17278              : enum ix86_fpcmp_strategy
   17279      5538155 : ix86_fp_comparison_strategy (enum rtx_code)
   17280              : {
   17281              :   /* Do fcomi/sahf based test when profitable.  */
   17282              : 
   17283      5538155 :   if (TARGET_CMOVE)
   17284              :     return IX86_FPCMP_COMI;
   17285              : 
   17286            0 :   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
   17287            0 :     return IX86_FPCMP_SAHF;
   17288              : 
   17289              :   return IX86_FPCMP_ARITH;
   17290              : }
   17291              : 
   17292              : /* Convert comparison codes we use to represent FP comparison to integer
   17293              :    code that will result in proper branch.  Return UNKNOWN if no such code
   17294              :    is available.  */
   17295              : 
   17296              : enum rtx_code
   17297       583971 : ix86_fp_compare_code_to_integer (enum rtx_code code)
   17298              : {
   17299       583971 :   switch (code)
   17300              :     {
   17301              :     case GT:
   17302              :       return GTU;
   17303        17870 :     case GE:
   17304        17870 :       return GEU;
   17305              :     case ORDERED:
   17306              :     case UNORDERED:
   17307              :       return code;
   17308       118836 :     case UNEQ:
   17309       118836 :       return EQ;
   17310        17368 :     case UNLT:
   17311        17368 :       return LTU;
   17312        31457 :     case UNLE:
   17313        31457 :       return LEU;
   17314       113455 :     case LTGT:
   17315       113455 :       return NE;
   17316          683 :     case EQ:
   17317          683 :     case NE:
   17318          683 :       if (TARGET_AVX10_2)
   17319              :         return code;
   17320              :       /* FALLTHRU.  */
   17321          225 :     default:
   17322          225 :       return UNKNOWN;
   17323              :     }
   17324              : }
   17325              : 
   17326              : /* Zero extend possibly SImode EXP to Pmode register.  */
   17327              : rtx
   17328        45971 : ix86_zero_extend_to_Pmode (rtx exp)
   17329              : {
   17330        57867 :   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
   17331              : }
   17332              : 
   17333              : /* Return true if the function is called via PLT.   */
   17334              : 
   17335              : bool
   17336       996153 : ix86_call_use_plt_p (rtx call_op)
   17337              : {
   17338       996153 :   if (SYMBOL_REF_LOCAL_P (call_op))
   17339              :     {
   17340       198677 :       if (SYMBOL_REF_DECL (call_op)
   17341       198677 :           && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
   17342              :         {
   17343              :           /* NB: All ifunc functions must be called via PLT.  */
   17344       115449 :           cgraph_node *node
   17345       115449 :             = cgraph_node::get (SYMBOL_REF_DECL (call_op));
   17346       115449 :           if (node && node->ifunc_resolver)
   17347              :             return true;
   17348              :         }
   17349       198657 :       return false;
   17350              :     }
   17351              :   return true;
   17352              : }
   17353              : 
   17354              : /* Implement TARGET_IFUNC_REF_LOCAL_OK.  If this hook returns true,
   17355              :    the PLT entry will be used as the function address for local IFUNC
   17356              :    functions.  When the PIC register is needed for PLT call, indirect
   17357              :    call via the PLT entry will fail since the PIC register may not be
   17358              :    set up properly for indirect call.  In this case, we should return
   17359              :    false.  */
   17360              : 
   17361              : static bool
   17362    770335642 : ix86_ifunc_ref_local_ok (void)
   17363              : {
   17364    770335642 :   return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
   17365              : }
   17366              : 
   17367              : /* Return true if the function being called was marked with attribute
   17368              :    "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   17369              :    to handle the non-PIC case in the backend because there is no easy
   17370              :    interface for the front-end to force non-PLT calls to use the GOT.
   17371              :    This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   17372              :    to call the function marked "noplt" indirectly.  */
   17373              : 
   17374              : bool
   17375      5925460 : ix86_nopic_noplt_attribute_p (rtx call_op)
   17376              : {
   17377      5433042 :   if (flag_pic || ix86_cmodel == CM_LARGE
   17378              :       || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
   17379              :       || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
   17380     11358502 :       || SYMBOL_REF_LOCAL_P (call_op))
   17381              :     return false;
   17382              : 
   17383      3806704 :   tree symbol_decl = SYMBOL_REF_DECL (call_op);
   17384              : 
   17385      3806704 :   if (!flag_plt
   17386      3806704 :       || (symbol_decl != NULL_TREE
   17387      3806672 :           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
   17388           34 :     return true;
   17389              : 
   17390              :   return false;
   17391              : }
   17392              : 
   17393              : /* Helper to output the jmp/call.  */
   17394              : static void
   17395           33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
   17396              : {
   17397           33 :   if (thunk_name != NULL)
   17398              :     {
   17399           22 :       if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
   17400            1 :           && ix86_indirect_branch_cs_prefix)
   17401            1 :         fprintf (asm_out_file, "\tcs\n");
   17402           22 :       fprintf (asm_out_file, "\tjmp\t");
   17403           22 :       assemble_name (asm_out_file, thunk_name);
   17404           22 :       putc ('\n', asm_out_file);
   17405           22 :       if ((ix86_harden_sls & harden_sls_indirect_jmp))
   17406            2 :         fputs ("\tint3\n", asm_out_file);
   17407              :     }
   17408              :   else
   17409           11 :     output_indirect_thunk (regno);
   17410           33 : }
   17411              : 
   17412              : /* Output indirect branch via a call and return thunk.  CALL_OP is a
   17413              :    register which contains the branch target.  XASM is the assembly
   17414              :    template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   17415              :    A normal call is converted to:
   17416              : 
   17417              :         call __x86_indirect_thunk_reg
   17418              : 
   17419              :    and a tail call is converted to:
   17420              : 
   17421              :         jmp __x86_indirect_thunk_reg
   17422              :  */
   17423              : 
   17424              : static void
   17425           50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
   17426              : {
   17427           50 :   char thunk_name_buf[32];
   17428           50 :   char *thunk_name;
   17429           50 :   enum indirect_thunk_prefix need_prefix
   17430           50 :     = indirect_thunk_need_prefix (current_output_insn);
   17431           50 :   int regno = REGNO (call_op);
   17432              : 
   17433           50 :   if (cfun->machine->indirect_branch_type
   17434           50 :       != indirect_branch_thunk_inline)
   17435              :     {
   17436           39 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17437           16 :         SET_HARD_REG_BIT (indirect_thunks_used, regno);
   17438              : 
   17439           39 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17440           39 :       thunk_name = thunk_name_buf;
   17441              :     }
   17442              :   else
   17443              :     thunk_name = NULL;
   17444              : 
   17445           50 :   if (sibcall_p)
   17446           27 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17447              :   else
   17448              :     {
   17449           23 :       if (thunk_name != NULL)
   17450              :         {
   17451           17 :           if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
   17452            1 :               && ix86_indirect_branch_cs_prefix)
   17453            1 :             fprintf (asm_out_file, "\tcs\n");
   17454           17 :           fprintf (asm_out_file, "\tcall\t");
   17455           17 :           assemble_name (asm_out_file, thunk_name);
   17456           17 :           putc ('\n', asm_out_file);
   17457           17 :           return;
   17458              :         }
   17459              : 
   17460            6 :       char indirectlabel1[32];
   17461            6 :       char indirectlabel2[32];
   17462              : 
   17463            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17464              :                                    INDIRECT_LABEL,
   17465              :                                    indirectlabelno++);
   17466            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17467              :                                    INDIRECT_LABEL,
   17468              :                                    indirectlabelno++);
   17469              : 
   17470              :       /* Jump.  */
   17471            6 :       fputs ("\tjmp\t", asm_out_file);
   17472            6 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17473            6 :       fputc ('\n', asm_out_file);
   17474              : 
   17475            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17476              : 
   17477            6 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17478              : 
   17479            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17480              : 
   17481              :       /* Call.  */
   17482            6 :       fputs ("\tcall\t", asm_out_file);
   17483            6 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17484            6 :       fputc ('\n', asm_out_file);
   17485              :     }
   17486              : }
   17487              : 
   17488              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17489              :    the branch target.  XASM is the assembly template for CALL_OP.
   17490              :    Branch is a tail call if SIBCALL_P is true.  A normal call is
   17491              :    converted to:
   17492              : 
   17493              :         jmp L2
   17494              :    L1:
   17495              :         push CALL_OP
   17496              :         jmp __x86_indirect_thunk
   17497              :    L2:
   17498              :         call L1
   17499              : 
   17500              :    and a tail call is converted to:
   17501              : 
   17502              :         push CALL_OP
   17503              :         jmp __x86_indirect_thunk
   17504              :  */
   17505              : 
   17506              : static void
   17507            0 : ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
   17508              :                                       bool sibcall_p)
   17509              : {
   17510            0 :   char thunk_name_buf[32];
   17511            0 :   char *thunk_name;
   17512            0 :   char push_buf[64];
   17513            0 :   enum indirect_thunk_prefix need_prefix
   17514            0 :     = indirect_thunk_need_prefix (current_output_insn);
   17515            0 :   int regno = -1;
   17516              : 
   17517            0 :   if (cfun->machine->indirect_branch_type
   17518            0 :       != indirect_branch_thunk_inline)
   17519              :     {
   17520            0 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17521            0 :         indirect_thunk_needed = true;
   17522            0 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17523            0 :       thunk_name = thunk_name_buf;
   17524              :     }
   17525              :   else
   17526              :     thunk_name = NULL;
   17527              : 
   17528            0 :   snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
   17529            0 :             TARGET_64BIT ? 'q' : 'l', xasm);
   17530              : 
   17531            0 :   if (sibcall_p)
   17532              :     {
   17533            0 :       output_asm_insn (push_buf, &call_op);
   17534            0 :       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17535              :     }
   17536              :   else
   17537              :     {
   17538            0 :       char indirectlabel1[32];
   17539            0 :       char indirectlabel2[32];
   17540              : 
   17541            0 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17542              :                                    INDIRECT_LABEL,
   17543              :                                    indirectlabelno++);
   17544            0 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17545              :                                    INDIRECT_LABEL,
   17546              :                                    indirectlabelno++);
   17547              : 
   17548              :       /* Jump.  */
   17549            0 :       fputs ("\tjmp\t", asm_out_file);
   17550            0 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17551            0 :       fputc ('\n', asm_out_file);
   17552              : 
   17553            0 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17554              : 
   17555              :       /* An external function may be called via GOT, instead of PLT.  */
   17556            0 :       if (MEM_P (call_op))
   17557              :         {
   17558            0 :           struct ix86_address parts;
   17559            0 :           rtx addr = XEXP (call_op, 0);
   17560            0 :           if (ix86_decompose_address (addr, &parts)
   17561            0 :               && parts.base == stack_pointer_rtx)
   17562              :             {
   17563              :               /* Since call will adjust stack by -UNITS_PER_WORD,
   17564              :                  we must convert "disp(stack, index, scale)" to
   17565              :                  "disp+UNITS_PER_WORD(stack, index, scale)".  */
   17566            0 :               if (parts.index)
   17567              :                 {
   17568            0 :                   addr = gen_rtx_MULT (Pmode, parts.index,
   17569              :                                        GEN_INT (parts.scale));
   17570            0 :                   addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   17571              :                                        addr);
   17572              :                 }
   17573              :               else
   17574              :                 addr = stack_pointer_rtx;
   17575              : 
   17576            0 :               rtx disp;
   17577            0 :               if (parts.disp != NULL_RTX)
   17578            0 :                 disp = plus_constant (Pmode, parts.disp,
   17579            0 :                                       UNITS_PER_WORD);
   17580              :               else
   17581            0 :                 disp = GEN_INT (UNITS_PER_WORD);
   17582              : 
   17583            0 :               addr = gen_rtx_PLUS (Pmode, addr, disp);
   17584            0 :               call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
   17585              :             }
   17586              :         }
   17587              : 
   17588            0 :       output_asm_insn (push_buf, &call_op);
   17589              : 
   17590            0 :       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17591              : 
   17592            0 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17593              : 
   17594              :       /* Call.  */
   17595            0 :       fputs ("\tcall\t", asm_out_file);
   17596            0 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17597            0 :       fputc ('\n', asm_out_file);
   17598              :     }
   17599            0 : }
   17600              : 
   17601              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17602              :    the branch target.  XASM is the assembly template for CALL_OP.
   17603              :    Branch is a tail call if SIBCALL_P is true.   */
   17604              : 
   17605              : static void
   17606           50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
   17607              :                              bool sibcall_p)
   17608              : {
   17609           50 :   if (REG_P (call_op))
   17610           50 :     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
   17611              :   else
   17612            0 :     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
   17613           50 : }
   17614              : 
   17615              : /* Output indirect jump.  CALL_OP is the jump target.  */
   17616              : 
   17617              : const char *
   17618         7633 : ix86_output_indirect_jmp (rtx call_op)
   17619              : {
   17620         7633 :   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
   17621              :     {
   17622              :       /* We can't have red-zone since "call" in the indirect thunk
   17623              :          pushes the return address onto stack, destroying red-zone.  */
   17624            4 :       if (ix86_red_zone_used)
   17625            0 :         gcc_unreachable ();
   17626              : 
   17627            4 :       ix86_output_indirect_branch (call_op, "%0", true);
   17628              :     }
   17629              :   else
   17630         7629 :     output_asm_insn ("%!jmp\t%A0", &call_op);
   17631         7633 :   return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
   17632              : }
   17633              : 
   17634              : /* Output return instrumentation for current function if needed.  */
   17635              : 
   17636              : static void
   17637      1709774 : output_return_instrumentation (void)
   17638              : {
   17639      1709774 :   if (ix86_instrument_return != instrument_return_none
   17640            6 :       && flag_fentry
   17641      1709780 :       && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
   17642              :     {
   17643            5 :       if (ix86_flag_record_return)
   17644            5 :         fprintf (asm_out_file, "1:\n");
   17645            5 :       switch (ix86_instrument_return)
   17646              :         {
   17647            2 :         case instrument_return_call:
   17648            2 :           fprintf (asm_out_file, "\tcall\t__return__\n");
   17649            2 :           break;
   17650            3 :         case instrument_return_nop5:
   17651              :           /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
   17652            3 :           fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
   17653            3 :           break;
   17654              :         case instrument_return_none:
   17655              :           break;
   17656              :         }
   17657              : 
   17658            5 :       if (ix86_flag_record_return)
   17659              :         {
   17660            5 :           fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
   17661            5 :           fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   17662            5 :           fprintf (asm_out_file, "\t.previous\n");
   17663              :         }
   17664              :     }
   17665      1709774 : }
   17666              : 
   17667              : /* Output function return.  CALL_OP is the jump target.  Add a REP
   17668              :    prefix to RET if LONG_P is true and function return is kept.  */
   17669              : 
   17670              : const char *
   17671      1580582 : ix86_output_function_return (bool long_p)
   17672              : {
   17673      1580582 :   output_return_instrumentation ();
   17674              : 
   17675      1580582 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17676              :     {
   17677           17 :       char thunk_name[32];
   17678           17 :       enum indirect_thunk_prefix need_prefix
   17679           17 :         = indirect_thunk_need_prefix (current_output_insn);
   17680              : 
   17681           17 :       if (cfun->machine->function_return_type
   17682           17 :           != indirect_branch_thunk_inline)
   17683              :         {
   17684           12 :           bool need_thunk = (cfun->machine->function_return_type
   17685              :                              == indirect_branch_thunk);
   17686           12 :           indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
   17687              :                                true);
   17688           12 :           indirect_return_needed |= need_thunk;
   17689           12 :           fprintf (asm_out_file, "\tjmp\t");
   17690           12 :           assemble_name (asm_out_file, thunk_name);
   17691           12 :           putc ('\n', asm_out_file);
   17692              :         }
   17693              :       else
   17694            5 :         output_indirect_thunk (INVALID_REGNUM);
   17695              : 
   17696           17 :       return "";
   17697              :     }
   17698              : 
   17699      3160641 :   output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
   17700      1580565 :   return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
   17701              : }
   17702              : 
   17703              : /* Output indirect function return.  RET_OP is the function return
   17704              :    target.  */
   17705              : 
   17706              : const char *
   17707           17 : ix86_output_indirect_function_return (rtx ret_op)
   17708              : {
   17709           17 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17710              :     {
   17711            0 :       char thunk_name[32];
   17712            0 :       enum indirect_thunk_prefix need_prefix
   17713            0 :         = indirect_thunk_need_prefix (current_output_insn);
   17714            0 :       unsigned int regno = REGNO (ret_op);
   17715            0 :       gcc_assert (regno == CX_REG);
   17716              : 
   17717            0 :       if (cfun->machine->function_return_type
   17718            0 :           != indirect_branch_thunk_inline)
   17719              :         {
   17720            0 :           bool need_thunk = (cfun->machine->function_return_type
   17721              :                              == indirect_branch_thunk);
   17722            0 :           indirect_thunk_name (thunk_name, regno, need_prefix, true);
   17723              : 
   17724            0 :           if (need_thunk)
   17725              :             {
   17726            0 :               indirect_return_via_cx = true;
   17727            0 :               SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
   17728              :             }
   17729            0 :           fprintf (asm_out_file, "\tjmp\t");
   17730            0 :           assemble_name (asm_out_file, thunk_name);
   17731            0 :           putc ('\n', asm_out_file);
   17732              :         }
   17733              :       else
   17734            0 :         output_indirect_thunk (regno);
   17735              :     }
   17736              :   else
   17737              :     {
   17738           17 :       output_asm_insn ("%!jmp\t%A0", &ret_op);
   17739           17 :       if (ix86_harden_sls & harden_sls_indirect_jmp)
   17740            1 :         fputs ("\tint3\n", asm_out_file);
   17741              :     }
   17742           17 :   return "";
   17743              : }
   17744              : 
   17745              : /* Output the assembly for a call instruction.  */
   17746              : 
   17747              : const char *
   17748      6106824 : ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   17749              : {
   17750      6106824 :   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
   17751      6106824 :   bool output_indirect_p
   17752              :     = (!TARGET_SEH
   17753      6106824 :        && cfun->machine->indirect_branch_type != indirect_branch_keep);
   17754      6106824 :   bool seh_nop_p = false;
   17755      6106824 :   const char *xasm;
   17756              : 
   17757      6106824 :   if (SIBLING_CALL_P (insn))
   17758              :     {
   17759       129192 :       output_return_instrumentation ();
   17760       129192 :       if (direct_p)
   17761              :         {
   17762       119610 :           if (ix86_nopic_noplt_attribute_p (call_op))
   17763              :             {
   17764            4 :               direct_p = false;
   17765            4 :               if (TARGET_64BIT)
   17766              :                 {
   17767            4 :                   if (output_indirect_p)
   17768              :                     xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17769              :                   else
   17770            4 :                     xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17771              :                 }
   17772              :               else
   17773              :                 {
   17774            0 :                   if (output_indirect_p)
   17775              :                     xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17776              :                   else
   17777            0 :                     xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17778              :                 }
   17779              :             }
   17780              :           else
   17781              :             xasm = "%!jmp\t%P0";
   17782              :         }
   17783              :       /* SEH epilogue detection requires the indirect branch case
   17784              :          to include REX.W.  */
   17785         9582 :       else if (TARGET_SEH)
   17786              :         xasm = "%!rex.W jmp\t%A0";
   17787              :       else
   17788              :         {
   17789         9582 :           if (output_indirect_p)
   17790              :             xasm = "%0";
   17791              :           else
   17792         9559 :             xasm = "%!jmp\t%A0";
   17793              :         }
   17794              : 
   17795       129192 :       if (output_indirect_p && !direct_p)
   17796           23 :         ix86_output_indirect_branch (call_op, xasm, true);
   17797              :       else
   17798              :         {
   17799       129169 :           output_asm_insn (xasm, &call_op);
   17800       129169 :           if (!direct_p
   17801         9563 :               && (ix86_harden_sls & harden_sls_indirect_jmp))
   17802              :             return "int3";
   17803              :         }
   17804       129191 :       return "";
   17805              :     }
   17806              : 
   17807              :   /* SEH unwinding can require an extra nop to be emitted in several
   17808              :      circumstances.  Determine if we have one of those.  */
   17809      5977632 :   if (TARGET_SEH)
   17810              :     {
   17811              :       rtx_insn *i;
   17812              : 
   17813              :       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
   17814              :         {
   17815              :           /* Prevent a catch region from being adjacent to a jump that would
   17816              :              be interpreted as an epilogue sequence by the unwinder.  */
   17817              :           if (JUMP_P(i) && CROSSING_JUMP_P (i))
   17818              :             {
   17819              :               seh_nop_p = true;
   17820              :               break;
   17821              :             }
   17822              : 
   17823              :           /* If we get to another real insn, we don't need the nop.  */
   17824              :           if (INSN_P (i))
   17825              :             break;
   17826              : 
   17827              :           /* If we get to the epilogue note, prevent a catch region from
   17828              :              being adjacent to the standard epilogue sequence.  Note that,
   17829              :              if non-call exceptions are enabled, we already did it during
   17830              :              epilogue expansion, or else, if the insn can throw internally,
   17831              :              we already did it during the reorg pass.  */
   17832              :           if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
   17833              :               && !flag_non_call_exceptions
   17834              :               && !can_throw_internal (insn))
   17835              :             {
   17836              :               seh_nop_p = true;
   17837              :               break;
   17838              :             }
   17839              :         }
   17840              : 
   17841              :       /* If we didn't find a real insn following the call, prevent the
   17842              :          unwinder from looking into the next function.  */
   17843              :       if (i == NULL)
   17844              :         seh_nop_p = true;
   17845              :     }
   17846              : 
   17847      5977632 :   if (direct_p)
   17848              :     {
   17849      5804828 :       if (ix86_nopic_noplt_attribute_p (call_op))
   17850              :         {
   17851            6 :           direct_p = false;
   17852            6 :           if (TARGET_64BIT)
   17853              :             {
   17854            6 :               if (output_indirect_p)
   17855              :                 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17856              :               else
   17857            6 :                 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17858              :             }
   17859              :           else
   17860              :             {
   17861            0 :               if (output_indirect_p)
   17862              :                 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17863              :               else
   17864            0 :                 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17865              :             }
   17866              :         }
   17867              :       else
   17868              :         xasm = "%!call\t%P0";
   17869              :     }
   17870              :   else
   17871              :     {
   17872       172804 :       if (output_indirect_p)
   17873              :         xasm = "%0";
   17874              :       else
   17875       172781 :         xasm = "%!call\t%A0";
   17876              :     }
   17877              : 
   17878      5977632 :   if (output_indirect_p && !direct_p)
   17879           23 :     ix86_output_indirect_branch (call_op, xasm, false);
   17880              :   else
   17881      5977609 :     output_asm_insn (xasm, &call_op);
   17882              : 
   17883              :   if (seh_nop_p)
   17884              :     return "nop";
   17885              : 
   17886              :   return "";
   17887              : }
   17888              : 
   17889              : /* Return a MEM corresponding to a stack slot with mode MODE.
   17890              :    Allocate a new slot if necessary.
   17891              : 
   17892              :    The RTL for a function can have several slots available: N is
   17893              :    which slot to use.  */
   17894              : 
   17895              : rtx
   17896        22370 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
   17897              : {
   17898        22370 :   struct stack_local_entry *s;
   17899              : 
   17900        22370 :   gcc_assert (n < MAX_386_STACK_LOCALS);
   17901              : 
   17902        33733 :   for (s = ix86_stack_locals; s; s = s->next)
   17903        31150 :     if (s->mode == mode && s->n == n)
   17904        19787 :       return validize_mem (copy_rtx (s->rtl));
   17905              : 
   17906         2583 :   int align = 0;
   17907              :   /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
   17908              :      alignment with -m32 -mpreferred-stack-boundary=2.  */
   17909         2583 :   if (mode == DImode
   17910          329 :       && !TARGET_64BIT
   17911          329 :       && n == SLOT_FLOATxFDI_387
   17912         2912 :       && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
   17913              :     align = 32;
   17914         2583 :   s = ggc_alloc<stack_local_entry> ();
   17915         2583 :   s->n = n;
   17916         2583 :   s->mode = mode;
   17917         5166 :   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
   17918              : 
   17919         2583 :   s->next = ix86_stack_locals;
   17920         2583 :   ix86_stack_locals = s;
   17921         2583 :   return validize_mem (copy_rtx (s->rtl));
   17922              : }
   17923              : 
   17924              : static void
   17925      1481478 : ix86_instantiate_decls (void)
   17926              : {
   17927      1481478 :   struct stack_local_entry *s;
   17928              : 
   17929      1481478 :   for (s = ix86_stack_locals; s; s = s->next)
   17930            0 :     if (s->rtl != NULL_RTX)
   17931            0 :       instantiate_decl_rtl (s->rtl);
   17932      1481478 : }
   17933              : 
   17934              : /* Check whether x86 address PARTS is a pc-relative address.  */
   17935              : 
   17936              : bool
   17937     27343073 : ix86_rip_relative_addr_p (struct ix86_address *parts)
   17938              : {
   17939     27343073 :   rtx base, index, disp;
   17940              : 
   17941     27343073 :   base = parts->base;
   17942     27343073 :   index = parts->index;
   17943     27343073 :   disp = parts->disp;
   17944              : 
   17945     27343073 :   if (disp && !base && !index)
   17946              :     {
   17947     25571187 :       if (TARGET_64BIT)
   17948              :         {
   17949     23906482 :           rtx symbol = disp;
   17950              : 
   17951     23906482 :           if (GET_CODE (disp) == CONST)
   17952      7848612 :             symbol = XEXP (disp, 0);
   17953     23906482 :           if (GET_CODE (symbol) == PLUS
   17954      7333810 :               && CONST_INT_P (XEXP (symbol, 1)))
   17955      7333810 :             symbol = XEXP (symbol, 0);
   17956              : 
   17957     23906482 :           if (LABEL_REF_P (symbol)
   17958     23898955 :               || (SYMBOL_REF_P (symbol)
   17959     22633741 :                   && SYMBOL_REF_TLS_MODEL (symbol) == 0)
   17960     25171696 :               || (GET_CODE (symbol) == UNSPEC
   17961       533703 :                   && (XINT (symbol, 1) == UNSPEC_GOTPCREL
   17962              :                       || XINT (symbol, 1) == UNSPEC_PCREL
   17963              :                       || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
   17964     23147488 :             return true;
   17965              :         }
   17966              :     }
   17967              :   return false;
   17968              : }
   17969              : 
   17970              : /* Calculate the length of the memory address in the instruction encoding.
   17971              :    Includes addr32 prefix, does not include the one-byte modrm, opcode,
   17972              :    or other prefixes.  We never generate addr32 prefix for LEA insn.  */
   17973              : 
   17974              : int
   17975    272086731 : memory_address_length (rtx addr, bool lea)
   17976              : {
   17977    272086731 :   struct ix86_address parts;
   17978    272086731 :   rtx base, index, disp;
   17979    272086731 :   int len;
   17980    272086731 :   int ok;
   17981              : 
   17982    272086731 :   if (GET_CODE (addr) == PRE_DEC
   17983    263522485 :       || GET_CODE (addr) == POST_INC
   17984    259011414 :       || GET_CODE (addr) == PRE_MODIFY
   17985    259011414 :       || GET_CODE (addr) == POST_MODIFY)
   17986              :     return 0;
   17987              : 
   17988    259011414 :   ok = ix86_decompose_address (addr, &parts);
   17989    259011414 :   gcc_assert (ok);
   17990              : 
   17991    259011414 :   len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
   17992              : 
   17993              :   /*  If this is not LEA instruction, add the length of addr32 prefix.  */
   17994    220504876 :   if (TARGET_64BIT && !lea
   17995    454466819 :       && (SImode_address_operand (addr, VOIDmode)
   17996    195455252 :           || (parts.base && GET_MODE (parts.base) == SImode)
   17997    195445022 :           || (parts.index && GET_MODE (parts.index) == SImode)))
   17998        10383 :     len++;
   17999              : 
   18000    259011414 :   base = parts.base;
   18001    259011414 :   index = parts.index;
   18002    259011414 :   disp = parts.disp;
   18003              : 
   18004    259011414 :   if (base && SUBREG_P (base))
   18005            2 :     base = SUBREG_REG (base);
   18006    259011414 :   if (index && SUBREG_P (index))
   18007            0 :     index = SUBREG_REG (index);
   18008              : 
   18009    259011414 :   gcc_assert (base == NULL_RTX || REG_P (base));
   18010    259011414 :   gcc_assert (index == NULL_RTX || REG_P (index));
   18011              : 
   18012              :   /* Rule of thumb:
   18013              :        - esp as the base always wants an index,
   18014              :        - ebp as the base always wants a displacement,
   18015              :        - r12 as the base always wants an index,
   18016              :        - r13 as the base always wants a displacement.  */
   18017              : 
   18018              :   /* Register Indirect.  */
   18019    259011414 :   if (base && !index && !disp)
   18020              :     {
   18021              :       /* esp (for its index) and ebp (for its displacement) need
   18022              :          the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
   18023              :          code.  */
   18024     16806469 :       if (base == arg_pointer_rtx
   18025     16806469 :           || base == frame_pointer_rtx
   18026     16806469 :           || REGNO (base) == SP_REG
   18027     10031527 :           || REGNO (base) == BP_REG
   18028     10031527 :           || REGNO (base) == R12_REG
   18029     26380625 :           || REGNO (base) == R13_REG)
   18030      7232313 :         len++;
   18031              :     }
   18032              : 
   18033              :   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   18034              :      is not disp32, but disp32(%rip), so for disp32
   18035              :      SIB byte is needed, unless print_operand_address
   18036              :      optimizes it into disp32(%rip) or (%rip) is implied
   18037              :      by UNSPEC.  */
   18038    242204945 :   else if (disp && !base && !index)
   18039              :     {
   18040     24595615 :       len += 4;
   18041     24595615 :       if (!ix86_rip_relative_addr_p (&parts))
   18042      1850984 :         len++;
   18043              :     }
   18044              :   else
   18045              :     {
   18046              :       /* Find the length of the displacement constant.  */
   18047    217609330 :       if (disp)
   18048              :         {
   18049    213530792 :           if (base && satisfies_constraint_K (disp))
   18050    123956006 :             len += 1;
   18051              :           else
   18052     89574786 :             len += 4;
   18053              :         }
   18054              :       /* ebp always wants a displacement.  Similarly r13.  */
   18055      4078538 :       else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
   18056         7909 :         len++;
   18057              : 
   18058              :       /* An index requires the two-byte modrm form....  */
   18059    217609330 :       if (index
   18060              :           /* ...like esp (or r12), which always wants an index.  */
   18061    206783812 :           || base == arg_pointer_rtx
   18062    206783812 :           || base == frame_pointer_rtx
   18063    424393142 :           || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
   18064    155798873 :         len++;
   18065              :     }
   18066              : 
   18067              :   return len;
   18068              : }
   18069              : 
   18070              : /* Compute default value for "length_immediate" attribute.  When SHORTFORM
   18071              :    is set, expect that insn have 8bit immediate alternative.  */
   18072              : int
   18073    317372903 : ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
   18074              : {
   18075    317372903 :   int len = 0;
   18076    317372903 :   int i;
   18077    317372903 :   extract_insn_cached (insn);
   18078    990246595 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18079    672873692 :     if (CONSTANT_P (recog_data.operand[i]))
   18080              :       {
   18081    139933145 :         enum attr_mode mode = get_attr_mode (insn);
   18082              : 
   18083    139933145 :         gcc_assert (!len);
   18084    139933145 :         if (shortform && CONST_INT_P (recog_data.operand[i]))
   18085              :           {
   18086     37492769 :             HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
   18087     37492769 :             switch (mode)
   18088              :               {
   18089      1274037 :               case MODE_QI:
   18090      1274037 :                 len = 1;
   18091      1274037 :                 continue;
   18092       437496 :               case MODE_HI:
   18093       437496 :                 ival = trunc_int_for_mode (ival, HImode);
   18094       437496 :                 break;
   18095     15884976 :               case MODE_SI:
   18096     15884976 :                 ival = trunc_int_for_mode (ival, SImode);
   18097     15884976 :                 break;
   18098              :               default:
   18099              :                 break;
   18100              :               }
   18101     36218732 :             if (IN_RANGE (ival, -128, 127))
   18102              :               {
   18103     32121221 :                 len = 1;
   18104     32121221 :                 continue;
   18105              :               }
   18106              :           }
   18107    106537887 :         switch (mode)
   18108              :           {
   18109              :           case MODE_QI:
   18110              :             len = 1;
   18111              :             break;
   18112              :           case MODE_HI:
   18113    672873692 :             len = 2;
   18114              :             break;
   18115              :           case MODE_SI:
   18116    100686468 :             len = 4;
   18117              :             break;
   18118              :           /* Immediates for DImode instructions are encoded
   18119              :              as 32bit sign extended values.  */
   18120              :           case MODE_DI:
   18121    100686468 :             len = 4;
   18122              :             break;
   18123            0 :           default:
   18124            0 :             fatal_insn ("unknown insn mode", insn);
   18125              :         }
   18126              :       }
   18127    317372903 :   return len;
   18128              : }
   18129              : 
   18130              : /* Compute default value for "length_address" attribute.  */
   18131              : int
   18132    445219408 : ix86_attr_length_address_default (rtx_insn *insn)
   18133              : {
   18134    445219408 :   int i;
   18135              : 
   18136    445219408 :   if (get_attr_type (insn) == TYPE_LEA)
   18137              :     {
   18138     27768778 :       rtx set = PATTERN (insn), addr;
   18139              : 
   18140     27768778 :       if (GET_CODE (set) == PARALLEL)
   18141        86753 :         set = XVECEXP (set, 0, 0);
   18142              : 
   18143     27768778 :       gcc_assert (GET_CODE (set) == SET);
   18144              : 
   18145     27768778 :       addr = SET_SRC (set);
   18146              : 
   18147     27768778 :       return memory_address_length (addr, true);
   18148              :     }
   18149              : 
   18150    417450630 :   extract_insn_cached (insn);
   18151    958266058 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18152              :     {
   18153    784847669 :       rtx op = recog_data.operand[i];
   18154    784847669 :       if (MEM_P (op))
   18155              :         {
   18156    244311252 :           constrain_operands_cached (insn, reload_completed);
   18157    244311252 :           if (which_alternative != -1)
   18158              :             {
   18159    244311252 :               const char *constraints = recog_data.constraints[i];
   18160    244311252 :               int alt = which_alternative;
   18161              : 
   18162    387598041 :               while (*constraints == '=' || *constraints == '+')
   18163    143286789 :                 constraints++;
   18164   1111732793 :               while (alt-- > 0)
   18165   2127061670 :                 while (*constraints++ != ',')
   18166              :                   ;
   18167              :               /* Skip ignored operands.  */
   18168    244311252 :               if (*constraints == 'X')
   18169       279011 :                 continue;
   18170              :             }
   18171              : 
   18172    244032241 :           int len = memory_address_length (XEXP (op, 0), false);
   18173              : 
   18174              :           /* Account for segment prefix for non-default addr spaces.  */
   18175    257419134 :           if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
   18176       783906 :             len++;
   18177              : 
   18178    244032241 :           return len;
   18179              :         }
   18180              :     }
   18181              :   return 0;
   18182              : }
   18183              : 
   18184              : /* Compute default value for "length_vex" attribute. It includes
   18185              :    2 or 3 byte VEX prefix and 1 opcode byte.  */
   18186              : 
   18187              : int
   18188      5050849 : ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
   18189              :                               bool has_vex_w)
   18190              : {
   18191      5050849 :   int i, reg_only = 2 + 1;
   18192      5050849 :   bool has_mem = false;
   18193              : 
   18194              :   /* Only 0f opcode can use 2 byte VEX prefix and  VEX W bit uses 3
   18195              :      byte VEX prefix.  */
   18196      5050849 :   if (!has_0f_opcode || has_vex_w)
   18197              :     return 3 + 1;
   18198              : 
   18199              :  /* We can always use 2 byte VEX prefix in 32bit.  */
   18200      4616094 :   if (!TARGET_64BIT)
   18201              :     return 2 + 1;
   18202              : 
   18203      3524662 :   extract_insn_cached (insn);
   18204              : 
   18205     10995720 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18206      7777781 :     if (REG_P (recog_data.operand[i]))
   18207              :       {
   18208              :         /* REX.W bit uses 3 byte VEX prefix.
   18209              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18210      5124758 :         if (GET_MODE (recog_data.operand[i]) == DImode
   18211      5124758 :             && GENERAL_REG_P (recog_data.operand[i]))
   18212              :           return 3 + 1;
   18213              : 
   18214              :         /* REX.B bit requires 3-byte VEX. Right here we don't know which
   18215              :            operand will be encoded using VEX.B, so be conservative.
   18216              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18217      5112670 :         if (REX_INT_REGNO_P (recog_data.operand[i])
   18218      5112670 :             || REX2_INT_REGNO_P (recog_data.operand[i])
   18219      5112670 :             || REX_SSE_REGNO_P (recog_data.operand[i]))
   18220            0 :           reg_only = 3 + 1;
   18221              :       }
   18222      2653023 :     else if (MEM_P (recog_data.operand[i]))
   18223              :       {
   18224              :         /* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
   18225      2066312 :         if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
   18226              :           return 4;
   18227              : 
   18228              :         /* REX.X or REX.B bits use 3 byte VEX prefix.  */
   18229      2066053 :         if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
   18230              :           return 3 + 1;
   18231              : 
   18232              :         has_mem = true;
   18233              :       }
   18234              : 
   18235      3217939 :   return has_mem ? 2 + 1 : reg_only;
   18236              : }
   18237              : 
   18238              : 
   18239              : static bool
   18240              : ix86_class_likely_spilled_p (reg_class_t);
   18241              : 
   18242              : /* Returns true if lhs of insn is HW function argument register and set up
   18243              :    is_spilled to true if it is likely spilled HW register.  */
   18244              : static bool
   18245         1149 : insn_is_function_arg (rtx insn, bool* is_spilled)
   18246              : {
   18247         1149 :   rtx dst;
   18248              : 
   18249         1149 :   if (!NONDEBUG_INSN_P (insn))
   18250              :     return false;
   18251              :   /* Call instructions are not movable, ignore it.  */
   18252         1149 :   if (CALL_P (insn))
   18253              :     return false;
   18254         1075 :   insn = PATTERN (insn);
   18255         1075 :   if (GET_CODE (insn) == PARALLEL)
   18256           73 :     insn = XVECEXP (insn, 0, 0);
   18257         1075 :   if (GET_CODE (insn) != SET)
   18258              :     return false;
   18259         1075 :   dst = SET_DEST (insn);
   18260          979 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   18261         1948 :       && ix86_function_arg_regno_p (REGNO (dst)))
   18262              :     {
   18263              :       /* Is it likely spilled HW register?  */
   18264          873 :       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
   18265          873 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
   18266          829 :         *is_spilled = true;
   18267          873 :       return true;
   18268              :     }
   18269              :   return false;
   18270              : }
   18271              : 
   18272              : /* Add output dependencies for chain of function adjacent arguments if only
   18273              :    there is a move to likely spilled HW register.  Return first argument
   18274              :    if at least one dependence was added or NULL otherwise.  */
   18275              : static rtx_insn *
   18276          415 : add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
   18277              : {
   18278          415 :   rtx_insn *insn;
   18279          415 :   rtx_insn *last = call;
   18280          415 :   rtx_insn *first_arg = NULL;
   18281          415 :   bool is_spilled = false;
   18282              : 
   18283          415 :   head = PREV_INSN (head);
   18284              : 
   18285              :   /* Find nearest to call argument passing instruction.  */
   18286          415 :   while (true)
   18287              :     {
   18288          415 :       last = PREV_INSN (last);
   18289          415 :       if (last == head)
   18290              :         return NULL;
   18291          415 :       if (!NONDEBUG_INSN_P (last))
   18292            0 :         continue;
   18293          415 :       if (insn_is_function_arg (last, &is_spilled))
   18294              :         break;
   18295              :       return NULL;
   18296              :     }
   18297              : 
   18298              :   first_arg = last;
   18299         1054 :   while (true)
   18300              :     {
   18301         1054 :       insn = PREV_INSN (last);
   18302         1054 :       if (!INSN_P (insn))
   18303              :         break;
   18304          956 :       if (insn == head)
   18305              :         break;
   18306          915 :       if (!NONDEBUG_INSN_P (insn))
   18307              :         {
   18308          181 :           last = insn;
   18309          181 :           continue;
   18310              :         }
   18311          734 :       if (insn_is_function_arg (insn, &is_spilled))
   18312              :         {
   18313              :           /* Add output depdendence between two function arguments if chain
   18314              :              of output arguments contains likely spilled HW registers.  */
   18315          466 :           if (is_spilled)
   18316          466 :             add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18317              :           first_arg = last = insn;
   18318              :         }
   18319              :       else
   18320              :         break;
   18321              :     }
   18322          407 :   if (!is_spilled)
   18323              :     return NULL;
   18324              :   return first_arg;
   18325              : }
   18326              : 
   18327              : /* Add output or anti dependency from insn to first_arg to restrict its code
   18328              :    motion.  */
   18329              : static void
   18330         2333 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
   18331              : {
   18332         2333 :   rtx set;
   18333         2333 :   rtx tmp;
   18334              : 
   18335         2333 :   set = single_set (insn);
   18336         2333 :   if (!set)
   18337              :     return;
   18338         1453 :   tmp = SET_DEST (set);
   18339         1453 :   if (REG_P (tmp))
   18340              :     {
   18341              :       /* Add output dependency to the first function argument.  */
   18342         1258 :       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18343         1258 :       return;
   18344              :     }
   18345              :   /* Add anti dependency.  */
   18346          195 :   add_dependence (first_arg, insn, REG_DEP_ANTI);
   18347              : }
   18348              : 
   18349              : /* Avoid cross block motion of function argument through adding dependency
   18350              :    from the first non-jump instruction in bb.  */
   18351              : static void
   18352           68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
   18353              : {
   18354           68 :   rtx_insn *insn = BB_END (bb);
   18355              : 
   18356          134 :   while (insn)
   18357              :     {
   18358          134 :       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
   18359              :         {
   18360           67 :           rtx set = single_set (insn);
   18361           67 :           if (set)
   18362              :             {
   18363           67 :               avoid_func_arg_motion (arg, insn);
   18364           67 :               return;
   18365              :             }
   18366              :         }
   18367           67 :       if (insn == BB_HEAD (bb))
   18368              :         return;
   18369           66 :       insn = PREV_INSN (insn);
   18370              :     }
   18371              : }
   18372              : 
   18373              : /* Hook for pre-reload schedule - avoid motion of function arguments
   18374              :    passed in likely spilled HW registers.  */
   18375              : static void
   18376     10267590 : ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
   18377              : {
   18378     10267590 :   rtx_insn *insn;
   18379     10267590 :   rtx_insn *first_arg = NULL;
   18380     10267590 :   if (reload_completed)
   18381              :     return;
   18382         2268 :   while (head != tail && DEBUG_INSN_P (head))
   18383          766 :     head = NEXT_INSN (head);
   18384        10883 :   for (insn = tail; insn != head; insn = PREV_INSN (insn))
   18385         9517 :     if (INSN_P (insn) && CALL_P (insn))
   18386              :       {
   18387          415 :         first_arg = add_parameter_dependencies (insn, head);
   18388          415 :         if (first_arg)
   18389              :           {
   18390              :             /* Add dependee for first argument to predecessors if only
   18391              :                region contains more than one block.  */
   18392          407 :             basic_block bb =  BLOCK_FOR_INSN (insn);
   18393          407 :             int rgn = CONTAINING_RGN (bb->index);
   18394          407 :             int nr_blks = RGN_NR_BLOCKS (rgn);
   18395              :             /* Skip trivial regions and region head blocks that can have
   18396              :                predecessors outside of region.  */
   18397          407 :             if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
   18398              :               {
   18399           67 :                 edge e;
   18400           67 :                 edge_iterator ei;
   18401              : 
   18402              :                 /* Regions are SCCs with the exception of selective
   18403              :                    scheduling with pipelining of outer blocks enabled.
   18404              :                    So also check that immediate predecessors of a non-head
   18405              :                    block are in the same region.  */
   18406          137 :                 FOR_EACH_EDGE (e, ei, bb->preds)
   18407              :                   {
   18408              :                     /* Avoid creating of loop-carried dependencies through
   18409              :                        using topological ordering in the region.  */
   18410           70 :                     if (rgn == CONTAINING_RGN (e->src->index)
   18411           69 :                         && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
   18412           68 :                       add_dependee_for_func_arg (first_arg, e->src);
   18413              :                   }
   18414              :               }
   18415          407 :             insn = first_arg;
   18416          407 :             if (insn == head)
   18417              :               break;
   18418              :           }
   18419              :       }
   18420         9102 :     else if (first_arg)
   18421         2266 :       avoid_func_arg_motion (first_arg, insn);
   18422              : }
   18423              : 
   18424              : /* Hook for pre-reload schedule - set priority of moves from likely spilled
   18425              :    HW registers to maximum, to schedule them as soon as possible.  These are
   18426              :    moves from function argument registers at the top of the function entry
   18427              :    and moves from function return value registers after call.
                      :
                      :    INSN is the insn whose priority is being adjusted and PRIORITY is its
                      :    current priority.  PRIORITY is returned unchanged when reload has
                      :    completed, when INSN is not NONDEBUG_INSN_P, or when INSN is not a
                      :    single_set whose SET_SRC is a hard register that is outside
                      :    fixed_reg_set and whose register class satisfies
                      :    ix86_class_likely_spilled_p.  In the remaining case the result is
                      :    current_sched_info->sched_max_insns_priority.  */
   18428              : static int
   18429    108819635 : ix86_adjust_priority (rtx_insn *insn, int priority)
   18430              : {
   18431    108819635 :   rtx set;
   18432              : 
   18433    108819635 :   if (reload_completed)
   18434              :     return priority;
   18435              : 
   18436        14748 :   if (!NONDEBUG_INSN_P (insn))
   18437              :     return priority;
   18438              : 
   18439        12690 :   set = single_set (insn);
   18440        12690 :   if (set)
   18441              :     {
   18442        12082 :       rtx tmp = SET_SRC (set);
   18443        12082 :       if (REG_P (tmp)
   18444         2532 :           && HARD_REGISTER_P (tmp)
   18445          500 :           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
   18446        12082 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
   18447          449 :         return current_sched_info->sched_max_insns_priority;
   18448              :     }
   18449              : 
   18450              :   return priority;
   18451              : }
   18452              : 
   18453              : /* Prepare for scheduling pass.  The three hook arguments are unnamed
                      :    and unused.  For the tunings listed in the switch below,
                      :    ix86_core2i7_init_hooks is called once reload has completed.  In
                      :    every other case (other tunings, or the listed tunings before
                      :    reload) the dfa_post_advance_cycle and first_cycle_multipass_*
                      :    members of targetm.sched are reset to NULL.  */
   18454              : static void
   18455       966549 : ix86_sched_init_global (FILE *, int, int)
   18456              : {
   18457              :   /* Install scheduling hooks for current CPU.  Some of these hooks are used
   18458              :      in time-critical parts of the scheduler, so we only set them up when
   18459              :      they are actually used.  */
   18460       966549 :   switch (ix86_tune)
   18461              :     {
   18462       920005 :     case PROCESSOR_CORE2:
   18463       920005 :     case PROCESSOR_NEHALEM:
   18464       920005 :     case PROCESSOR_SANDYBRIDGE:
   18465       920005 :     case PROCESSOR_HASWELL:
   18466       920005 :     case PROCESSOR_TREMONT:
   18467       920005 :     case PROCESSOR_ALDERLAKE:
   18468       920005 :     case PROCESSOR_GENERIC:
   18469              :       /* Do not perform multipass scheduling for pre-reload schedule
   18470              :          to save compile time.  */
   18471       920005 :       if (reload_completed)
   18472              :         {
   18473       919518 :           ix86_core2i7_init_hooks ();
   18474       919518 :           break;
   18475              :         }
   18476              :       /* Fall through.  */
   18477        47031 :     default:
   18478        47031 :       targetm.sched.dfa_post_advance_cycle = NULL;
   18479        47031 :       targetm.sched.first_cycle_multipass_init = NULL;
   18480        47031 :       targetm.sched.first_cycle_multipass_begin = NULL;
   18481        47031 :       targetm.sched.first_cycle_multipass_issue = NULL;
   18482        47031 :       targetm.sched.first_cycle_multipass_backtrack = NULL;
   18483        47031 :       targetm.sched.first_cycle_multipass_end = NULL;
   18484        47031 :       targetm.sched.first_cycle_multipass_fini = NULL;
   18485        47031 :       break;
   18486              :     }
   18487       966549 : }
   18488              : 
   18489              : 
   18490              : /* Implement TARGET_STATIC_RTX_ALIGNMENT.  MODE is the machine mode
                      :    being aligned.  The result is 64 for DFmode, the larger of 128 and
                      :    GET_MODE_ALIGNMENT (MODE) for modes satisfying ALIGN_MODE_128, and
                      :    GET_MODE_ALIGNMENT (MODE) for everything else.  */
   18491              : 
   18492              : static HOST_WIDE_INT
   18493       722836 : ix86_static_rtx_alignment (machine_mode mode)
   18494              : {
   18495       722836 :   if (mode == DFmode)
   18496              :     return 64;
   18497              :   if (ALIGN_MODE_128 (mode))
   18498       155308 :     return MAX (128, GET_MODE_ALIGNMENT (mode));
   18499       482507 :   return GET_MODE_ALIGNMENT (mode);
   18500              : }
   18501              : 
   18502              : /* Implement TARGET_CONSTANT_ALIGNMENT.  EXP is the constant and ALIGN
                      :    is the alignment it would otherwise receive.
                      :
                      :    For a REAL_CST, VECTOR_CST or INTEGER_CST the result is the larger
                      :    of ALIGN and ix86_static_rtx_alignment applied to the mode of the
                      :    constant's type.  For a STRING_CST whose TREE_STRING_LENGTH is at
                      :    least 31, the result is raised to BITS_PER_WORD when ALIGN is below
                      :    that and optimize_size is not set.  In all other cases ALIGN is
                      :    returned unchanged.  */
   18503              : 
   18504              : static HOST_WIDE_INT
   18505      6872667 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
   18506              : {
   18507      6872667 :   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
   18508              :       || TREE_CODE (exp) == INTEGER_CST)
   18509              :     {
   18510       368397 :       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
   18511       368397 :       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
   18512       368397 :       return MAX (mode_align, align);
   18513              :     }
   18514      6362450 :   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
   18515      9599673 :            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
   18516              :     return BITS_PER_WORD;
   18517              : 
   18518              :   return align;
   18519              : }
   18520              : 
   18521              : /* Implement TARGET_EMPTY_RECORD_P.  Always answers false unless
                      :    TARGET_64BIT; in 64-bit mode the decision for TYPE is delegated to
                      :    default_is_empty_record.  */
   18522              : 
   18523              : static bool
   18524   1444410249 : ix86_is_empty_record (const_tree type)
   18525              : {
   18526   1444410249 :   if (!TARGET_64BIT)
   18527              :     return false;
   18528   1413506134 :   return default_is_empty_record (type);
   18529              : }
   18530              : 
   18531              : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  CUM_V wraps the
                      :    CUMULATIVE_ARGS of the call being processed and TYPE is the type
                      :    of the parameter under consideration.
                      :
                      :    An -Wabi warning about the empty-class passing change is issued
                      :    only when all of the following hold: cum->warn_empty is still set,
                      :    TYPE is TYPE_EMPTY_P, the callee decl (if known) is TREE_PUBLIC,
                      :    the ultimate context of the decl has
                      :    TRANSLATION_UNIT_WARN_EMPTY_P set, and int_size_in_bytes (TYPE)
                      :    is nonzero.  After warning, cum->warn_empty is cleared so the
                      :    same CUMULATIVE_ARGS does not warn again.  */
   18532              : 
   18533              : static void
   18534     15211455 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
   18535              : {
   18536     15211455 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   18537              : 
   18538     15211455 :   if (!cum->warn_empty)
   18539              :     return;
   18540              : 
   18541     13041017 :   if (!TYPE_EMPTY_P (type))
   18542              :     return;
   18543              : 
   18544              :   /* Don't warn if the function isn't visible outside of the TU.  */
   18545        14636 :   if (cum->decl && !TREE_PUBLIC (cum->decl))
   18546              :     return;
   18547              : 
   18548        13188 :   tree decl = cum->decl;
   18549        13188 :   if (!decl)
   18550              :     /* If we don't know the target, look at the current TU.  */
   18551           39 :     decl = current_function_decl;
   18552              : 
   18553        13188 :   const_tree ctx = get_ultimate_context (decl);
   18554        13188 :   if (ctx == NULL_TREE
   18555        26342 :       || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
   18556              :     return;
   18557              : 
   18558              :   /* If the actual size of the type is zero, then there is no change
   18559              :      in how objects of this size are passed.  */
   18560           72 :   if (int_size_in_bytes (type) == 0)
   18561              :     return;
   18562              : 
   18563           66 :   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
   18564              :            "changes in %<-fabi-version=12%> (GCC 8)", type);
   18565              : 
   18566              :   /* Only warn once.  */
   18567           66 :   cum->warn_empty = false;
   18568              : }
   18569              : 
   18570              : /* This hook returns the name of the multilib ABI, chosen from
                      :    ix86_isa_flags: "i386" when TARGET_64BIT_P does not hold, "x32"
                      :    when TARGET_X32_P holds, and "x86_64" otherwise.  The result is a
                      :    string literal.  */
   18571              : 
   18572              : static const char *
   18573      3407835 : ix86_get_multilib_abi_name (void)
   18574              : {
   18575      3407835 :   if (!(TARGET_64BIT_P (ix86_isa_flags)))
   18576              :     return "i386";
   18577      3363879 :   else if (TARGET_X32_P (ix86_isa_flags))
   18578              :     return "x32";
   18579              :   else
   18580      3363879 :     return "x86_64";
   18581              : }
   18582              : 
   18583              : /* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   18584              :    the data type, and ALIGN is the alignment that the object would
   18585              :    ordinarily have.
                      :
                      :    ALIGN is returned unchanged when it is below 32, when TYPE has
                      :    TYPE_USER_ALIGN, when the element type obtained through
                      :    strip_array_types is TYPE_ATOMIC, or when the mode class of that
                      :    element type is not one of the integer / float classes listed in
                      :    the switch.  Otherwise the result is 32.  */
   18586              : 
   18587              : static int
   18588            0 : iamcu_alignment (tree type, int align)
   18589              : {
   18590            0 :   machine_mode mode;
   18591              : 
   18592            0 :   if (align < 32 || TYPE_USER_ALIGN (type))
   18593              :     return align;
   18594              : 
   18595              :   /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
   18596              :      bytes.  */
   18597            0 :   type = strip_array_types (type);
   18598            0 :   if (TYPE_ATOMIC (type))
   18599              :     return align;
   18600              : 
   18601            0 :   mode = TYPE_MODE (type);
   18602            0 :   switch (GET_MODE_CLASS (mode))
   18603              :     {
   18604              :     case MODE_INT:
   18605              :     case MODE_COMPLEX_INT:
   18606              :     case MODE_COMPLEX_FLOAT:
   18607              :     case MODE_FLOAT:
   18608              :     case MODE_DECIMAL_FLOAT:
   18609              :       return 32;
   18610              :     default:
   18611              :       return align;
   18612              :     }
   18613              : }
   18614              : 
   18615              : /* Compute the alignment for a static variable.
   18616              :    TYPE is the data type, and ALIGN is the alignment that
   18617              :    the object would ordinarily have.  The value of this function is used
   18618              :    instead of that alignment to align the object.
                      :
                      :    OPT enables the optional increases: the cache-line / compatibility
                      :    bump for large aggregates and the per-mode bumps at the end of the
                      :    function.  It is forced to false when ix86_align_data_type is
                      :    ix86_align_data_type_abi.  With OPT false only the Intel MCU
                      :    adjustment (under TARGET_IAMCU) and the x86-64 rule for arrays of
                      :    at least 128 bits are applied.  ix86_align_data_type_compat limits
                      :    the cache-line bump to BITS_PER_WORD.  */
   18619              : 
   18620              : int
   18621     12041939 : ix86_data_alignment (tree type, unsigned int align, bool opt)
   18622              : {
   18623              :   /* GCC 4.8 and earlier used to incorrectly assume this alignment even
   18624              :      for symbols from other compilation units or symbols that don't need
   18625              :      to bind locally.  In order to preserve some ABI compatibility with
   18626              :      those compilers, ensure we don't decrease alignment from what we
   18627              :      used to assume.  */
   18628              : 
   18629     12041939 :   unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
   18630              : 
   18631              :   /* A data structure, equal or greater than the size of a cache line
   18632              :      (64 bytes in the Pentium 4 and other recent Intel processors, including
   18633              :      processors based on Intel Core microarchitecture) should be aligned
   18634              :      so that its base address is a multiple of a cache line size.  */
   18635              : 
   18636     24083878 :   unsigned int max_align
   18637     12041939 :     = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
   18638              : 
   18639     14631717 :   if (max_align < BITS_PER_WORD)
   18640            0 :     max_align = BITS_PER_WORD;
   18641              : 
   18642     12041939 :   switch (ix86_align_data_type)
   18643              :     {
   18644     12041939 :     case ix86_align_data_type_abi: opt = false; break;
   18645     12041919 :     case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
   18646              :     case ix86_align_data_type_cacheline: break;
   18647              :     }
   18648              : 
   18649     12041939 :   if (TARGET_IAMCU)
   18650            0 :     align = iamcu_alignment (type, align);
   18651              : 
   18652     12041939 :   if (opt
   18653      5794613 :       && AGGREGATE_TYPE_P (type)
   18654      3716409 :       && TYPE_SIZE (type)
   18655     15758296 :       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
   18656              :     {
   18657      6731441 :       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
   18658      3716357 :           && align < max_align_compat)
   18659       701273 :         align = max_align_compat;
   18660      7369909 :       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
   18661      3716357 :           && align < max_align)
   18662        62805 :         align = max_align;
   18663              :     }
   18664              : 
   18665              :   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
   18666              :      to 16-byte boundary.  With OPT set the same bump is applied to
                      :      every aggregate, not just arrays.  */
   18667     12041939 :   if (TARGET_64BIT)
   18668              :     {
   18669      4951679 :       if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
   18670      3264940 :           && TYPE_SIZE (type)
   18671      3264878 :           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
   18672     10887676 :           && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
   18673     11503014 :           && align < 128)
   18674       615338 :         return 128;
   18675              :     }
   18676              : 
   18677     11426601 :   if (!opt)
   18678      6052168 :     return align;
   18679              : 
   18680      5374433 :   if (TREE_CODE (type) == ARRAY_TYPE)
   18681              :     {
   18682      1102403 :       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
   18683              :         return 64;
   18684      1102403 :       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
   18685              :         return 128;
   18686              :     }
   18687      4272030 :   else if (TREE_CODE (type) == COMPLEX_TYPE)
   18688              :     {
   18689              : 
   18690        12972 :       if (TYPE_MODE (type) == DCmode && align < 64)
   18691              :         return 64;
   18692        12972 :       if ((TYPE_MODE (type) == XCmode
   18693        12972 :            || TYPE_MODE (type) == TCmode) && align < 128)
   18694              :         return 128;
   18695              :     }
   18696      4259058 :   else if (RECORD_OR_UNION_TYPE_P (type)
   18697      4259058 :            && TYPE_FIELDS (type))
   18698              :     {
   18699      2192083 :       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
   18700              :         return 64;
   18701      2192083 :       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
   18702              :         return 128;
   18703              :     }
   18704      2066975 :   else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
   18705              :            || TREE_CODE (type) == INTEGER_TYPE)
   18706              :     {
   18707      1918494 :       if (TYPE_MODE (type) == DFmode && align < 64)
   18708              :         return 64;
   18709      1918494 :       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
   18710              :         return 128;
   18711              :     }
   18712              : 
   18713      5374320 :   return align;
   18714              : }
   18715              : 
   18716              : /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  Ask
                      :    ix86_local_alignment for the alignment of DECL with lowering
                      :    permitted (VOIDmode, current DECL_ALIGN, may_lower true) and store
                      :    the answer back into DECL only when it is smaller than the current
                      :    DECL_ALIGN.  DECL's alignment is therefore never raised here.  */
   18717              : static void
   18718     31539727 : ix86_lower_local_decl_alignment (tree decl)
   18719              : {
   18720     31539727 :   unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
   18721     31539727 :                                                  DECL_ALIGN (decl), true);
   18722     31539727 :   if (new_align < DECL_ALIGN (decl))
   18723            0 :     SET_DECL_ALIGN (decl, new_align);
   18724     31539727 : }
   18725              : 
   18726              : /* Compute the alignment for a local variable or a stack slot.  EXP is
   18727              :    the data type or decl itself, MODE is the widest mode available and
   18728              :    ALIGN is the alignment that the object would ordinarily have.  The
   18729              :    value of this function is used instead of that alignment to align the
   18730              :    object.
                      :
                      :    EXP may be a decl (its TREE_TYPE is then used as the type), a type,
                      :    or NULL.  When MAY_LOWER is true, an ALIGN of 64 may first be
                      :    reduced to 32 under the -mpreferred-stack-boundary=2 rule below;
                      :    apart from that lowering, the result is never smaller than the
                      :    alignment the function is working with.  */
   18731              : 
   18732              : unsigned int
   18733     49009384 : ix86_local_alignment (tree exp, machine_mode mode,
   18734              :                       unsigned int align, bool may_lower)
   18735              : {
   18736     49009384 :   tree type, decl;
   18737              : 
   18738     49009384 :   if (exp && DECL_P (exp))
   18739              :     {
   18740     46852850 :       type = TREE_TYPE (exp);
   18741     46852850 :       decl = exp;
   18742              :     }
   18743              :   else
   18744              :     {
   18745              :       type = exp;
   18746              :       decl = NULL;
   18747              :     }
   18748              : 
   18749              :   /* Don't do dynamic stack realignment for long long objects with
   18750              :      -mpreferred-stack-boundary=2.  Objects with user-specified
                      :      alignment (on the type or the decl) and atomic types are left
                      :      alone.  */
   18751     49009384 :   if (may_lower
   18752     31539727 :       && !TARGET_64BIT
   18753       249231 :       && align == 64
   18754        38964 :       && ix86_preferred_stack_boundary < 64
   18755            0 :       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
   18756            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18757            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18758     49009384 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18759              :     align = 32;
   18760              : 
   18761              :   /* If TYPE is NULL, we are allocating a stack slot for caller-save
   18762              :      register in MODE.  We will return the largest alignment of XF
   18763              :      and DF.  */
   18764     49009384 :   if (!type)
   18765              :     {
   18766      1408878 :       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
   18767         1479 :         align = GET_MODE_ALIGNMENT (DFmode);
   18768      1408878 :       return align;
   18769              :     }
   18770              : 
   18771              :   /* Don't increase alignment for Intel MCU psABI.  */
   18772     47600506 :   if (TARGET_IAMCU)
   18773              :     return align;
   18774              : 
   18775              :   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
   18776              :      to 16-byte boundary.  Exact wording is:
   18777              : 
   18778              :      An array uses the same alignment as its elements, except that a local or
   18779              :      global array variable of length at least 16 bytes or
   18780              :      a C99 variable-length array variable always has alignment of at least 16 bytes.
   18781              : 
   18782              :      This was added to allow use of aligned SSE instructions at arrays.  This
   18783              :      rule is meant for static storage (where compiler cannot do the analysis
   18784              :      by itself).  We follow it for automatic variables only when convenient.
   18785              :      We fully control everything in the function compiled and functions from
   18786              :      other unit cannot rely on the alignment.
   18787              : 
   18788              :      Exclude va_list type.  It is the common case of local array where
   18789              :      we cannot benefit from the alignment.
   18790              : 
   18791              :      TODO: Probably one should optimize for size only when var is not escaping.  */
   18792     44766247 :   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
   18793     92011320 :       && TARGET_SSE)
   18794              :     {
   18795     44371143 :       if (AGGREGATE_TYPE_P (type)
   18796      9350305 :           && (va_list_type_node == NULL_TREE
   18797      9350305 :               || (TYPE_MAIN_VARIANT (type)
   18798      9350305 :                   != TYPE_MAIN_VARIANT (va_list_type_node)))
   18799      9251674 :           && TYPE_SIZE (type)
   18800      9251674 :           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
   18801     45438313 :           && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
   18802     51209506 :           && align < 128)
   18803      5771193 :         return 128;
   18804              :     }
   18805     41829313 :   if (TREE_CODE (type) == ARRAY_TYPE)
   18806              :     {
   18807       793242 :       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
   18808              :         return 64;
   18809       793242 :       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
   18810              :         return 128;
   18811              :     }
   18812     41036071 :   else if (TREE_CODE (type) == COMPLEX_TYPE)
   18813              :     {
   18814       154326 :       if (TYPE_MODE (type) == DCmode && align < 64)
   18815              :         return 64;
   18816       154326 :       if ((TYPE_MODE (type) == XCmode
   18817       154326 :            || TYPE_MODE (type) == TCmode) && align < 128)
   18818              :         return 128;
   18819              :     }
   18820     40881745 :   else if (RECORD_OR_UNION_TYPE_P (type)
   18821     40881745 :            && TYPE_FIELDS (type))
   18822              :     {
   18823      4760068 :       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
   18824              :         return 64;
   18825      4756963 :       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
   18826              :         return 128;
   18827              :     }
   18828     36121677 :   else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
   18829              :            || TREE_CODE (type) == INTEGER_TYPE)
   18830              :     {
   18831              : 
   18832     29774278 :       if (TYPE_MODE (type) == DFmode && align < 64)
   18833              :         return 64;
   18834     29774278 :       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
   18835              :         return 128;
   18836              :     }
   18837              :   return align;
   18838              : }
   18839              : 
   18840              : /* Compute the minimum required alignment for dynamic stack realignment
   18841              :    purposes for a local variable, parameter or a stack slot.  EXP is
   18842              :    the data type or decl itself, MODE is its mode and ALIGN is the
   18843              :    alignment that the object would ordinarily have.
                      :
                      :    ALIGN is returned unchanged when TARGET_64BIT, when ALIGN is not 64,
                      :    or when ix86_preferred_stack_boundary is at least 64.  Otherwise 32
                      :    is returned for DImode objects (by MODE or by the mode of the type)
                      :    that carry no user-specified alignment on the type or decl and
                      :    whose element type is not atomic; that path also checks, via
                      :    gcc_checking_assert, that TARGET_STV is off.  */
   18844              : 
   18845              : unsigned int
   18846     47929092 : ix86_minimum_alignment (tree exp, machine_mode mode,
   18847              :                         unsigned int align)
   18848              : {
   18849     47929092 :   tree type, decl;
   18850              : 
   18851     47929092 :   if (exp && DECL_P (exp))
   18852              :     {
   18853     15128780 :       type = TREE_TYPE (exp);
   18854     15128780 :       decl = exp;
   18855              :     }
   18856              :   else
   18857              :     {
   18858              :       type = exp;
   18859              :       decl = NULL;
   18860              :     }
   18861              : 
   18862     47929092 :   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
   18863              :     return align;
   18864              : 
   18865              :   /* Don't do dynamic stack realignment for long long objects with
   18866              :      -mpreferred-stack-boundary=2.  */
   18867            0 :   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
   18868            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18869            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18870            0 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18871              :     {
   18872            0 :       gcc_checking_assert (!TARGET_STV);
   18873              :       return 32;
   18874              :     }
   18875              : 
   18876              :   return align;
   18877              : }
   18878              : 
   18879              : /* Find a location for the static chain incoming to a nested function.
   18880              :    This is a register, unless all free registers are used by arguments.
                      :
                      :    FNDECL_OR_TYPE is either a FUNCTION_DECL or a function type.
                      :    INCOMING_P only matters for the 32-bit regparm (3) case.
                      :
                      :    In 64-bit mode the result is always R10.  In 32-bit mode it is ECX
                      :    by default and EAX for fastcall and thiscall functions.  When
                      :    ix86_function_regparm returns 3 the result is ESI for the outgoing
                      :    side and, for the incoming side, a frame MEM at arg_pointer_rtx - 8.
                      :    In that incoming case, if the decl is current_function_decl,
                      :    ix86_static_chain_on_stack is also set (asserting that reload has
                      :    not completed yet).  */
   18881              : 
   18882              : static rtx
   18883       269897 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
   18884              : {
   18885       269897 :   unsigned regno;
   18886              : 
   18887       269897 :   if (TARGET_64BIT)
   18888              :     {
   18889              :       /* We always use R10 in 64-bit mode.  */
   18890              :       regno = R10_REG;
   18891              :     }
   18892              :   else
   18893              :     {
   18894        88535 :       const_tree fntype, fndecl;
   18895        88535 :       unsigned int ccvt;
   18896              : 
   18897              :       /* By default in 32-bit mode we use ECX to pass the static chain.  */
   18898        88535 :       regno = CX_REG;
   18899              : 
   18900        88535 :       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
   18901              :         {
   18902        78559 :           fntype = TREE_TYPE (fndecl_or_type);
   18903        78559 :           fndecl = fndecl_or_type;
   18904              :         }
   18905              :       else
   18906              :         {
   18907              :           fntype = fndecl_or_type;
   18908              :           fndecl = NULL;
   18909              :         }
   18910              : 
   18911        88535 :       ccvt = ix86_get_callcvt (fntype);
   18912        88535 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   18913              :         {
   18914              :           /* Fastcall functions use ecx/edx for arguments, which leaves
   18915              :              us with EAX for the static chain.
   18916              :              (Thiscall is handled by the next branch and ends up with
   18917              :              EAX as well.)  */
   18918              :           regno = AX_REG;
   18919              :         }
   18920        88535 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   18921              :         {
   18922              :           /* Thiscall functions use ecx for arguments, which leaves
   18923              :              us with EAX and EDX for the static chain.
   18924              :              We use EAX for ABI compatibility.  */
   18925              :           regno = AX_REG;
   18926              :         }
   18927        88535 :       else if (ix86_function_regparm (fntype, fndecl) == 3)
   18928              :         {
   18929              :           /* For regparm 3, we have no free call-clobbered registers in
   18930              :              which to store the static chain.  In order to implement this,
   18931              :              we have the trampoline push the static chain to the stack.
   18932              :              However, we can't push a value below the return address when
   18933              :              we call the nested function directly, so we have to use an
   18934              :              alternate entry point.  For this we use ESI, and have the
   18935              :              alternate entry point push ESI, so that things appear the
   18936              :              same once we're executing the nested function.  */
   18937            0 :           if (incoming_p)
   18938              :             {
   18939            0 :               if (fndecl == current_function_decl
   18940            0 :                   && !ix86_static_chain_on_stack)
   18941              :                 {
   18942            0 :                   gcc_assert (!reload_completed);
   18943            0 :                   ix86_static_chain_on_stack = true;
   18944              :                 }
   18945            0 :               return gen_frame_mem (SImode,
   18946            0 :                                     plus_constant (Pmode,
   18947              :                                                    arg_pointer_rtx, -8));
   18948              :             }
   18949              :           regno = SI_REG;
   18950              :         }
   18951              :     }
   18952              : 
   18953       358445 :   return gen_rtx_REG (Pmode, regno);
   18954              : }
   18955              : 
   18956              : /* Emit RTL insns to initialize the variable parts of a trampoline.
   18957              :    FNDECL is the decl of the target address; M_TRAMP is a MEM for
   18958              :    the trampoline, and CHAIN_VALUE is an RTX for the static chain
   18959              :    to be passed to the target function.  */
   18960              : 
   18961              : static void
   18962          303 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
   18963              : {
   18964          303 :   rtx mem, fnaddr;
   18965          303 :   int opcode;
   18966          303 :   int offset = 0;
   18967          303 :   bool need_endbr = (flag_cf_protection & CF_BRANCH);
   18968              : 
   18969          303 :   fnaddr = XEXP (DECL_RTL (fndecl), 0);
   18970              : 
   18971          303 :   if (TARGET_64BIT)
   18972              :     {
   18973          303 :       int size;
   18974              : 
   18975          303 :       if (need_endbr)
   18976              :         {
   18977              :           /* Insert ENDBR64.  */
   18978            1 :           mem = adjust_address (m_tramp, SImode, offset);
   18979            1 :           emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
   18980            1 :           offset += 4;
   18981              :         }
   18982              : 
   18983              :       /* Load the function address to r11.  Try to load address using
   18984              :          the shorter movl instead of movabs.  We may want to support
   18985              :          movq for kernel mode, but kernel does not use trampolines at
   18986              :          the moment.  FNADDR is a 32bit address and may not be in
   18987              :          DImode when ptr_mode == SImode.  Always use movl in this
   18988              :          case.  */
   18989          303 :       if (ptr_mode == SImode
   18990          303 :           || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
   18991              :         {
   18992          271 :           fnaddr = copy_addr_to_reg (fnaddr);
   18993              : 
   18994          271 :           mem = adjust_address (m_tramp, HImode, offset);
   18995          271 :           emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
   18996              : 
   18997          271 :           mem = adjust_address (m_tramp, SImode, offset + 2);
   18998          271 :           emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
   18999          271 :           offset += 6;
   19000              :         }
   19001              :       else
   19002              :         {
   19003           32 :           mem = adjust_address (m_tramp, HImode, offset);
   19004           32 :           emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
   19005              : 
   19006           32 :           mem = adjust_address (m_tramp, DImode, offset + 2);
   19007           32 :           emit_move_insn (mem, fnaddr);
   19008           32 :           offset += 10;
   19009              :         }
   19010              : 
   19011              :       /* Load static chain using movabs to r10.  Use the shorter movl
   19012              :          instead of movabs when ptr_mode == SImode.  */
   19013          303 :       if (ptr_mode == SImode)
   19014              :         {
   19015              :           opcode = 0xba41;
   19016              :           size = 6;
   19017              :         }
   19018              :       else
   19019              :         {
   19020          303 :           opcode = 0xba49;
   19021          303 :           size = 10;
   19022              :         }
   19023              : 
   19024          303 :       mem = adjust_address (m_tramp, HImode, offset);
   19025          303 :       emit_move_insn (mem, gen_int_mode (opcode, HImode));
   19026              : 
   19027          303 :       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
   19028          303 :       emit_move_insn (mem, chain_value);
   19029          303 :       offset += size;
   19030              : 
   19031              :       /* Jump to r11; the last (unused) byte is a nop, only there to
   19032              :          pad the write out to a single 32-bit store.  */
   19033          303 :       mem = adjust_address (m_tramp, SImode, offset);
   19034          303 :       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
   19035          303 :       offset += 4;
   19036              :     }
   19037              :   else
   19038              :     {
   19039            0 :       rtx disp, chain;
   19040              : 
   19041              :       /* Depending on the static chain location, either load a register
   19042              :          with a constant, or push the constant to the stack.  All of the
   19043              :          instructions are the same size.  */
   19044            0 :       chain = ix86_static_chain (fndecl, true);
   19045            0 :       if (REG_P (chain))
   19046              :         {
   19047            0 :           switch (REGNO (chain))
   19048              :             {
   19049              :             case AX_REG:
   19050              :               opcode = 0xb8; break;
   19051            0 :             case CX_REG:
   19052            0 :               opcode = 0xb9; break;
   19053            0 :             default:
   19054            0 :               gcc_unreachable ();
   19055              :             }
   19056              :         }
   19057              :       else
   19058              :         opcode = 0x68;
   19059              : 
   19060            0 :       if (need_endbr)
   19061              :         {
   19062              :           /* Insert ENDBR32.  */
   19063            0 :           mem = adjust_address (m_tramp, SImode, offset);
   19064            0 :           emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
   19065            0 :           offset += 4;
   19066              :         }
   19067              : 
   19068            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19069            0 :       emit_move_insn (mem, gen_int_mode (opcode, QImode));
   19070              : 
   19071            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19072            0 :       emit_move_insn (mem, chain_value);
   19073            0 :       offset += 5;
   19074              : 
   19075            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19076            0 :       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
   19077              : 
   19078            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19079              : 
   19080              :       /* Compute offset from the end of the jmp to the target function.
   19081              :          In the case in which the trampoline stores the static chain on
   19082              :          the stack, we need to skip the first insn which pushes the
   19083              :          (call-saved) register static chain; this push is 1 byte.  */
   19084            0 :       offset += 5;
   19085            0 :       int skip = MEM_P (chain) ? 1 : 0;
   19086              :       /* Skip ENDBR32 at the entry of the target function.  */
   19087            0 :       if (need_endbr
   19088            0 :           && !cgraph_node::get (fndecl)->only_called_directly_p ())
   19089            0 :         skip += 4;
   19090            0 :       disp = expand_binop (SImode, sub_optab, fnaddr,
   19091            0 :                            plus_constant (Pmode, XEXP (m_tramp, 0),
   19092            0 :                                           offset - skip),
   19093              :                            NULL_RTX, 1, OPTAB_DIRECT);
   19094            0 :       emit_move_insn (mem, disp);
   19095              :     }
   19096              : 
   19097          303 :   gcc_assert (offset <= TRAMPOLINE_SIZE);
   19098              : 
   19099              : #ifdef HAVE_ENABLE_EXECUTE_STACK
   19100              : #ifdef CHECK_EXECUTE_STACK_ENABLED
   19101              :   if (CHECK_EXECUTE_STACK_ENABLED)
   19102              : #endif
   19103              :   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
   19104              :                      LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
   19105              : #endif
   19106          303 : }
   19107              : 
   19108              : static bool
   19109     54047059 : ix86_allocate_stack_slots_for_args (void)
   19110              : {
   19111              :   /* Naked functions should not allocate stack slots for arguments, so return false when current_function_decl is naked and true otherwise.  */
   19112     54047059 :   return !ix86_function_naked (current_function_decl);
   19113              : }
   19114              : 
   19115              : static bool
   19116     37899932 : ix86_warn_func_return (tree decl)
   19117              : {
   19118              :   /* Return false if DECL is a naked function and true otherwise.  Naked functions are implemented entirely in assembly, including the
   19119              :      return sequence, so suppress warnings about this.  */
   19120     37899932 :   return !ix86_function_naked (decl);
   19121              : }
   19122              : 
   19123              : /* Return the shift count of a vector by scalar shift builtin second argument
   19124              :    ARG1 as a tree for which tree_fits_uhwi_p holds, or NULL_TREE if no such count can be extracted from ARG1.  */
   19125              : static tree
   19126        14142 : ix86_vector_shift_count (tree arg1)
   19127              : {
   19128        14142 :   if (tree_fits_uhwi_p (arg1))
   19129              :     return arg1;
   19130         8316 :   else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
   19131              :     {
   19132              :       /* The count argument is passed in as one of various 128-bit (or 64-bit) vector types; only the low 64 bits of it are the count.
   19133              :          Encode the constant into a byte buffer and reinterpret that buffer as a uint64_type_node value.  */
   19134          162 :       unsigned char buf[16];
   19135          162 :       int len = native_encode_expr (arg1, buf, 16);
   19136          162 :       if (len == 0)  /* Nothing was encoded into BUF; give up.  */
   19137          162 :         return NULL_TREE;
   19138          162 :       tree t = native_interpret_expr (uint64_type_node, buf, len);
   19139          162 :       if (t && tree_fits_uhwi_p (t))
   19140              :         return t;
   19141              :     }
   19142              :   return NULL_TREE;
   19143              : }
   19144              : 
   19145              : /* Return true if ARG_MASK is an INTEGER_CST whose low ELEMS bits are all ones,
   19146              :    where ELEMS is the number of elements of the corresponding vector.  */
   19147              : static bool
   19148        25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
   19149              : {
   19150        25042 :   if (TREE_CODE (arg_mask) != INTEGER_CST)
   19151              :     return false;
   19152              : 
   19153         7462 :   unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
   19154         7462 :   if (elems == HOST_BITS_PER_WIDE_INT)  /* A shift by the full width would be undefined, so compare directly.  */
   19155           33 :     return  mask == HOST_WIDE_INT_M1U;
   19156         7429 :   if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)  /* Bits from ELEMS upward are forced to one, so only the low ELEMS bits are tested.  */
   19157         2681 :     return false;
   19158              : 
   19159              :   return true;
   19160              : }
   19161              : 
   19162              : static tree
   19163     68146688 : ix86_fold_builtin (tree fndecl, int n_args,
   19164              :                    tree *args, bool ignore ATTRIBUTE_UNUSED)
   19165              : {
   19166     68146688 :   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
   19167              :     {
   19168     68146688 :       enum ix86_builtins fn_code
   19169     68146688 :         = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19170     68146688 :       enum rtx_code rcode;
   19171     68146688 :       bool is_vshift;
   19172     68146688 :       enum tree_code tcode;
   19173     68146688 :       bool is_scalar;
   19174     68146688 :       unsigned HOST_WIDE_INT mask;
   19175              : 
   19176     68146688 :       switch (fn_code)
   19177              :         {
   19178         8883 :         case IX86_BUILTIN_CPU_IS:
   19179         8883 :         case IX86_BUILTIN_CPU_SUPPORTS:
   19180         8883 :           gcc_assert (n_args == 1);
   19181         8883 :           return fold_builtin_cpu (fndecl, args);
   19182              : 
   19183        24859 :         case IX86_BUILTIN_NANQ:
   19184        24859 :         case IX86_BUILTIN_NANSQ:
   19185        24859 :           {
   19186        24859 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19187        24859 :             const char *str = c_getstr (*args);
   19188        24859 :             int quiet = fn_code == IX86_BUILTIN_NANQ;
   19189        24859 :             REAL_VALUE_TYPE real;
   19190              : 
   19191        24859 :             if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
   19192        24859 :               return build_real (type, real);
   19193            0 :             return NULL_TREE;
   19194              :           }
   19195              : 
   19196          108 :         case IX86_BUILTIN_INFQ:
   19197          108 :         case IX86_BUILTIN_HUGE_VALQ:
   19198          108 :           {
   19199          108 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19200          108 :             REAL_VALUE_TYPE inf;
   19201          108 :             real_inf (&inf);
   19202          108 :             return build_real (type, inf);
   19203              :           }
   19204              : 
   19205        62447 :         case IX86_BUILTIN_TZCNT16:
   19206        62447 :         case IX86_BUILTIN_CTZS:
   19207        62447 :         case IX86_BUILTIN_TZCNT32:
   19208        62447 :         case IX86_BUILTIN_TZCNT64:
   19209        62447 :           gcc_assert (n_args == 1);
   19210        62447 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19211              :             {
   19212           45 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19213           45 :               tree arg = args[0];
   19214           45 :               if (fn_code == IX86_BUILTIN_TZCNT16
   19215           45 :                   || fn_code == IX86_BUILTIN_CTZS)
   19216            3 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19217           45 :               if (integer_zerop (arg))
   19218            6 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19219              :               else
   19220           39 :                 return fold_const_call (CFN_CTZ, type, arg);
   19221              :             }
   19222              :           break;
   19223              : 
   19224        52002 :         case IX86_BUILTIN_LZCNT16:
   19225        52002 :         case IX86_BUILTIN_CLZS:
   19226        52002 :         case IX86_BUILTIN_LZCNT32:
   19227        52002 :         case IX86_BUILTIN_LZCNT64:
   19228        52002 :           gcc_assert (n_args == 1);
   19229        52002 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19230              :             {
   19231           54 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19232           54 :               tree arg = args[0];
   19233           54 :               if (fn_code == IX86_BUILTIN_LZCNT16
   19234           54 :                   || fn_code == IX86_BUILTIN_CLZS)
   19235           18 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19236           54 :               if (integer_zerop (arg))
   19237            3 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19238              :               else
   19239           51 :                 return fold_const_call (CFN_CLZ, type, arg);
   19240              :             }
   19241              :           break;
   19242              : 
   19243        61227 :         case IX86_BUILTIN_BEXTR32:
   19244        61227 :         case IX86_BUILTIN_BEXTR64:
   19245        61227 :         case IX86_BUILTIN_BEXTRI32:
   19246        61227 :         case IX86_BUILTIN_BEXTRI64:
   19247        61227 :           gcc_assert (n_args == 2);
   19248        61227 :           if (tree_fits_uhwi_p (args[1]))
   19249              :             {
   19250          152 :               unsigned HOST_WIDE_INT res = 0;
   19251          152 :               unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
   19252          152 :               unsigned int start = tree_to_uhwi (args[1]);
   19253          152 :               unsigned int len = (start & 0xff00) >> 8;
   19254          152 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19255          152 :               start &= 0xff;
   19256          152 :               if (start >= prec || len == 0)
   19257          111 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19258              :                                          args[0]);
   19259           41 :               else if (!tree_fits_uhwi_p (args[0]))
   19260              :                 break;
   19261              :               else
   19262           24 :                 res = tree_to_uhwi (args[0]) >> start;
   19263           24 :               if (len > prec)
   19264              :                 len = prec;
   19265           24 :               if (len < HOST_BITS_PER_WIDE_INT)
   19266           15 :                 res &= (HOST_WIDE_INT_1U << len) - 1;
   19267           24 :               return build_int_cstu (lhs_type, res);
   19268              :             }
   19269              :           break;
   19270              : 
   19271        21034 :         case IX86_BUILTIN_BZHI32:
   19272        21034 :         case IX86_BUILTIN_BZHI64:
   19273        21034 :           gcc_assert (n_args == 2);
   19274        21034 :           if (tree_fits_uhwi_p (args[1]))
   19275              :             {
   19276          190 :               unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
   19277          190 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19278          190 :               if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
   19279              :                 return args[0];
   19280          190 :               if (idx == 0)
   19281           52 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19282              :                                          args[0]);
   19283          138 :               if (!tree_fits_uhwi_p (args[0]))
   19284              :                 break;
   19285           12 :               unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
   19286           12 :               res &= ~(HOST_WIDE_INT_M1U << idx);
   19287           12 :               return build_int_cstu (lhs_type, res);
   19288              :             }
   19289              :           break;
   19290              : 
   19291        20792 :         case IX86_BUILTIN_PDEP32:
   19292        20792 :         case IX86_BUILTIN_PDEP64:
   19293        20792 :           gcc_assert (n_args == 2);
   19294        20792 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19295              :             {
   19296           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19297           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19298           46 :               unsigned HOST_WIDE_INT res = 0;
   19299           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19300         2990 :               for (m = 1; m; m <<= 1)
   19301         2944 :                 if ((mask & m) != 0)
   19302              :                   {
   19303         1440 :                     if ((src & k) != 0)
   19304          789 :                       res |= m;
   19305         1440 :                     k <<= 1;
   19306              :                   }
   19307           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19308              :             }
   19309              :           break;
   19310              : 
   19311        20794 :         case IX86_BUILTIN_PEXT32:
   19312        20794 :         case IX86_BUILTIN_PEXT64:
   19313        20794 :           gcc_assert (n_args == 2);
   19314        20794 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19315              :             {
   19316           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19317           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19318           46 :               unsigned HOST_WIDE_INT res = 0;
   19319           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19320         2990 :               for (m = 1; m; m <<= 1)
   19321         2944 :                 if ((mask & m) != 0)
   19322              :                   {
   19323         2016 :                     if ((src & m) != 0)
   19324         1063 :                       res |= k;
   19325         2016 :                     k <<= 1;
   19326              :                   }
   19327           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19328              :             }
   19329              :           break;
   19330              : 
   19331       100841 :         case IX86_BUILTIN_MOVMSKPS:
   19332       100841 :         case IX86_BUILTIN_PMOVMSKB:
   19333       100841 :         case IX86_BUILTIN_MOVMSKPD:
   19334       100841 :         case IX86_BUILTIN_PMOVMSKB128:
   19335       100841 :         case IX86_BUILTIN_MOVMSKPD256:
   19336       100841 :         case IX86_BUILTIN_MOVMSKPS256:
   19337       100841 :         case IX86_BUILTIN_PMOVMSKB256:
   19338       100841 :           gcc_assert (n_args == 1);
   19339       100841 :           if (TREE_CODE (args[0]) == VECTOR_CST)
   19340              :             {
   19341              :               HOST_WIDE_INT res = 0;
   19342         1492 :               for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
   19343              :                 {
   19344         1242 :                   tree e = VECTOR_CST_ELT (args[0], i);
   19345         1242 :                   if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
   19346              :                     {
   19347          624 :                       if (wi::neg_p (wi::to_wide (e)))
   19348          575 :                         res |= HOST_WIDE_INT_1 << i;
   19349              :                     }
   19350          618 :                   else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
   19351              :                     {
   19352          618 :                       if (TREE_REAL_CST (e).sign)
   19353          517 :                         res |= HOST_WIDE_INT_1 << i;
   19354              :                     }
   19355              :                   else
   19356              :                     return NULL_TREE;
   19357              :                 }
   19358          250 :               return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19359              :             }
   19360              :           break;
   19361              : 
   19362       659648 :         case IX86_BUILTIN_PSLLD:
   19363       659648 :         case IX86_BUILTIN_PSLLD128:
   19364       659648 :         case IX86_BUILTIN_PSLLD128_MASK:
   19365       659648 :         case IX86_BUILTIN_PSLLD256:
   19366       659648 :         case IX86_BUILTIN_PSLLD256_MASK:
   19367       659648 :         case IX86_BUILTIN_PSLLD512:
   19368       659648 :         case IX86_BUILTIN_PSLLDI:
   19369       659648 :         case IX86_BUILTIN_PSLLDI128:
   19370       659648 :         case IX86_BUILTIN_PSLLDI128_MASK:
   19371       659648 :         case IX86_BUILTIN_PSLLDI256:
   19372       659648 :         case IX86_BUILTIN_PSLLDI256_MASK:
   19373       659648 :         case IX86_BUILTIN_PSLLDI512:
   19374       659648 :         case IX86_BUILTIN_PSLLQ:
   19375       659648 :         case IX86_BUILTIN_PSLLQ128:
   19376       659648 :         case IX86_BUILTIN_PSLLQ128_MASK:
   19377       659648 :         case IX86_BUILTIN_PSLLQ256:
   19378       659648 :         case IX86_BUILTIN_PSLLQ256_MASK:
   19379       659648 :         case IX86_BUILTIN_PSLLQ512:
   19380       659648 :         case IX86_BUILTIN_PSLLQI:
   19381       659648 :         case IX86_BUILTIN_PSLLQI128:
   19382       659648 :         case IX86_BUILTIN_PSLLQI128_MASK:
   19383       659648 :         case IX86_BUILTIN_PSLLQI256:
   19384       659648 :         case IX86_BUILTIN_PSLLQI256_MASK:
   19385       659648 :         case IX86_BUILTIN_PSLLQI512:
   19386       659648 :         case IX86_BUILTIN_PSLLW:
   19387       659648 :         case IX86_BUILTIN_PSLLW128:
   19388       659648 :         case IX86_BUILTIN_PSLLW128_MASK:
   19389       659648 :         case IX86_BUILTIN_PSLLW256:
   19390       659648 :         case IX86_BUILTIN_PSLLW256_MASK:
   19391       659648 :         case IX86_BUILTIN_PSLLW512_MASK:
   19392       659648 :         case IX86_BUILTIN_PSLLWI:
   19393       659648 :         case IX86_BUILTIN_PSLLWI128:
   19394       659648 :         case IX86_BUILTIN_PSLLWI128_MASK:
   19395       659648 :         case IX86_BUILTIN_PSLLWI256:
   19396       659648 :         case IX86_BUILTIN_PSLLWI256_MASK:
   19397       659648 :         case IX86_BUILTIN_PSLLWI512_MASK:
   19398       659648 :           rcode = ASHIFT;
   19399       659648 :           is_vshift = false;
   19400       659648 :           goto do_shift;
   19401       601251 :         case IX86_BUILTIN_PSRAD:
   19402       601251 :         case IX86_BUILTIN_PSRAD128:
   19403       601251 :         case IX86_BUILTIN_PSRAD128_MASK:
   19404       601251 :         case IX86_BUILTIN_PSRAD256:
   19405       601251 :         case IX86_BUILTIN_PSRAD256_MASK:
   19406       601251 :         case IX86_BUILTIN_PSRAD512:
   19407       601251 :         case IX86_BUILTIN_PSRADI:
   19408       601251 :         case IX86_BUILTIN_PSRADI128:
   19409       601251 :         case IX86_BUILTIN_PSRADI128_MASK:
   19410       601251 :         case IX86_BUILTIN_PSRADI256:
   19411       601251 :         case IX86_BUILTIN_PSRADI256_MASK:
   19412       601251 :         case IX86_BUILTIN_PSRADI512:
   19413       601251 :         case IX86_BUILTIN_PSRAQ128_MASK:
   19414       601251 :         case IX86_BUILTIN_PSRAQ256_MASK:
   19415       601251 :         case IX86_BUILTIN_PSRAQ512:
   19416       601251 :         case IX86_BUILTIN_PSRAQI128_MASK:
   19417       601251 :         case IX86_BUILTIN_PSRAQI256_MASK:
   19418       601251 :         case IX86_BUILTIN_PSRAQI512:
   19419       601251 :         case IX86_BUILTIN_PSRAW:
   19420       601251 :         case IX86_BUILTIN_PSRAW128:
   19421       601251 :         case IX86_BUILTIN_PSRAW128_MASK:
   19422       601251 :         case IX86_BUILTIN_PSRAW256:
   19423       601251 :         case IX86_BUILTIN_PSRAW256_MASK:
   19424       601251 :         case IX86_BUILTIN_PSRAW512:
   19425       601251 :         case IX86_BUILTIN_PSRAWI:
   19426       601251 :         case IX86_BUILTIN_PSRAWI128:
   19427       601251 :         case IX86_BUILTIN_PSRAWI128_MASK:
   19428       601251 :         case IX86_BUILTIN_PSRAWI256:
   19429       601251 :         case IX86_BUILTIN_PSRAWI256_MASK:
   19430       601251 :         case IX86_BUILTIN_PSRAWI512:
   19431       601251 :           rcode = ASHIFTRT;
   19432       601251 :           is_vshift = false;
   19433       601251 :           goto do_shift;
   19434       633525 :         case IX86_BUILTIN_PSRLD:
   19435       633525 :         case IX86_BUILTIN_PSRLD128:
   19436       633525 :         case IX86_BUILTIN_PSRLD128_MASK:
   19437       633525 :         case IX86_BUILTIN_PSRLD256:
   19438       633525 :         case IX86_BUILTIN_PSRLD256_MASK:
   19439       633525 :         case IX86_BUILTIN_PSRLD512:
   19440       633525 :         case IX86_BUILTIN_PSRLDI:
   19441       633525 :         case IX86_BUILTIN_PSRLDI128:
   19442       633525 :         case IX86_BUILTIN_PSRLDI128_MASK:
   19443       633525 :         case IX86_BUILTIN_PSRLDI256:
   19444       633525 :         case IX86_BUILTIN_PSRLDI256_MASK:
   19445       633525 :         case IX86_BUILTIN_PSRLDI512:
   19446       633525 :         case IX86_BUILTIN_PSRLQ:
   19447       633525 :         case IX86_BUILTIN_PSRLQ128:
   19448       633525 :         case IX86_BUILTIN_PSRLQ128_MASK:
   19449       633525 :         case IX86_BUILTIN_PSRLQ256:
   19450       633525 :         case IX86_BUILTIN_PSRLQ256_MASK:
   19451       633525 :         case IX86_BUILTIN_PSRLQ512:
   19452       633525 :         case IX86_BUILTIN_PSRLQI:
   19453       633525 :         case IX86_BUILTIN_PSRLQI128:
   19454       633525 :         case IX86_BUILTIN_PSRLQI128_MASK:
   19455       633525 :         case IX86_BUILTIN_PSRLQI256:
   19456       633525 :         case IX86_BUILTIN_PSRLQI256_MASK:
   19457       633525 :         case IX86_BUILTIN_PSRLQI512:
   19458       633525 :         case IX86_BUILTIN_PSRLW:
   19459       633525 :         case IX86_BUILTIN_PSRLW128:
   19460       633525 :         case IX86_BUILTIN_PSRLW128_MASK:
   19461       633525 :         case IX86_BUILTIN_PSRLW256:
   19462       633525 :         case IX86_BUILTIN_PSRLW256_MASK:
   19463       633525 :         case IX86_BUILTIN_PSRLW512:
   19464       633525 :         case IX86_BUILTIN_PSRLWI:
   19465       633525 :         case IX86_BUILTIN_PSRLWI128:
   19466       633525 :         case IX86_BUILTIN_PSRLWI128_MASK:
   19467       633525 :         case IX86_BUILTIN_PSRLWI256:
   19468       633525 :         case IX86_BUILTIN_PSRLWI256_MASK:
   19469       633525 :         case IX86_BUILTIN_PSRLWI512:
   19470       633525 :           rcode = LSHIFTRT;
   19471       633525 :           is_vshift = false;
   19472       633525 :           goto do_shift;
   19473       276009 :         case IX86_BUILTIN_PSLLVV16HI:
   19474       276009 :         case IX86_BUILTIN_PSLLVV16SI:
   19475       276009 :         case IX86_BUILTIN_PSLLVV2DI:
   19476       276009 :         case IX86_BUILTIN_PSLLVV2DI_MASK:
   19477       276009 :         case IX86_BUILTIN_PSLLVV32HI:
   19478       276009 :         case IX86_BUILTIN_PSLLVV4DI:
   19479       276009 :         case IX86_BUILTIN_PSLLVV4DI_MASK:
   19480       276009 :         case IX86_BUILTIN_PSLLVV4SI:
   19481       276009 :         case IX86_BUILTIN_PSLLVV4SI_MASK:
   19482       276009 :         case IX86_BUILTIN_PSLLVV8DI:
   19483       276009 :         case IX86_BUILTIN_PSLLVV8HI:
   19484       276009 :         case IX86_BUILTIN_PSLLVV8SI:
   19485       276009 :         case IX86_BUILTIN_PSLLVV8SI_MASK:
   19486       276009 :           rcode = ASHIFT;
   19487       276009 :           is_vshift = true;
   19488       276009 :           goto do_shift;
   19489       275588 :         case IX86_BUILTIN_PSRAVQ128:
   19490       275588 :         case IX86_BUILTIN_PSRAVQ256:
   19491       275588 :         case IX86_BUILTIN_PSRAVV16HI:
   19492       275588 :         case IX86_BUILTIN_PSRAVV16SI:
   19493       275588 :         case IX86_BUILTIN_PSRAVV32HI:
   19494       275588 :         case IX86_BUILTIN_PSRAVV4SI:
   19495       275588 :         case IX86_BUILTIN_PSRAVV4SI_MASK:
   19496       275588 :         case IX86_BUILTIN_PSRAVV8DI:
   19497       275588 :         case IX86_BUILTIN_PSRAVV8HI:
   19498       275588 :         case IX86_BUILTIN_PSRAVV8SI:
   19499       275588 :         case IX86_BUILTIN_PSRAVV8SI_MASK:
   19500       275588 :           rcode = ASHIFTRT;
   19501       275588 :           is_vshift = true;
   19502       275588 :           goto do_shift;
   19503       276000 :         case IX86_BUILTIN_PSRLVV16HI:
   19504       276000 :         case IX86_BUILTIN_PSRLVV16SI:
   19505       276000 :         case IX86_BUILTIN_PSRLVV2DI:
   19506       276000 :         case IX86_BUILTIN_PSRLVV2DI_MASK:
   19507       276000 :         case IX86_BUILTIN_PSRLVV32HI:
   19508       276000 :         case IX86_BUILTIN_PSRLVV4DI:
   19509       276000 :         case IX86_BUILTIN_PSRLVV4DI_MASK:
   19510       276000 :         case IX86_BUILTIN_PSRLVV4SI:
   19511       276000 :         case IX86_BUILTIN_PSRLVV4SI_MASK:
   19512       276000 :         case IX86_BUILTIN_PSRLVV8DI:
   19513       276000 :         case IX86_BUILTIN_PSRLVV8HI:
   19514       276000 :         case IX86_BUILTIN_PSRLVV8SI:
   19515       276000 :         case IX86_BUILTIN_PSRLVV8SI_MASK:
   19516       276000 :           rcode = LSHIFTRT;
   19517       276000 :           is_vshift = true;
   19518       276000 :           goto do_shift;
   19519              : 
   19520      2722021 :         do_shift:
   19521      2722021 :           gcc_assert (n_args >= 2);
   19522      2722021 :           if (TREE_CODE (args[0]) != VECTOR_CST)
   19523              :             break;
   19524          927 :           mask = HOST_WIDE_INT_M1U;
   19525          927 :           if (n_args > 2)
   19526              :             {
   19527              :               /* This is masked shift.  */
   19528          678 :               if (!tree_fits_uhwi_p (args[n_args - 1])
   19529          678 :                   || TREE_SIDE_EFFECTS (args[n_args - 2]))
   19530              :                 break;
   19531          678 :               mask = tree_to_uhwi (args[n_args - 1]);
   19532          678 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19533          678 :               mask |= HOST_WIDE_INT_M1U << elems;
   19534          678 :               if (mask != HOST_WIDE_INT_M1U
   19535          567 :                   && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
   19536              :                 break;
   19537          633 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19538              :                 return args[n_args - 2];
   19539              :             }
   19540          879 :           if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
   19541              :             break;
   19542          879 :           if (tree tem = (is_vshift ? integer_one_node
   19543          879 :                           : ix86_vector_shift_count (args[1])))
   19544              :             {
   19545          558 :               unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
   19546          558 :               unsigned HOST_WIDE_INT prec
   19547          558 :                 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
   19548          558 :               if (count == 0 && mask == HOST_WIDE_INT_M1U)
   19549              :                 return args[0];
   19550          558 :               if (count >= prec)
   19551              :                 {
   19552           72 :                   if (rcode == ASHIFTRT)
   19553           27 :                     count = prec - 1;
   19554           45 :                   else if (mask == HOST_WIDE_INT_M1U)
   19555            3 :                     return build_zero_cst (TREE_TYPE (args[0]));
   19556              :                 }
   19557          555 :               tree countt = NULL_TREE;
   19558          555 :               if (!is_vshift)
   19559              :                 {
   19560          377 :                   if (count >= prec)
   19561           42 :                     countt = integer_zero_node;
   19562              :                   else
   19563          335 :                     countt = build_int_cst (integer_type_node, count);
   19564              :                 }
   19565          555 :               tree_vector_builder builder;
   19566          555 :               if (mask != HOST_WIDE_INT_M1U || is_vshift)
   19567          392 :                 builder.new_vector (TREE_TYPE (args[0]),
   19568          784 :                                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
   19569              :                                     1);
   19570              :               else
   19571          163 :                 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
   19572              :                                              false);
   19573          555 :               unsigned int cnt = builder.encoded_nelts ();
   19574         5967 :               for (unsigned int i = 0; i < cnt; ++i)
   19575              :                 {
   19576         5412 :                   tree elt = VECTOR_CST_ELT (args[0], i);
   19577         5412 :                   if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
   19578            0 :                     return NULL_TREE;
   19579         5412 :                   tree type = TREE_TYPE (elt);
   19580         5412 :                   if (rcode == LSHIFTRT)
   19581         2040 :                     elt = fold_convert (unsigned_type_for (type), elt);
   19582         5412 :                   if (is_vshift)
   19583              :                     {
   19584         1846 :                       countt = VECTOR_CST_ELT (args[1], i);
   19585         1846 :                       if (TREE_CODE (countt) != INTEGER_CST
   19586         1846 :                           || TREE_OVERFLOW (countt))
   19587              :                         return NULL_TREE;
   19588         1846 :                       if (wi::neg_p (wi::to_wide (countt))
   19589         3610 :                           || wi::to_widest (countt) >= prec)
   19590              :                         {
   19591          325 :                           if (rcode == ASHIFTRT)
   19592          108 :                             countt = build_int_cst (TREE_TYPE (countt),
   19593          108 :                                                     prec - 1);
   19594              :                           else
   19595              :                             {
   19596          217 :                               elt = build_zero_cst (TREE_TYPE (elt));
   19597          217 :                               countt = build_zero_cst (TREE_TYPE (countt));
   19598              :                             }
   19599              :                         }
   19600              :                     }
   19601         3566 :                   else if (count >= prec)
   19602          504 :                     elt = build_zero_cst (TREE_TYPE (elt));
   19603         8950 :                   elt = const_binop (rcode == ASHIFT
   19604              :                                      ? LSHIFT_EXPR : RSHIFT_EXPR,
   19605         5412 :                                      TREE_TYPE (elt), elt, countt);
   19606         5412 :                   if (!elt || TREE_CODE (elt) != INTEGER_CST)
   19607              :                     return NULL_TREE;
   19608         5412 :                   if (rcode == LSHIFTRT)
   19609         2040 :                     elt = fold_convert (type, elt);
   19610         5412 :                   if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
   19611              :                     {
   19612         1566 :                       elt = VECTOR_CST_ELT (args[n_args - 2], i);
   19613         1566 :                       if (TREE_CODE (elt) != INTEGER_CST
   19614         1566 :                           || TREE_OVERFLOW (elt))
   19615              :                         return NULL_TREE;
   19616              :                     }
   19617         5412 :                   builder.quick_push (elt);
   19618              :                 }
   19619          555 :               return builder.build ();
   19620          555 :             }
   19621              :           break;
   19622              : 
   19623        32718 :         case IX86_BUILTIN_MINSS:
   19624        32718 :         case IX86_BUILTIN_MINSH_MASK:
   19625        32718 :           tcode = LT_EXPR;
   19626        32718 :           is_scalar = true;
   19627        32718 :           goto do_minmax;
   19628              : 
   19629        32718 :         case IX86_BUILTIN_MAXSS:
   19630        32718 :         case IX86_BUILTIN_MAXSH_MASK:
   19631        32718 :           tcode = GT_EXPR;
   19632        32718 :           is_scalar = true;
   19633        32718 :           goto do_minmax;
   19634              : 
   19635       350576 :         case IX86_BUILTIN_MINPS:
   19636       350576 :         case IX86_BUILTIN_MINPD:
   19637       350576 :         case IX86_BUILTIN_MINPS256:
   19638       350576 :         case IX86_BUILTIN_MINPD256:
   19639       350576 :         case IX86_BUILTIN_MINPS512:
   19640       350576 :         case IX86_BUILTIN_MINPD512:
   19641       350576 :         case IX86_BUILTIN_MINPS128_MASK:
   19642       350576 :         case IX86_BUILTIN_MINPD128_MASK:
   19643       350576 :         case IX86_BUILTIN_MINPS256_MASK:
   19644       350576 :         case IX86_BUILTIN_MINPD256_MASK:
   19645       350576 :         case IX86_BUILTIN_MINPH128_MASK:
   19646       350576 :         case IX86_BUILTIN_MINPH256_MASK:
   19647       350576 :         case IX86_BUILTIN_MINPH512_MASK:
   19648       350576 :           tcode = LT_EXPR;
   19649       350576 :           is_scalar = false;
   19650       350576 :           goto do_minmax;
   19651              : 
   19652              :         case IX86_BUILTIN_MAXPS:
   19653              :         case IX86_BUILTIN_MAXPD:
   19654              :         case IX86_BUILTIN_MAXPS256:
   19655              :         case IX86_BUILTIN_MAXPD256:
   19656              :         case IX86_BUILTIN_MAXPS512:
   19657              :         case IX86_BUILTIN_MAXPD512:
   19658              :         case IX86_BUILTIN_MAXPS128_MASK:
   19659              :         case IX86_BUILTIN_MAXPD128_MASK:
   19660              :         case IX86_BUILTIN_MAXPS256_MASK:
   19661              :         case IX86_BUILTIN_MAXPD256_MASK:
   19662              :         case IX86_BUILTIN_MAXPH128_MASK:
   19663              :         case IX86_BUILTIN_MAXPH256_MASK:
   19664              :         case IX86_BUILTIN_MAXPH512_MASK:
   19665              :           tcode = GT_EXPR;
   19666              :           is_scalar = false;
   19667       766608 :         do_minmax:
   19668       766608 :           gcc_assert (n_args >= 2);
   19669       766608 :           if (TREE_CODE (args[0]) != VECTOR_CST
   19670           76 :               || TREE_CODE (args[1]) != VECTOR_CST)
   19671              :             break;
   19672           76 :           mask = HOST_WIDE_INT_M1U;
   19673           76 :           if (n_args > 2)
   19674              :             {
   19675           36 :               gcc_assert (n_args >= 4);
   19676              :               /* This is masked minmax.  */
   19677           36 :               if (TREE_CODE (args[3]) != INTEGER_CST
   19678           36 :                   || TREE_SIDE_EFFECTS (args[2]))
   19679              :                 break;
   19680           36 :               mask = TREE_INT_CST_LOW (args[3]);
   19681           36 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19682           36 :               mask |= HOST_WIDE_INT_M1U << elems;
   19683           36 :               if (mask != HOST_WIDE_INT_M1U
   19684           32 :                   && TREE_CODE (args[2]) != VECTOR_CST)
   19685              :                 break;
   19686           36 :               if (n_args >= 5)
   19687              :                 {
   19688           20 :                   if (!tree_fits_uhwi_p (args[4]))
   19689              :                     break;
   19690           20 :                   if (tree_to_uhwi (args[4]) != 4
   19691            0 :                       && tree_to_uhwi (args[4]) != 8)
   19692              :                     break;
   19693              :                 }
   19694           36 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19695              :                 return args[2];
   19696              :             }
   19697              :           /* Punt on NaNs, unless exceptions are disabled.  */
   19698           76 :           if (HONOR_NANS (args[0])
   19699           76 :               && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
   19700          184 :             for (int i = 0; i < 2; ++i)
   19701              :               {
   19702          134 :                 unsigned count = vector_cst_encoded_nelts (args[i]);
   19703          957 :                 for (unsigned j = 0; j < count; ++j)
   19704          849 :                   if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
   19705              :                     return NULL_TREE;
   19706              :               }
   19707           50 :           {
   19708           50 :             tree res = const_binop (tcode,
   19709           50 :                                     truth_type_for (TREE_TYPE (args[0])),
   19710              :                                     args[0], args[1]);
   19711           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19712              :               break;
   19713           50 :             res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
   19714              :                                 args[0], args[1]);
   19715           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19716              :               break;
   19717           50 :             if (mask != HOST_WIDE_INT_M1U)
   19718              :               {
   19719           32 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19720           32 :                 vec_perm_builder sel (nelts, nelts, 1);
   19721          328 :                 for (unsigned int i = 0; i < nelts; i++)
   19722          296 :                   if (mask & (HOST_WIDE_INT_1U << i))
   19723          160 :                     sel.quick_push (i);
   19724              :                   else
   19725          136 :                     sel.quick_push (nelts + i);
   19726           32 :                 vec_perm_indices indices (sel, 2, nelts);
   19727           32 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
   19728              :                                      indices);
   19729           32 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19730              :                   break;
   19731           32 :               }
   19732           50 :             if (is_scalar)
   19733              :               {
   19734           10 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19735           10 :                 vec_perm_builder sel (nelts, nelts, 1);
   19736           10 :                 sel.quick_push (0);
   19737           40 :                 for (unsigned int i = 1; i < nelts; i++)
   19738           30 :                   sel.quick_push (nelts + i);
   19739           10 :                 vec_perm_indices indices (sel, 2, nelts);
   19740           10 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
   19741              :                                      indices);
   19742           10 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19743              :                   break;
   19744           10 :               }
   19745           50 :             return res;
   19746              :           }
   19747              : 
   19748              :         default:
   19749              :           break;
   19750              :         }
   19751              :     }
   19752              : 
   19753              : #ifdef SUBTARGET_FOLD_BUILTIN
   19754              :   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
   19755              : #endif
   19756              : 
   19757              :   return NULL_TREE;
   19758              : }
   19759              : 
   19760              : /* Fold an MD builtin (use ix86_fold_builtin for folding into a
   19761              :    constant) in GIMPLE.  */
   19762              : 
   19763              : bool
   19764      1121511 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   19765              : {
   19766      1121511 :   gimple *stmt = gsi_stmt (*gsi), *g;
   19767      1121511 :   gimple_seq stmts = NULL;
   19768      1121511 :   tree fndecl = gimple_call_fndecl (stmt);
   19769      1121511 :   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   19770      1121511 :   int n_args = gimple_call_num_args (stmt);
   19771      1121511 :   enum ix86_builtins fn_code
   19772      1121511 :     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19773      1121511 :   tree decl = NULL_TREE;
   19774      1121511 :   tree arg0, arg1, arg2;
   19775      1121511 :   enum rtx_code rcode;
   19776      1121511 :   enum tree_code tcode;
   19777      1121511 :   unsigned HOST_WIDE_INT count;
   19778      1121511 :   bool is_vshift;
   19779      1121511 :   unsigned HOST_WIDE_INT elems;
   19780      1121511 :   location_t loc;
   19781              : 
   19782              :   /* Don't fold when there's isa mismatch.  */
   19783      1121511 :   if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
   19784              :     return false;
   19785              : 
   19786      1121384 :   switch (fn_code)
   19787              :     {
   19788          288 :     case IX86_BUILTIN_TZCNT32:
   19789          288 :       decl = builtin_decl_implicit (BUILT_IN_CTZ);
   19790          288 :       goto fold_tzcnt_lzcnt;
   19791              : 
   19792          237 :     case IX86_BUILTIN_TZCNT64:
   19793          237 :       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
   19794          237 :       goto fold_tzcnt_lzcnt;
   19795              : 
   19796          215 :     case IX86_BUILTIN_LZCNT32:
   19797          215 :       decl = builtin_decl_implicit (BUILT_IN_CLZ);
   19798          215 :       goto fold_tzcnt_lzcnt;
   19799              : 
   19800          224 :     case IX86_BUILTIN_LZCNT64:
   19801          224 :       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
   19802          224 :       goto fold_tzcnt_lzcnt;
   19803              : 
   19804          964 :     fold_tzcnt_lzcnt:
   19805          964 :       gcc_assert (n_args == 1);
   19806          964 :       arg0 = gimple_call_arg (stmt, 0);
   19807          964 :       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
   19808              :         {
   19809          799 :           int prec = TYPE_PRECISION (TREE_TYPE (arg0));
   19810              :           /* If arg0 is provably non-zero, optimize into generic
   19811              :              __builtin_c[tl]z{,ll} function the middle-end handles
   19812              :              better.  */
   19813          799 :           if (!expr_not_equal_to (arg0, wi::zero (prec)))
   19814              :             return false;
   19815              : 
   19816            9 :           loc = gimple_location (stmt);
   19817            9 :           g = gimple_build_call (decl, 1, arg0);
   19818            9 :           gimple_set_location (g, loc);
   19819            9 :           tree lhs = make_ssa_name (integer_type_node);
   19820            9 :           gimple_call_set_lhs (g, lhs);
   19821            9 :           gsi_insert_before (gsi, g, GSI_SAME_STMT);
   19822            9 :           g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
   19823            9 :           gimple_set_location (g, loc);
   19824            9 :           gsi_replace (gsi, g, false);
   19825            9 :           return true;
   19826              :         }
   19827              :       break;
   19828              : 
   19829          491 :     case IX86_BUILTIN_BZHI32:
   19830          491 :     case IX86_BUILTIN_BZHI64:
   19831          491 :       gcc_assert (n_args == 2);
   19832          491 :       arg1 = gimple_call_arg (stmt, 1);
   19833          491 :       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
   19834              :         {
   19835          195 :           unsigned int idx = tree_to_uhwi (arg1) & 0xff;
   19836          195 :           arg0 = gimple_call_arg (stmt, 0);
   19837          195 :           if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
   19838              :             break;
   19839           31 :           loc = gimple_location (stmt);
   19840           31 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19841           31 :           gimple_set_location (g, loc);
   19842           31 :           gsi_replace (gsi, g, false);
   19843           31 :           return true;
   19844              :         }
   19845              :       break;
   19846              : 
   19847          502 :     case IX86_BUILTIN_PDEP32:
   19848          502 :     case IX86_BUILTIN_PDEP64:
   19849          502 :     case IX86_BUILTIN_PEXT32:
   19850          502 :     case IX86_BUILTIN_PEXT64:
   19851          502 :       gcc_assert (n_args == 2);
   19852          502 :       arg1 = gimple_call_arg (stmt, 1);
   19853          502 :       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
   19854              :         {
   19855            4 :           loc = gimple_location (stmt);
   19856            4 :           arg0 = gimple_call_arg (stmt, 0);
   19857            4 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19858            4 :           gimple_set_location (g, loc);
   19859            4 :           gsi_replace (gsi, g, false);
   19860            4 :           return true;
   19861              :         }
   19862              :       break;
   19863              : 
   19864          145 :     case IX86_BUILTIN_PBLENDVB256:
   19865          145 :     case IX86_BUILTIN_BLENDVPS256:
   19866          145 :     case IX86_BUILTIN_BLENDVPD256:
   19867              :       /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
   19868              :          to scalar operations and not combined back.  */
   19869          145 :       if (!TARGET_AVX2)
   19870              :         break;
   19871              : 
   19872              :       /* FALLTHRU.  */
   19873          112 :     case IX86_BUILTIN_BLENDVPD:
   19874              :       /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
   19875              :          w/o sse4.2, it's veclowered to scalar operations and
   19876              :          not combined back.  */
   19877          112 :       if (!TARGET_SSE4_2)
   19878              :         break;
   19879              :       /* FALLTHRU.  */
   19880          166 :     case IX86_BUILTIN_PBLENDVB128:
   19881          166 :     case IX86_BUILTIN_BLENDVPS:
   19882          166 :       gcc_assert (n_args == 3);
   19883          166 :       arg0 = gimple_call_arg (stmt, 0);
   19884          166 :       arg1 = gimple_call_arg (stmt, 1);
   19885          166 :       arg2 = gimple_call_arg (stmt, 2);
   19886          166 :       if (gimple_call_lhs (stmt))
   19887              :         {
   19888          166 :           loc = gimple_location (stmt);
   19889          166 :           tree type = TREE_TYPE (arg2);
   19890          166 :           if (VECTOR_FLOAT_TYPE_P (type))
   19891              :             {
   19892           73 :               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
   19893           73 :                 ? intSI_type_node : intDI_type_node;
   19894           73 :               type = get_same_sized_vectype (itype, type);
   19895              :             }
   19896              :           else
   19897           93 :             type = signed_type_for (type);
   19898          166 :           arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
   19899          166 :           tree zero_vec = build_zero_cst (type);
   19900          166 :           tree cmp_type = truth_type_for (type);
   19901          166 :           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
   19902          166 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19903          166 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19904              :                                    VEC_COND_EXPR, cmp,
   19905              :                                    arg1, arg0);
   19906          166 :           gimple_set_location (g, loc);
   19907          166 :           gsi_replace (gsi, g, false);
   19908              :         }
   19909              :       else
   19910            0 :         gsi_replace (gsi, gimple_build_nop (), false);
   19911              :       return true;
   19912              : 
   19913              : 
   19914           16 :     case IX86_BUILTIN_PCMPEQB128:
   19915           16 :     case IX86_BUILTIN_PCMPEQW128:
   19916           16 :     case IX86_BUILTIN_PCMPEQD128:
   19917           16 :     case IX86_BUILTIN_PCMPEQQ:
   19918           16 :     case IX86_BUILTIN_PCMPEQB256:
   19919           16 :     case IX86_BUILTIN_PCMPEQW256:
   19920           16 :     case IX86_BUILTIN_PCMPEQD256:
   19921           16 :     case IX86_BUILTIN_PCMPEQQ256:
   19922           16 :       tcode = EQ_EXPR;
   19923           16 :       goto do_cmp;
   19924              : 
   19925              :     case IX86_BUILTIN_PCMPGTB128:
   19926              :     case IX86_BUILTIN_PCMPGTW128:
   19927              :     case IX86_BUILTIN_PCMPGTD128:
   19928              :     case IX86_BUILTIN_PCMPGTQ:
   19929              :     case IX86_BUILTIN_PCMPGTB256:
   19930              :     case IX86_BUILTIN_PCMPGTW256:
   19931              :     case IX86_BUILTIN_PCMPGTD256:
   19932              :     case IX86_BUILTIN_PCMPGTQ256:
   19933              :       tcode = GT_EXPR;
   19934              : 
   19935           33 :     do_cmp:
   19936           33 :       gcc_assert (n_args == 2);
   19937           33 :       arg0 = gimple_call_arg (stmt, 0);
   19938           33 :       arg1 = gimple_call_arg (stmt, 1);
   19939           33 :       if (gimple_call_lhs (stmt))
   19940              :         {
   19941           32 :           loc = gimple_location (stmt);
   19942           32 :           tree type = TREE_TYPE (arg0);
   19943           32 :           tree zero_vec = build_zero_cst (type);
   19944           32 :           tree minus_one_vec = build_minus_one_cst (type);
   19945           32 :           tree cmp_type = truth_type_for (type);
   19946           32 :           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
   19947           32 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19948           32 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19949              :                                    VEC_COND_EXPR, cmp,
   19950              :                                    minus_one_vec, zero_vec);
   19951           32 :           gimple_set_location (g, loc);
   19952           32 :           gsi_replace (gsi, g, false);
   19953              :         }
   19954              :       else
   19955            1 :         gsi_replace (gsi, gimple_build_nop (), false);
   19956              :       return true;
   19957              : 
   19958         9297 :     case IX86_BUILTIN_PSLLD:
   19959         9297 :     case IX86_BUILTIN_PSLLD128:
   19960         9297 :     case IX86_BUILTIN_PSLLD128_MASK:
   19961         9297 :     case IX86_BUILTIN_PSLLD256:
   19962         9297 :     case IX86_BUILTIN_PSLLD256_MASK:
   19963         9297 :     case IX86_BUILTIN_PSLLD512:
   19964         9297 :     case IX86_BUILTIN_PSLLDI:
   19965         9297 :     case IX86_BUILTIN_PSLLDI128:
   19966         9297 :     case IX86_BUILTIN_PSLLDI128_MASK:
   19967         9297 :     case IX86_BUILTIN_PSLLDI256:
   19968         9297 :     case IX86_BUILTIN_PSLLDI256_MASK:
   19969         9297 :     case IX86_BUILTIN_PSLLDI512:
   19970         9297 :     case IX86_BUILTIN_PSLLQ:
   19971         9297 :     case IX86_BUILTIN_PSLLQ128:
   19972         9297 :     case IX86_BUILTIN_PSLLQ128_MASK:
   19973         9297 :     case IX86_BUILTIN_PSLLQ256:
   19974         9297 :     case IX86_BUILTIN_PSLLQ256_MASK:
   19975         9297 :     case IX86_BUILTIN_PSLLQ512:
   19976         9297 :     case IX86_BUILTIN_PSLLQI:
   19977         9297 :     case IX86_BUILTIN_PSLLQI128:
   19978         9297 :     case IX86_BUILTIN_PSLLQI128_MASK:
   19979         9297 :     case IX86_BUILTIN_PSLLQI256:
   19980         9297 :     case IX86_BUILTIN_PSLLQI256_MASK:
   19981         9297 :     case IX86_BUILTIN_PSLLQI512:
   19982         9297 :     case IX86_BUILTIN_PSLLW:
   19983         9297 :     case IX86_BUILTIN_PSLLW128:
   19984         9297 :     case IX86_BUILTIN_PSLLW128_MASK:
   19985         9297 :     case IX86_BUILTIN_PSLLW256:
   19986         9297 :     case IX86_BUILTIN_PSLLW256_MASK:
   19987         9297 :     case IX86_BUILTIN_PSLLW512_MASK:
   19988         9297 :     case IX86_BUILTIN_PSLLWI:
   19989         9297 :     case IX86_BUILTIN_PSLLWI128:
   19990         9297 :     case IX86_BUILTIN_PSLLWI128_MASK:
   19991         9297 :     case IX86_BUILTIN_PSLLWI256:
   19992         9297 :     case IX86_BUILTIN_PSLLWI256_MASK:
   19993         9297 :     case IX86_BUILTIN_PSLLWI512_MASK:
   19994         9297 :       rcode = ASHIFT;
   19995         9297 :       is_vshift = false;
   19996         9297 :       goto do_shift;
   19997         6495 :     case IX86_BUILTIN_PSRAD:
   19998         6495 :     case IX86_BUILTIN_PSRAD128:
   19999         6495 :     case IX86_BUILTIN_PSRAD128_MASK:
   20000         6495 :     case IX86_BUILTIN_PSRAD256:
   20001         6495 :     case IX86_BUILTIN_PSRAD256_MASK:
   20002         6495 :     case IX86_BUILTIN_PSRAD512:
   20003         6495 :     case IX86_BUILTIN_PSRADI:
   20004         6495 :     case IX86_BUILTIN_PSRADI128:
   20005         6495 :     case IX86_BUILTIN_PSRADI128_MASK:
   20006         6495 :     case IX86_BUILTIN_PSRADI256:
   20007         6495 :     case IX86_BUILTIN_PSRADI256_MASK:
   20008         6495 :     case IX86_BUILTIN_PSRADI512:
   20009         6495 :     case IX86_BUILTIN_PSRAQ128_MASK:
   20010         6495 :     case IX86_BUILTIN_PSRAQ256_MASK:
   20011         6495 :     case IX86_BUILTIN_PSRAQ512:
   20012         6495 :     case IX86_BUILTIN_PSRAQI128_MASK:
   20013         6495 :     case IX86_BUILTIN_PSRAQI256_MASK:
   20014         6495 :     case IX86_BUILTIN_PSRAQI512:
   20015         6495 :     case IX86_BUILTIN_PSRAW:
   20016         6495 :     case IX86_BUILTIN_PSRAW128:
   20017         6495 :     case IX86_BUILTIN_PSRAW128_MASK:
   20018         6495 :     case IX86_BUILTIN_PSRAW256:
   20019         6495 :     case IX86_BUILTIN_PSRAW256_MASK:
   20020         6495 :     case IX86_BUILTIN_PSRAW512:
   20021         6495 :     case IX86_BUILTIN_PSRAWI:
   20022         6495 :     case IX86_BUILTIN_PSRAWI128:
   20023         6495 :     case IX86_BUILTIN_PSRAWI128_MASK:
   20024         6495 :     case IX86_BUILTIN_PSRAWI256:
   20025         6495 :     case IX86_BUILTIN_PSRAWI256_MASK:
   20026         6495 :     case IX86_BUILTIN_PSRAWI512:
   20027         6495 :       rcode = ASHIFTRT;
   20028         6495 :       is_vshift = false;
   20029         6495 :       goto do_shift;
   20030         7960 :     case IX86_BUILTIN_PSRLD:
   20031         7960 :     case IX86_BUILTIN_PSRLD128:
   20032         7960 :     case IX86_BUILTIN_PSRLD128_MASK:
   20033         7960 :     case IX86_BUILTIN_PSRLD256:
   20034         7960 :     case IX86_BUILTIN_PSRLD256_MASK:
   20035         7960 :     case IX86_BUILTIN_PSRLD512:
   20036         7960 :     case IX86_BUILTIN_PSRLDI:
   20037         7960 :     case IX86_BUILTIN_PSRLDI128:
   20038         7960 :     case IX86_BUILTIN_PSRLDI128_MASK:
   20039         7960 :     case IX86_BUILTIN_PSRLDI256:
   20040         7960 :     case IX86_BUILTIN_PSRLDI256_MASK:
   20041         7960 :     case IX86_BUILTIN_PSRLDI512:
   20042         7960 :     case IX86_BUILTIN_PSRLQ:
   20043         7960 :     case IX86_BUILTIN_PSRLQ128:
   20044         7960 :     case IX86_BUILTIN_PSRLQ128_MASK:
   20045         7960 :     case IX86_BUILTIN_PSRLQ256:
   20046         7960 :     case IX86_BUILTIN_PSRLQ256_MASK:
   20047         7960 :     case IX86_BUILTIN_PSRLQ512:
   20048         7960 :     case IX86_BUILTIN_PSRLQI:
   20049         7960 :     case IX86_BUILTIN_PSRLQI128:
   20050         7960 :     case IX86_BUILTIN_PSRLQI128_MASK:
   20051         7960 :     case IX86_BUILTIN_PSRLQI256:
   20052         7960 :     case IX86_BUILTIN_PSRLQI256_MASK:
   20053         7960 :     case IX86_BUILTIN_PSRLQI512:
   20054         7960 :     case IX86_BUILTIN_PSRLW:
   20055         7960 :     case IX86_BUILTIN_PSRLW128:
   20056         7960 :     case IX86_BUILTIN_PSRLW128_MASK:
   20057         7960 :     case IX86_BUILTIN_PSRLW256:
   20058         7960 :     case IX86_BUILTIN_PSRLW256_MASK:
   20059         7960 :     case IX86_BUILTIN_PSRLW512:
   20060         7960 :     case IX86_BUILTIN_PSRLWI:
   20061         7960 :     case IX86_BUILTIN_PSRLWI128:
   20062         7960 :     case IX86_BUILTIN_PSRLWI128_MASK:
   20063         7960 :     case IX86_BUILTIN_PSRLWI256:
   20064         7960 :     case IX86_BUILTIN_PSRLWI256_MASK:
   20065         7960 :     case IX86_BUILTIN_PSRLWI512:
   20066         7960 :       rcode = LSHIFTRT;
   20067         7960 :       is_vshift = false;
   20068         7960 :       goto do_shift;
   20069         2384 :     case IX86_BUILTIN_PSLLVV16HI:
   20070         2384 :     case IX86_BUILTIN_PSLLVV16SI:
   20071         2384 :     case IX86_BUILTIN_PSLLVV2DI:
   20072         2384 :     case IX86_BUILTIN_PSLLVV2DI_MASK:
   20073         2384 :     case IX86_BUILTIN_PSLLVV32HI:
   20074         2384 :     case IX86_BUILTIN_PSLLVV4DI:
   20075         2384 :     case IX86_BUILTIN_PSLLVV4DI_MASK:
   20076         2384 :     case IX86_BUILTIN_PSLLVV4SI:
   20077         2384 :     case IX86_BUILTIN_PSLLVV4SI_MASK:
   20078         2384 :     case IX86_BUILTIN_PSLLVV8DI:
   20079         2384 :     case IX86_BUILTIN_PSLLVV8HI:
   20080         2384 :     case IX86_BUILTIN_PSLLVV8SI:
   20081         2384 :     case IX86_BUILTIN_PSLLVV8SI_MASK:
   20082         2384 :       rcode = ASHIFT;
   20083         2384 :       is_vshift = true;
   20084         2384 :       goto do_shift;
   20085         2341 :     case IX86_BUILTIN_PSRAVQ128:
   20086         2341 :     case IX86_BUILTIN_PSRAVQ256:
   20087         2341 :     case IX86_BUILTIN_PSRAVV16HI:
   20088         2341 :     case IX86_BUILTIN_PSRAVV16SI:
   20089         2341 :     case IX86_BUILTIN_PSRAVV32HI:
   20090         2341 :     case IX86_BUILTIN_PSRAVV4SI:
   20091         2341 :     case IX86_BUILTIN_PSRAVV4SI_MASK:
   20092         2341 :     case IX86_BUILTIN_PSRAVV8DI:
   20093         2341 :     case IX86_BUILTIN_PSRAVV8HI:
   20094         2341 :     case IX86_BUILTIN_PSRAVV8SI:
   20095         2341 :     case IX86_BUILTIN_PSRAVV8SI_MASK:
   20096         2341 :       rcode = ASHIFTRT;
   20097         2341 :       is_vshift = true;
   20098         2341 :       goto do_shift;
   20099         2380 :     case IX86_BUILTIN_PSRLVV16HI:
   20100         2380 :     case IX86_BUILTIN_PSRLVV16SI:
   20101         2380 :     case IX86_BUILTIN_PSRLVV2DI:
   20102         2380 :     case IX86_BUILTIN_PSRLVV2DI_MASK:
   20103         2380 :     case IX86_BUILTIN_PSRLVV32HI:
   20104         2380 :     case IX86_BUILTIN_PSRLVV4DI:
   20105         2380 :     case IX86_BUILTIN_PSRLVV4DI_MASK:
   20106         2380 :     case IX86_BUILTIN_PSRLVV4SI:
   20107         2380 :     case IX86_BUILTIN_PSRLVV4SI_MASK:
   20108         2380 :     case IX86_BUILTIN_PSRLVV8DI:
   20109         2380 :     case IX86_BUILTIN_PSRLVV8HI:
   20110         2380 :     case IX86_BUILTIN_PSRLVV8SI:
   20111         2380 :     case IX86_BUILTIN_PSRLVV8SI_MASK:
   20112         2380 :       rcode = LSHIFTRT;
   20113         2380 :       is_vshift = true;
   20114         2380 :       goto do_shift;
   20115              : 
   20116        30857 :     do_shift:
   20117        30857 :       gcc_assert (n_args >= 2);
   20118        30857 :       if (!gimple_call_lhs (stmt))
   20119              :         {
   20120            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20121            1 :           return true;
   20122              :         }
   20123        30856 :       arg0 = gimple_call_arg (stmt, 0);
   20124        30856 :       arg1 = gimple_call_arg (stmt, 1);
   20125        30856 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20126              :       /* For masked shift, only optimize if the mask is all ones.  */
   20127        30856 :       if (n_args > 2
   20128        30856 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20129              :         break;
   20130        16081 :       if (is_vshift)
   20131              :         {
   20132         2640 :           if (TREE_CODE (arg1) != VECTOR_CST)
   20133              :             break;
   20134           69 :           count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
   20135           69 :           if (integer_zerop (arg1))
   20136           27 :             count = 0;
   20137           42 :           else if (rcode == ASHIFTRT)
   20138              :             break;
   20139              :           else
   20140          230 :             for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
   20141              :               {
   20142          212 :                 tree elt = VECTOR_CST_ELT (arg1, i);
   20143          212 :                 if (!wi::neg_p (wi::to_wide (elt))
   20144          375 :                     && wi::to_widest (elt) < count)
   20145           16 :                   return false;
   20146              :               }
   20147              :         }
   20148              :       else
   20149              :         {
   20150        13441 :           arg1 = ix86_vector_shift_count (arg1);
   20151        13441 :           if (!arg1)
   20152              :             break;
   20153         5608 :           count = tree_to_uhwi (arg1);
   20154              :         }
   20155         5653 :       if (count == 0)
   20156              :         {
   20157              :           /* Just return the first argument for shift by 0.  */
   20158           93 :           loc = gimple_location (stmt);
   20159           93 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   20160           93 :           gimple_set_location (g, loc);
   20161           93 :           gsi_replace (gsi, g, false);
   20162           93 :           return true;
   20163              :         }
   20164         5560 :       if (rcode != ASHIFTRT
   20165         5560 :           && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
   20166              :         {
   20167              :           /* For shift counts equal or greater than precision, except for
   20168              :              arithmetic right shift the result is zero.  */
   20169           78 :           loc = gimple_location (stmt);
   20170           78 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20171           78 :                                    build_zero_cst (TREE_TYPE (arg0)));
   20172           78 :           gimple_set_location (g, loc);
   20173           78 :           gsi_replace (gsi, g, false);
   20174           78 :           return true;
   20175              :         }
   20176              :       break;
   20177              : 
   20178          531 :     case IX86_BUILTIN_SHUFPD512:
   20179          531 :     case IX86_BUILTIN_SHUFPS512:
   20180          531 :     case IX86_BUILTIN_SHUFPD:
   20181          531 :     case IX86_BUILTIN_SHUFPD256:
   20182          531 :     case IX86_BUILTIN_SHUFPS:
   20183          531 :     case IX86_BUILTIN_SHUFPS256:
   20184          531 :       arg0 = gimple_call_arg (stmt, 0);
   20185          531 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20186              :       /* This is masked shuffle.  Only optimize if the mask is all ones.  */
   20187          531 :       if (n_args > 3
   20188          895 :           && !ix86_masked_all_ones (elems,
   20189          364 :                                     gimple_call_arg (stmt, n_args - 1)))
   20190              :         break;
   20191          203 :       arg2 = gimple_call_arg (stmt, 2);
   20192          203 :       if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
   20193              :         {
   20194          146 :           unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
   20195              :           /* Check valid imm, refer to gcc.target/i386/testimm-10.c.  */
   20196          146 :           if (shuffle_mask > 255)
   20197              :             return false;
   20198              : 
   20199          144 :           machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
   20200          144 :           loc = gimple_location (stmt);
   20201          144 :           tree itype = (imode == E_DFmode
   20202          144 :                         ? long_long_integer_type_node : integer_type_node);
   20203          144 :           tree vtype = build_vector_type (itype, elems);
   20204          144 :           tree_vector_builder elts (vtype, elems, 1);
   20205              : 
   20206              : 
   20207              :           /* Transform integer shuffle_mask to vector perm_mask which
   20208              :              is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md.  */
   20209          840 :           for (unsigned i = 0; i != elems; i++)
   20210              :             {
   20211          696 :               unsigned sel_idx;
   20212              :               /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
   20213              :                  provide 2 select controls for each element of the
   20214              :                  destination.  */
   20215          696 :               if (imode == E_DFmode)
   20216          240 :                 sel_idx = (i & 1) * elems + (i & ~1)
   20217          240 :                           + ((shuffle_mask >> i) & 1);
   20218              :               else
   20219              :                 {
   20220              :                   /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
   20221              :                      controls for each element of the destination.  */
   20222          456 :                   unsigned j = i % 4;
   20223          456 :                   sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
   20224          456 :                             + ((shuffle_mask >> 2 * j) & 3);
   20225              :                 }
   20226          696 :               elts.quick_push (build_int_cst (itype, sel_idx));
   20227              :             }
   20228              : 
   20229          144 :           tree perm_mask = elts.build ();
   20230          144 :           arg1 = gimple_call_arg (stmt, 1);
   20231          144 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20232              :                                    VEC_PERM_EXPR,
   20233              :                                    arg0, arg1, perm_mask);
   20234          144 :           gimple_set_location (g, loc);
   20235          144 :           gsi_replace (gsi, g, false);
   20236          144 :           return true;
   20237          144 :         }
   20238              :       // Do not error yet, the constant could be propagated later?
   20239              :       break;
   20240              : 
   20241           48 :     case IX86_BUILTIN_PABSB:
   20242           48 :     case IX86_BUILTIN_PABSW:
   20243           48 :     case IX86_BUILTIN_PABSD:
   20244              :       /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
   20245           48 :       if (!TARGET_MMX_WITH_SSE)
   20246              :         break;
   20247              :       /* FALLTHRU.  */
   20248         2189 :     case IX86_BUILTIN_PABSB128:
   20249         2189 :     case IX86_BUILTIN_PABSB256:
   20250         2189 :     case IX86_BUILTIN_PABSB512:
   20251         2189 :     case IX86_BUILTIN_PABSW128:
   20252         2189 :     case IX86_BUILTIN_PABSW256:
   20253         2189 :     case IX86_BUILTIN_PABSW512:
   20254         2189 :     case IX86_BUILTIN_PABSD128:
   20255         2189 :     case IX86_BUILTIN_PABSD256:
   20256         2189 :     case IX86_BUILTIN_PABSD512:
   20257         2189 :     case IX86_BUILTIN_PABSQ128:
   20258         2189 :     case IX86_BUILTIN_PABSQ256:
   20259         2189 :     case IX86_BUILTIN_PABSQ512:
   20260         2189 :     case IX86_BUILTIN_PABSB128_MASK:
   20261         2189 :     case IX86_BUILTIN_PABSB256_MASK:
   20262         2189 :     case IX86_BUILTIN_PABSW128_MASK:
   20263         2189 :     case IX86_BUILTIN_PABSW256_MASK:
   20264         2189 :     case IX86_BUILTIN_PABSD128_MASK:
   20265         2189 :     case IX86_BUILTIN_PABSD256_MASK:
   20266         2189 :       gcc_assert (n_args >= 1);
   20267         2189 :       if (!gimple_call_lhs (stmt))
   20268              :         {
   20269            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20270            1 :           return true;
   20271              :         }
   20272         2188 :       arg0 = gimple_call_arg (stmt, 0);
   20273         2188 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20274              :       /* For masked ABS, only optimize if the mask is all ones.  */
   20275         2188 :       if (n_args > 1
   20276         2188 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20277              :         break;
   20278          228 :       {
   20279          228 :         tree utype, ures, vce;
   20280          228 :         utype = unsigned_type_for (TREE_TYPE (arg0));
   20281              :         /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
   20282              :            instead of ABS_EXPR to handle overflow case(TYPE_MIN).  */
   20283          228 :         ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
   20284          228 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20285          228 :         loc = gimple_location (stmt);
   20286          228 :         vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
   20287          228 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20288              :                                  VIEW_CONVERT_EXPR, vce);
   20289          228 :         gsi_replace (gsi, g, false);
   20290              :       }
   20291          228 :       return true;
   20292              : 
   20293         2225 :     case IX86_BUILTIN_MINPS:
   20294         2225 :     case IX86_BUILTIN_MINPD:
   20295         2225 :     case IX86_BUILTIN_MINPS256:
   20296         2225 :     case IX86_BUILTIN_MINPD256:
   20297         2225 :     case IX86_BUILTIN_MINPS512:
   20298         2225 :     case IX86_BUILTIN_MINPD512:
   20299         2225 :     case IX86_BUILTIN_MINPS128_MASK:
   20300         2225 :     case IX86_BUILTIN_MINPD128_MASK:
   20301         2225 :     case IX86_BUILTIN_MINPS256_MASK:
   20302         2225 :     case IX86_BUILTIN_MINPD256_MASK:
   20303         2225 :     case IX86_BUILTIN_MINPH128_MASK:
   20304         2225 :     case IX86_BUILTIN_MINPH256_MASK:
   20305         2225 :     case IX86_BUILTIN_MINPH512_MASK:
   20306         2225 :       tcode = LT_EXPR;
   20307         2225 :       goto do_minmax;
   20308              : 
   20309              :     case IX86_BUILTIN_MAXPS:
   20310              :     case IX86_BUILTIN_MAXPD:
   20311              :     case IX86_BUILTIN_MAXPS256:
   20312              :     case IX86_BUILTIN_MAXPD256:
   20313              :     case IX86_BUILTIN_MAXPS512:
   20314              :     case IX86_BUILTIN_MAXPD512:
   20315              :     case IX86_BUILTIN_MAXPS128_MASK:
   20316              :     case IX86_BUILTIN_MAXPD128_MASK:
   20317              :     case IX86_BUILTIN_MAXPS256_MASK:
   20318              :     case IX86_BUILTIN_MAXPD256_MASK:
   20319              :     case IX86_BUILTIN_MAXPH128_MASK:
   20320              :     case IX86_BUILTIN_MAXPH256_MASK:
   20321              :     case IX86_BUILTIN_MAXPH512_MASK:
   20322              :       tcode = GT_EXPR;
   20323         4435 :     do_minmax:
   20324         4435 :       gcc_assert (n_args >= 2);
   20325              :       /* Without SSE4.1 we often aren't able to pattern match it back to the
   20326              :          desired instruction.  */
   20327         4435 :       if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
   20328              :         break;
   20329         3865 :       arg0 = gimple_call_arg (stmt, 0);
   20330         3865 :       arg1 = gimple_call_arg (stmt, 1);
   20331         3865 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20332              :       /* For masked minmax, only optimize if the mask is all ones.  */
   20333         3865 :       if (n_args > 2
   20334         3865 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
   20335              :         break;
   20336          647 :       if (n_args >= 5)
   20337              :         {
   20338          436 :           tree arg4 = gimple_call_arg (stmt, 4);
   20339          436 :           if (!tree_fits_uhwi_p (arg4))
   20340              :             break;
   20341          424 :           if (tree_to_uhwi (arg4) == 4)
   20342              :             /* Ok.  */;
   20343          416 :           else if (tree_to_uhwi (arg4) != 8)
   20344              :             /* Invalid round argument.  */
   20345              :             break;
   20346          416 :           else if (HONOR_NANS (arg0))
   20347              :             /* Lowering to comparison would raise exceptions which
   20348              :                shouldn't be raised.  */
   20349              :             break;
   20350              :         }
   20351          219 :       {
   20352          219 :         tree type = truth_type_for (TREE_TYPE (arg0));
   20353          219 :         tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
   20354          219 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20355          219 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20356              :                                  VEC_COND_EXPR, cmpres, arg0, arg1);
   20357          219 :         gsi_replace (gsi, g, false);
   20358              :       }
   20359          219 :       return true;
   20360              : 
   20361              :     default:
   20362              :       break;
   20363              :     }
   20364              : 
   20365              :   return false;
   20366              : }
   20367              : 
   20368              : /* Handler for an SVML-style interface to
   20369              :    a library with vectorized intrinsics.
                      :
                      :    FN is the scalar math function to vectorize; TYPE_OUT and TYPE_IN
                      :    are the vector result and argument types.  Returns a new
                      :    FUNCTION_DECL with TREE_PUBLIC, DECL_EXTERNAL, DECL_IS_NOVOPS and
                      :    TREE_READONLY set, whose name is built as "vmls<Op>4" for
                      :    4 x SFmode or "vmld<Op>2" for 2 x DFmode.  Returns NULL_TREE when
                      :    flag_unsafe_math_optimizations is clear, when TYPE_OUT and TYPE_IN
                      :    differ in element mode or element count, or when FN or the vector
                      :    shape is not accepted by the switch below.  */
   20370              : 
   20371              : tree
   20372           10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
   20373              : {
   20374           10 :   char name[20];
   20375           10 :   tree fntype, new_fndecl, args;
   20376           10 :   unsigned arity;
   20377           10 :   const char *bname;
   20378           10 :   machine_mode el_mode, in_mode;
   20379           10 :   int n, in_n;
   20380              : 
   20381              :   /* The SVML is suitable for unsafe math only.  */
   20382           10 :   if (!flag_unsafe_math_optimizations)
   20383              :     return NULL_TREE;
   20384              : 
   20385           10 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20386           10 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20387           10 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20388           10 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20389           10 :   if (el_mode != in_mode  /* Argument and result vectors must match.  */
   20390           10 :       || n != in_n)
   20391              :     return NULL_TREE;
   20392              : 
   20393           10 :   switch (fn)
   20394              :     {
   20395           10 :     CASE_CFN_EXP:
   20396           10 :     CASE_CFN_LOG:
   20397           10 :     CASE_CFN_LOG10:
   20398           10 :     CASE_CFN_POW:
   20399           10 :     CASE_CFN_TANH:
   20400           10 :     CASE_CFN_TAN:
   20401           10 :     CASE_CFN_ATAN:
   20402           10 :     CASE_CFN_ATAN2:
   20403           10 :     CASE_CFN_ATANH:
   20404           10 :     CASE_CFN_CBRT:
   20405           10 :     CASE_CFN_SINH:
   20406           10 :     CASE_CFN_SIN:
   20407           10 :     CASE_CFN_ASINH:
   20408           10 :     CASE_CFN_ASIN:
   20409           10 :     CASE_CFN_COSH:
   20410           10 :     CASE_CFN_COS:
   20411           10 :     CASE_CFN_ACOSH:
   20412           10 :     CASE_CFN_ACOS:
   20413           10 :       if ((el_mode != DFmode || n != 2)  /* Only 2 x DFmode and 4 x SFmode pass.  */
   20414            8 :           && (el_mode != SFmode || n != 4))
   20415              :         return NULL_TREE;
   20416            6 :       break;
   20417              : 
   20418              :     default:
   20419              :       return NULL_TREE;
   20420              :     }
   20421              : 
   20422            6 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20423              :                                  ? double_type_node : float_type_node, fn);
   20424            6 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20425              :   /* Build the routine name.  BNAME + 10 skips the first ten characters
                      :      of the builtin name (presumably the "__builtin_" prefix).  The
                      :      two log builtins get fixed names that are not derived from BNAME.  */
   20426            6 :   if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
   20427            2 :     strcpy (name, "vmlsLn4");
   20428            4 :   else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
   20429            0 :     strcpy (name, "vmldLn2");
   20430            4 :   else if (n == 4)
   20431              :     {
   20432            2 :       sprintf (name, "vmls%s", bname+10);
   20433            2 :       name[strlen (name)-1] = '4';  /* Overwrite the last character (presumably the 'f' suffix).  */
   20434              :     }
   20435              :   else
   20436            2 :     sprintf (name, "vmld%s2", bname+10);
   20437              : 
   20438              :   /* Convert the first character after the four-character prefix
                      :      (name[4]) to uppercase by clearing bit 0x20.  */
   20439            6 :   name[4] &= ~0x20;
   20440              :   /* Count the entries on the DECL_ARGUMENTS chain of FNDECL to choose
                      :      between a one- and a two-argument prototype.
                      :      NOTE(review): the hit counts in this listing show that the loop
                      :      body and the arity == 1 branch never ran, although the logf case
                      :      above did, so those decls received the two-argument type.
                      :      Presumably DECL_ARGUMENTS is empty for these builtin decls --
                      :      confirm, and consider counting the argument types of FNDECL.  */
   20441            6 :   arity = 0;
   20442            6 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20443            0 :     arity++;
   20444              : 
   20445            6 :   if (arity == 1)
   20446            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20447              :   else
   20448            6 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20449              : 
   20450              :   /* Build a function declaration for the vectorized function.  */
   20451            6 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20452              :                            FUNCTION_DECL, get_identifier (name), fntype);
   20453            6 :   TREE_PUBLIC (new_fndecl) = 1;
   20454            6 :   DECL_EXTERNAL (new_fndecl) = 1;
   20455            6 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20456            6 :   TREE_READONLY (new_fndecl) = 1;
   20457              : 
   20458            6 :   return new_fndecl;
   20459              : }
   20460              : 
   20461              : /* Handler for an ACML-style interface to
   20462              :    a library with vectorized intrinsics.
                      :
                      :    FN is the scalar math function to vectorize; TYPE_OUT and TYPE_IN
                      :    are the vector result and argument types.  Returns a new
                      :    FUNCTION_DECL with TREE_PUBLIC, DECL_EXTERNAL, DECL_IS_NOVOPS and
                      :    TREE_READONLY set, named "__vrd2_<op>" for 2 x DFmode or
                      :    "__vrs4_<op>" for 4 x SFmode.  Returns NULL_TREE unless both
                      :    TARGET_64BIT and flag_unsafe_math_optimizations hold, TYPE_OUT and
                      :    TYPE_IN agree in element mode and element count, and FN and the
                      :    vector shape are accepted by the switch below.  */
   20463              : 
   20464              : tree
   20465            3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
   20466              : {
   20467            3 :   char name[20] = "__vr.._";  /* The two dots (name[4], name[5]) are filled in below.  */
   20468            3 :   tree fntype, new_fndecl, args;
   20469            3 :   unsigned arity;
   20470            3 :   const char *bname;
   20471            3 :   machine_mode el_mode, in_mode;
   20472            3 :   int n, in_n;
   20473              : 
   20474              :   /* The ACML is 64bits only and suitable for unsafe math only as
   20475              :      it does not correctly support parts of IEEE with the required
   20476              :      precision such as denormals.  */
   20477            3 :   if (!TARGET_64BIT
   20478            3 :       || !flag_unsafe_math_optimizations)
   20479              :     return NULL_TREE;
   20480              : 
   20481            3 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20482            3 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20483            3 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20484            3 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20485            3 :   if (el_mode != in_mode  /* Argument and result vectors must match.  */
   20486            3 :       || n != in_n)
   20487              :     return NULL_TREE;
   20488              : 
   20489            3 :   switch (fn)
   20490              :     {
   20491            3 :     CASE_CFN_SIN:
   20492            3 :     CASE_CFN_COS:
   20493            3 :     CASE_CFN_EXP:
   20494            3 :     CASE_CFN_LOG:
   20495            3 :     CASE_CFN_LOG2:
   20496            3 :     CASE_CFN_LOG10:
   20497            3 :       if (el_mode == DFmode && n == 2)
   20498              :         {
   20499            3 :           name[4] = 'd';  /* Precision letter.  */
   20500            3 :           name[5] = '2';  /* Element count.  */
   20501              :         }
   20502            0 :       else if (el_mode == SFmode && n == 4)
   20503              :         {
   20504            0 :           name[4] = 's';
   20505            0 :           name[5] = '4';
   20506              :         }
   20507              :       else
   20508              :         return NULL_TREE;
   20509            3 :       break;
   20510              : 
   20511              :     default:
   20512              :       return NULL_TREE;
   20513              :     }
   20514              :   /* Append the operation name after the seven-character template.
                      :      BNAME + 10 skips the first ten characters of the builtin name
                      :      (presumably the "__builtin_" prefix).  */
   20515            3 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20516              :                                  ? double_type_node : float_type_node, fn);
   20517            3 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20518            3 :   sprintf (name + 7, "%s", bname+10);
   20519              :   /* Count the entries on the DECL_ARGUMENTS chain of FNDECL to choose
                      :      between a one- and a two-argument prototype.
                      :      NOTE(review): the hit counts in this listing show that the loop
                      :      body and the arity == 1 branch never ran, so all three recorded
                      :      decls received the two-argument type.  Presumably DECL_ARGUMENTS
                      :      is empty for these builtin decls -- confirm, and consider counting
                      :      the argument types of FNDECL.  */
   20520            3 :   arity = 0;
   20521            3 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20522            0 :     arity++;
   20523              : 
   20524            3 :   if (arity == 1)
   20525            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20526              :   else
   20527            3 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20528              : 
   20529              :   /* Build a function declaration for the vectorized function.  */
   20530            3 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20531              :                            FUNCTION_DECL, get_identifier (name), fntype);
   20532            3 :   TREE_PUBLIC (new_fndecl) = 1;
   20533            3 :   DECL_EXTERNAL (new_fndecl) = 1;
   20534            3 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20535            3 :   TREE_READONLY (new_fndecl) = 1;
   20536              : 
   20537            3 :   return new_fndecl;
   20538              : }
   20539              : 
   20540              : /* Handler for an AOCL-LibM-style interface to
   20541              :    a library with vectorized intrinsics.
                      :
                      :    FN is the scalar math function to vectorize; TYPE_OUT and TYPE_IN
                      :    are the vector result and argument types.  Returns a new
                      :    FUNCTION_DECL with TREE_PUBLIC, DECL_EXTERNAL and TREE_READONLY
                      :    set, whose name is "amd_vr", then 's' or 'd' for the precision,
                      :    then the element count, then '_' and the operation name.  Returns
                      :    NULL_TREE unless both TARGET_64BIT and
                      :    flag_unsafe_math_optimizations hold, TYPE_OUT and TYPE_IN agree in
                      :    element mode and element count, the element mode is SFmode or
                      :    DFmode, and the supported_n table lists the element count for FN.  */
   20542              : 
   20543              : tree
   20544          386 : ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
   20545              : {
   20546          386 :   char name[20] = "amd_vr";
   20547          386 :   int name_len = 6;  /* Length of the "amd_vr" prefix; advanced as characters are appended.  */
   20548          386 :   tree fntype, new_fndecl, args;
   20549          386 :   unsigned arity;
   20550          386 :   const char *bname;
   20551          386 :   machine_mode el_mode, in_mode;
   20552          386 :   int n, in_n;
   20553              : 
   20554              :   /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
   20555              :      as it trades off some accuracy for increased performance.  */
   20556          386 :   if (!TARGET_64BIT
   20557          386 :       || !flag_unsafe_math_optimizations)
   20558              :     return NULL_TREE;
   20559              : 
   20560          386 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20561          386 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20562          386 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20563          386 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20564          386 :   if (el_mode != in_mode  /* Argument and result vectors must match.  */
   20565          386 :       || n != in_n)
   20566              :     return NULL_TREE;
   20567              : 
   20568          386 :   gcc_checking_assert (n > 0);
   20569              : 
   20570              :   /* Decide whether there exists a function for the combination of FN, the mode
   20571              :      and the vector width.  Return early if it doesn't.  */
   20572              : 
   20573          386 :   if (el_mode != DFmode && el_mode != SFmode)
   20574              :     return NULL_TREE;
   20575              : 
   20576              :   /* Supported vector widths for given FN and single/double precision.  Zeros
   20577              :      are used to fill out unused positions in the arrays.  */
   20578          386 :   static const int supported_n[][2][3] = {
   20579              :   /*   Single prec. ,  Double prec.  */
   20580              :     { { 16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
   20581              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP.  */
   20582              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP2.  */
   20583              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG.  */
   20584              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG2.  */
   20585              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* COS.  */
   20586              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* SIN.  */
   20587              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* POW.  */
   20588              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* ERF.  */
   20589              :     { {  4,  8, 16 }, {  2,  8,  0 } }, /* ATAN.  */
   20590              :     { {  4,  8, 16 }, {  2,  0,  0 } }, /* LOG10.  */
   20591              :     { {  4,  0,  0 }, {  2,  0,  0 } }, /* EXP10.  */
   20592              :     { {  4,  0,  0 }, {  2,  0,  0 } }, /* LOG1P.  */
   20593              :     { {  4,  8, 16 }, {  8,  0,  0 } }, /* ASIN.  */
   20594              :     { {  4, 16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
   20595              :     { {  4,  8, 16 }, {  0,  0,  0 } }, /* TANH.  */
   20596              :     { {  4,  0,  0 }, {  0,  0,  0 } }, /* EXPM1.  */
   20597              :     { {  4,  8,  0 }, {  0,  0,  0 } }, /* COSH.  */
   20598              :   };
   20599              : 
   20600              :   /* We cannot simply index the supported_n array with FN since multiple FNs
   20601              :      may correspond to a single operation (see the definitions of these
   20602              :      CASE_CFN_* macros).  The values of I assigned below must stay in
                      :      step with the row order of supported_n above.  */
   20603          386 :   int i;
   20604          386 :   switch (fn)
   20605              :     {
   20606              :     CASE_CFN_TAN   :  i = 0; break;
   20607           28 :     CASE_CFN_EXP   :  i = 1; break;
   20608           28 :     CASE_CFN_EXP2  :  i = 2; break;
   20609           28 :     CASE_CFN_LOG   :  i = 3; break;
   20610           28 :     CASE_CFN_LOG2  :  i = 4; break;
   20611           28 :     CASE_CFN_COS   :  i = 5; break;
   20612           28 :     CASE_CFN_SIN   :  i = 6; break;
   20613           28 :     CASE_CFN_POW   :  i = 7; break;
   20614           28 :     CASE_CFN_ERF   :  i = 8; break;
   20615           25 :     CASE_CFN_ATAN  :  i = 9; break;
   20616           20 :     CASE_CFN_LOG10 : i = 10; break;
   20617           10 :     CASE_CFN_EXP10 : i = 11; break;
   20618           10 :     CASE_CFN_LOG1P : i = 12; break;
   20619           24 :     CASE_CFN_ASIN  : i = 13; break;
   20620           14 :     CASE_CFN_ACOS  : i = 14; break;
   20621           18 :     CASE_CFN_TANH  : i = 15; break;
   20622            9 :     CASE_CFN_EXPM1 : i = 16; break;
   20623           14 :     CASE_CFN_COSH  : i = 17; break;
   20624              :     default: return NULL_TREE;
   20625              :     }
   20626              :   /* J selects the single (0) or double (1) precision column of row I;
                      :      N is accepted only if it appears among that column's entries.  */
   20627          386 :   int j = el_mode == DFmode;
   20628          386 :   bool n_is_supported = false;
   20629          976 :   for (unsigned k = 0; k < 3; k++)
   20630          857 :     if (supported_n[i][j][k] == n)
   20631              :       {
   20632              :         n_is_supported = true;
   20633              :         break;
   20634              :       }
   20635          386 :   if (!n_is_supported)
   20636              :     return NULL_TREE;
   20637              : 
   20638              :   /* Append the precision and the vector width to the function name we are
   20639              :      constructing.  */
   20640          267 :   name[name_len++] = el_mode == DFmode ? 'd' : 's';
   20641          267 :   switch (n)
   20642              :     {
   20643          214 :       case 2:
   20644          214 :       case 4:
   20645          214 :       case 8:
   20646          214 :         name[name_len++] = '0' + n;  /* Single decimal digit.  */
   20647          214 :         break;
   20648           53 :       case 16:
   20649           53 :         name[name_len++] = '1';
   20650           53 :         name[name_len++] = '6';
   20651           53 :         break;
   20652            0 :       default:
   20653            0 :         gcc_unreachable ();
   20654              :     }
   20655          267 :   name[name_len++] = '_';
   20656              : 
   20657              :   /* Append the operation name (steal it from the name of a builtin).
                      :      BNAME + 10 skips the first ten characters of the builtin name
                      :      (presumably the "__builtin_" prefix).  */
   20658          267 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20659              :                                  ? double_type_node : float_type_node, fn);
   20660          267 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20661          267 :   sprintf (name + name_len, "%s", bname + 10);
   20662              :   /* Count the entries on the DECL_ARGUMENTS chain of FNDECL to choose
                      :      between a one- and a two-argument prototype.
                      :      NOTE(review): the hit counts in this listing show that the loop
                      :      body and the arity == 1 branch never ran in 267 calls, so every
                      :      decl received the two-argument type.  Presumably DECL_ARGUMENTS
                      :      is empty for these builtin decls -- confirm, and consider counting
                      :      the argument types of FNDECL.  */
   20663          267 :   arity = 0;
   20664          267 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20665            0 :     arity++;
   20666              : 
   20667          267 :   if (arity == 1)
   20668            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20669              :   else
   20670          267 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20671              : 
   20672              :   /* Build a function declaration for the vectorized function.  */
   20673          267 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20674              :                            FUNCTION_DECL, get_identifier (name), fntype);
   20675          267 :   TREE_PUBLIC (new_fndecl) = 1;
   20676          267 :   DECL_EXTERNAL (new_fndecl) = 1;
   20677          267 :   TREE_READONLY (new_fndecl) = 1;
   20678              :   /* NOTE(review): unlike the SVML and ACML handlers in this file,
                      :      DECL_IS_NOVOPS is not set here -- confirm this is intentional.  */
   20679          267 :   return new_fndecl;
   20680              : }
   20681              : 
   20682              : /* Return the decl of the builtin that implements a scatter store with
   20683              :    register type VECTYPE, index type INDEX_TYPE and scale SCALE.
   20684              :    Return NULL_TREE if no such builtin is available.  */
   20685              : 
   20686              : static tree
   20687       130448 : ix86_vectorize_builtin_scatter (const_tree vectype,
   20688              :                                 const_tree index_type, int scale)
   20689              : {
   20690       130448 :   bool si;
   20691       130448 :   enum ix86_builtins code;
   20692              :   /* Scatter builtins are only offered when AVX512F is enabled.  */
   20693       130448 :   if (!TARGET_AVX512F)
   20694              :     return NULL_TREE;
   20695              :   /* Check TARGET_USE_SCATTER_{2,4,8}PARTS for the element count (8PARTS for anything but 2 or 4).  */
   20696         4215 :   if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
   20697         7389 :       ? !TARGET_USE_SCATTER_2PARTS
   20698         7389 :       : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
   20699         3174 :          ? !TARGET_USE_SCATTER_4PARTS
   20700         2057 :          : !TARGET_USE_SCATTER_8PARTS))
   20701              :     return NULL_TREE;
   20702              :   /* The index must be an integer or pointer type in SImode or DImode.  */
   20703         4215 :   if ((TREE_CODE (index_type) != INTEGER_TYPE
   20704          463 :        && !POINTER_TYPE_P (index_type))
   20705         4678 :       || (TYPE_MODE (index_type) != SImode
   20706         1783 :           && TYPE_MODE (index_type) != DImode))
   20707            0 :     return NULL_TREE;
   20708              :   /* The index must not be wider than a pointer.  */
   20709         4445 :   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
   20710              :     return NULL_TREE;
   20711              : 
   20712              :   /* v*scatter* insn sign extends index to pointer mode, so an unsigned index narrower than a pointer is rejected.  */
   20713         4215 :   if (TYPE_PRECISION (index_type) < POINTER_SIZE
   20714         4215 :       && TYPE_UNSIGNED (index_type))
   20715              :     return NULL_TREE;
   20716              : 
   20717              :   /* Scale can be 1, 2, 4 or 8: positive, at most 8 and a power of two.  */
   20718         4215 :   if (scale <= 0
   20719         4215 :       || scale > 8
   20720         4199 :       || (scale & (scale - 1)) != 0)
   20721              :     return NULL_TREE;
   20722              :   /* SI is true for SImode indices.  Pick the builtin by vector mode and index mode; the 256-bit and 128-bit modes also need AVX512VL.  */
   20723         4199 :   si = TYPE_MODE (index_type) == SImode;
   20724         4199 :   switch (TYPE_MODE (vectype))
   20725              :     {
   20726          169 :     case E_V8DFmode:
   20727          169 :       code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
   20728              :       break;
   20729          104 :     case E_V8DImode:
   20730          104 :       code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
   20731              :       break;
   20732          177 :     case E_V16SFmode:
   20733          177 :       code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
   20734              :       break;
   20735          257 :     case E_V16SImode:
   20736          257 :       code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
   20737              :       break;
   20738          206 :     case E_V4DFmode:
   20739          206 :       if (TARGET_AVX512VL)
   20740           34 :         code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
   20741              :       else
   20742              :         return NULL_TREE;
   20743              :       break;
   20744          142 :     case E_V4DImode:
   20745          142 :       if (TARGET_AVX512VL)
   20746           34 :         code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
   20747              :       else
   20748              :         return NULL_TREE;
   20749              :       break;
   20750          248 :     case E_V8SFmode:
   20751          248 :       if (TARGET_AVX512VL)
   20752           40 :         code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
   20753              :       else
   20754              :         return NULL_TREE;
   20755              :       break;
   20756          268 :     case E_V8SImode:
   20757          268 :       if (TARGET_AVX512VL)
   20758           82 :         code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
   20759              :       else
   20760              :         return NULL_TREE;
   20761              :       break;
   20762          254 :     case E_V2DFmode:
   20763          254 :       if (TARGET_AVX512VL)
   20764           94 :         code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
   20765              :       else
   20766              :         return NULL_TREE;
   20767              :       break;
   20768          196 :     case E_V2DImode:
   20769          196 :       if (TARGET_AVX512VL)
   20770           94 :         code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
   20771              :       else
   20772              :         return NULL_TREE;
   20773              :       break;
   20774          301 :     case E_V4SFmode:
   20775          301 :       if (TARGET_AVX512VL)
   20776           96 :         code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
   20777              :       else
   20778              :         return NULL_TREE;
   20779              :       break;
   20780          324 :     case E_V4SImode:
   20781          324 :       if (TARGET_AVX512VL)
   20782          138 :         code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
   20783              :       else
   20784              :         return NULL_TREE;
   20785              :       break;
   20786              :     default:
   20787              :       return NULL_TREE;
   20788              :     }
   20789              : 
   20790         1319 :   return get_ix86_builtin (code);
   20791              : }
   20792              : 
   20793              : /* Return true if it is safe to use the rsqrt optabs to optimize
   20794              :    1.0/sqrt in MODE: MODE is HFmode or SSE math is in use, flag_finite_math_only and flag_unsafe_math_optimizations are set, and flag_trapping_math is clear.  */
   20795              : 
   20796              : static bool
   20797           66 : use_rsqrt_p (machine_mode mode)
   20798              : {
   20799           66 :   return ((mode == HFmode
   20800           42 :            || (TARGET_SSE && TARGET_SSE_MATH))
   20801           66 :           && flag_finite_math_only
   20802           65 :           && !flag_trapping_math
   20803          119 :           && flag_unsafe_math_optimizations);
   20804              : }
   20805              : 
   20806              : /* Helper for avx_vpermilps256_operand et al.  This is also used by
   20807              :    the expansion functions to turn the parallel PAR (for vector mode MODE) back into a mask.
   20808              :    The return value is 0 for no match and the imm8+1 for a match.  */
   20809              : 
   20810              : int
   20811        63524 : avx_vpermilp_parallel (rtx par, machine_mode mode)
   20812              : {
   20813        63524 :   unsigned i, nelt = GET_MODE_NUNITS (mode);
   20814        63524 :   unsigned mask = 0;
   20815        63524 :   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
   20816              :   /* PAR must have exactly one selector per vector element.  */
   20817        63524 :   if (XVECLEN (par, 0) != (int) nelt)
   20818              :     return 0;
   20819              : 
   20820              :   /* Validate that all of the elements are constants, and not totally
   20821              :      out of range.  Copy the data into an integral array to make the
   20822              :      subsequent checks easier.  */
   20823       311086 :   for (i = 0; i < nelt; ++i)
   20824              :     {
   20825       247562 :       rtx er = XVECEXP (par, 0, i);
   20826       247562 :       unsigned HOST_WIDE_INT ei;
   20827              : 
   20828       247562 :       if (!CONST_INT_P (er))
   20829              :         return 0;
   20830       247562 :       ei = INTVAL (er);
   20831       247562 :       if (ei >= nelt)
   20832              :         return 0;
   20833       247562 :       ipar[i] = ei;
   20834              :     }
   20835              : 
   20836        63524 :   switch (mode)
   20837              :     {
   20838              :     case E_V8DFmode:
   20839              :     case E_V8DImode:
   20840              :       /* In the 512-bit DFmode/DImode case, we can only move elements
   20841              :          within a 128-bit lane.  First fill mask bits 4-7 from elements
   20842              :          4-7, then fall through to handle elements 0-3.  */
   20843         4933 :       for (i = 4; i < 6; ++i)
   20844              :         {
   20845         3419 :           if (!IN_RANGE (ipar[i], 4, 5))
   20846              :             return 0;
   20847         3194 :           mask |= (ipar[i] - 4) << i;
   20848              :         }
   20849         3690 :       for (i = 6; i < 8; ++i)
   20850              :         {
   20851         2602 :           if (!IN_RANGE (ipar[i], 6, 7))
   20852              :             return 0;
   20853         2176 :           mask |= (ipar[i] - 6) << i;
   20854              :         }
   20855              :       /* FALLTHRU */
   20856              : 
   20857              :     case E_V4DFmode:
   20858              :     case E_V4DImode:
   20859              :       /* In the 256-bit DFmode/DImode case, we can only move elements
   20860              :          within a 128-bit lane; mask bit I selects the source of element I within its lane.  */
   20861        44649 :       for (i = 0; i < 2; ++i)
   20862              :         {
   20863        37299 :           if (!IN_RANGE (ipar[i], 0, 1))
   20864              :             return 0;
   20865        25156 :           mask |= ipar[i] << i;
   20866              :         }
   20867        19352 :       for (i = 2; i < 4; ++i)
   20868              :         {
   20869        13356 :           if (!IN_RANGE (ipar[i], 2, 3))
   20870              :             return 0;
   20871        12002 :           mask |= (ipar[i] - 2) << i;
   20872              :         }
   20873              :       break;
   20874              : 
   20875              :     case E_V16SFmode:
   20876              :     case E_V16SImode:
   20877              :       /* In the 512-bit SFmode/SImode case, the permutation in the upper
   20878              :          256 bits must mirror the permutation in the lower 256 bits.  */
   20879         4326 :       for (i = 0; i < 8; ++i)
   20880         3854 :         if (ipar[i] + 8 != ipar[i + 8])
   20881              :           return 0;
   20882              :       /* FALLTHRU */
   20883              : 
   20884              :     case E_V8SFmode:
   20885              :     case E_V8SImode:
   20886              :       /* In the 256-bit SFmode/SImode case, we have full freedom of
   20887              :          movement within the low 128-bit lane, but the high 128-bit
   20888              :          lane must mirror the exact same pattern; only the low lane is encoded below.  */
   20889        35630 :       for (i = 0; i < 4; ++i)
   20890        30149 :         if (ipar[i] + 4 != ipar[i + 4])
   20891              :           return 0;
   20892              :       nelt = 4;
   20893              :       /* FALLTHRU */
   20894              : 
   20895        38663 :     case E_V2DFmode:
   20896        38663 :     case E_V2DImode:
   20897        38663 :     case E_V4SFmode:
   20898        38663 :     case E_V4SImode:
   20899              :       /* In the 128-bit case, we've full freedom in the placement of
   20900              :          the elements from the source operand; each selector occupies NELT / 2 mask bits.  */
   20901       134649 :       for (i = 0; i < nelt; ++i)
   20902        95986 :         mask |= ipar[i] << (i * (nelt / 2));
   20903              :       break;
   20904              : 
   20905            0 :     default:
   20906            0 :       gcc_unreachable ();
   20907              :     }
   20908              : 
   20909              :   /* Make sure success has a non-zero value by adding one.  */
   20910        44659 :   return mask + 1;
   20911              : }
   20912              : 
   20913              : /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   20914              :    the expansion functions to turn the parallel PAR (for vector mode MODE) back into a mask.
   20915              :    The return value is 0 for no match and the imm8+1 for a match.  */
   20916              : 
   20917              : int
   20918        42146 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
   20919              : {
   20920        42146 :   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
   20921        42146 :   unsigned mask = 0;
   20922        42146 :   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
   20923              :   /* PAR must have exactly one selector per vector element.  */
   20924        42146 :   if (XVECLEN (par, 0) != (int) nelt)
   20925              :     return 0;
   20926              : 
   20927              :   /* Validate that all of the elements are constants, and not totally
   20928              :      out of range (selectors may index either of two NELT-element inputs).  Copy the data into an integral array to make the
   20929              :      subsequent checks easier.  */
   20930       342298 :   for (i = 0; i < nelt; ++i)
   20931              :     {
   20932       300152 :       rtx er = XVECEXP (par, 0, i);
   20933       300152 :       unsigned HOST_WIDE_INT ei;
   20934              : 
   20935       300152 :       if (!CONST_INT_P (er))
   20936              :         return 0;
   20937       300152 :       ei = INTVAL (er);
   20938       300152 :       if (ei >= 2 * nelt)
   20939              :         return 0;
   20940       300152 :       ipar[i] = ei;
   20941              :     }
   20942              : 
   20943              :   /* Validate that each half of the permute selects consecutive elements.  */
   20944        81279 :   for (i = 0; i < nelt2 - 1; ++i)
   20945        65814 :     if (ipar[i] + 1 != ipar[i + 1])
   20946              :       return 0;
   20947        49614 :   for (i = nelt2; i < nelt - 1; ++i)
   20948        34791 :     if (ipar[i] + 1 != ipar[i + 1])
   20949              :       return 0;
   20950              : 
   20951              :   /* Reconstruct the mask: each half must start on a multiple of NELT2, and the index of the selected half goes into nibble I.  */
   20952        44373 :   for (i = 0; i < 2; ++i)
   20953              :     {
   20954        29600 :       unsigned e = ipar[i * nelt2];
   20955        29600 :       if (e % nelt2)
   20956              :         return 0;
   20957        29550 :       e /= nelt2;
   20958        29550 :       mask |= e << (i * 4);
   20959              :     }
   20960              : 
   20961              :   /* Make sure success has a non-zero value by adding one.  */
   20962        14773 :   return mask + 1;
   20963              : }
   20964              : 
   20965              : /* Return a mask of VPTERNLOG operands that do not affect output for the immediate PTERNLOG_IMM: bit 0 is set for the first operand, bit 1 for the second and bit 2 for the third.  */
   20966              : 
   20967              : int
   20968         2431 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
   20969              : {
   20970         2431 :   int mask = 0;
   20971         2431 :   int imm8 = INTVAL (pternlog_imm);
   20972              :   /* An operand is flagged when IMM8 matches itself shifted right by 4, 2 or 1 bits under the corresponding bit mask.  */
   20973         2431 :   if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
   20974            6 :     mask |= 1;
   20975         2431 :   if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
   20976            6 :     mask |= 2;
   20977         2431 :   if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
   20978          155 :     mask |= 4;
   20979              : 
   20980         2431 :   return mask;
   20981              : }
   20982              : 
   20983              : /* Eliminate false dependencies on operands that do not affect output
   20984              :    by substituting other operands of a VPTERNLOG.  OPERANDS[4] is the immediate and OPERANDS[1..3] are rewritten in place; when the third operand is needed and is a register, it is preferred as the replacement.  */
   20985              : 
   20986              : void
   20987           81 : substitute_vpternlog_operands (rtx *operands)
   20988              : {
   20989           81 :   int mask = vpternlog_redundant_operand_mask (operands[4]);
   20990              : 
   20991           81 :   if (mask & 1) /* The first operand is redundant.  */
   20992            2 :     operands[1] = operands[2];
   20993              : 
   20994           81 :   if (mask & 2) /* The second operand is redundant.  */
   20995            2 :     operands[2] = operands[1];
   20996              : 
   20997           81 :   if (mask & 4) /* The third operand is redundant.  */
   20998           77 :     operands[3] = operands[1];
   20999            4 :   else if (REG_P (operands[3]))
   21000              :     {
   21001            0 :       if (mask & 1)
   21002            0 :         operands[1] = operands[3];
   21003            0 :       if (mask & 2)
   21004            0 :         operands[2] = operands[3];
   21005              :     }
   21006           81 : }
   21007              : 
   21008              : /* Return a register priority for hard reg HARD_REGNO; the value ranges from 0 for the most discouraged registers to 4 for the preferred AX.  */
   21009              : static int
   21010     58162992 : ix86_register_priority (int hard_regno)
   21011              : {
   21012              :   /* ebp and r13 as the base always want a displacement, r12 as the
   21013              :      base always wants an index.  So discourage their usage in an
   21014              :      address.  */
   21015     58162992 :   if (hard_regno == R12_REG || hard_regno == R13_REG)
   21016              :     return 0;
   21017     53689699 :   if (hard_regno == BP_REG)
   21018              :     return 1;
   21019              :   /* New x86-64 int registers result in bigger code size.  Discourage them.  */
   21020     51720706 :   if (REX_INT_REGNO_P (hard_regno))
   21021              :     return 2;
   21022     35240588 :   if (REX2_INT_REGNO_P (hard_regno))
   21023              :     return 2;
   21024              :   /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
   21025     35238148 :   if (REX_SSE_REGNO_P (hard_regno))
   21026              :     return 2;
   21027     29120967 :   if (EXT_REX_SSE_REGNO_P (hard_regno))
   21028              :     return 1;
   21029              :   /* Usage of AX register results in smaller code.  Prefer it.  */
   21030     28844284 :   if (hard_regno == AX_REG)
   21031      3796141 :     return 4;
   21032              :   return 3;
   21033              : }
   21034              : 
   21035              : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
   21036              :    Return REGCLASS itself, a subclass of it, or NO_REGS for reloading X.
   21037              :    Put float CONST_DOUBLE in the constant pool instead of fp regs.
   21038              :    Non-constant QImode data must go into class Q_REGS or a mask class.
   21039              :    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   21040              :    movdf to do mem-to-mem moves through integer regs.  */
   21041              : 
   21042              : static reg_class_t
   21043    547405141 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
   21044              : {
   21045    547405141 :   machine_mode mode = GET_MODE (x);
   21046              : 
   21047              :   /* We're only allowed to return a subclass of REGCLASS.  Many of the
   21048              :      following checks fail for NO_REGS, so eliminate that early.  */
   21049    547405141 :   if (regclass == NO_REGS)
   21050              :     return NO_REGS;
   21051              : 
   21052              :   /* All classes can load zeros.  */
   21053    546548470 :   if (x == CONST0_RTX (mode))
   21054              :     return regclass;
   21055              : 
   21056              :   /* Force constants into memory if we are loading a (nonzero) constant into
   21057              :      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
   21058              :      instructions to load from a constant.  */
   21059    521652666 :   if (CONSTANT_P (x)
   21060    521652666 :       && (MAYBE_MMX_CLASS_P (regclass)
   21061    152120033 :           || MAYBE_SSE_CLASS_P (regclass)
   21062    122083034 :           || MAYBE_MASK_CLASS_P (regclass)))
   21063     30169108 :     return NO_REGS;
   21064              : 
   21065              :   /* Floating-point constants need more complex checks.  */
   21066    491483558 :   if (CONST_DOUBLE_P (x))
   21067              :     {
   21068              :       /* General regs can load everything.  */
   21069       303387 :       if (INTEGER_CLASS_P (regclass))
   21070              :         return regclass;
   21071              : 
   21072              :       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
   21073              :          zero above.  We only want to wind up preferring 80387 registers if
   21074              :          we plan on doing computation with them.  */
   21075       179712 :       if (IS_STACK_MODE (mode)
   21076       238096 :           && standard_80387_constant_p (x) > 0)
   21077              :         {
   21078              :           /* Limit class to FP regs.  */
   21079        40492 :           if (FLOAT_CLASS_P (regclass))
   21080              :             return FLOAT_REGS;
   21081              :         }
   21082              : 
   21083       139220 :       return NO_REGS;
   21084              :     }
   21085              : 
   21086              :   /* Prefer SSE if we can use them for math.  Also allow integer regs
   21087              :      when moves between register units are cheap and the value fits in a word.  */
   21088    491180171 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   21089              :     {
   21090     31132731 :       if (TARGET_INTER_UNIT_MOVES_FROM_VEC
   21091     31117818 :           && TARGET_INTER_UNIT_MOVES_TO_VEC
   21092     93358903 :           && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
   21093     30974502 :         return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   21094              :       else
   21095       158229 :         return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   21096              :     }
   21097              : 
   21098              :   /* Generally when we see PLUS here, it's the function invariant
   21099              :      (plus soft-fp const_int), which can only be computed into general
   21100              :      regs.  */
   21101    460047440 :   if (GET_CODE (x) == PLUS)
   21102      1895625 :     return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
   21103              : 
   21104              :   /* QImode constants are easy to load, but non-constant QImode data
   21105              :      must go into Q_REGS or a mask register class.  */
   21106    458151815 :   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
   21107              :     {
   21108     24685449 :       if (Q_CLASS_P (regclass))
   21109              :         return regclass;
   21110     19949216 :       else if (reg_class_subset_p (Q_REGS, regclass))
   21111              :         return Q_REGS;
   21112        55325 :       else if (MASK_CLASS_P (regclass))
   21113              :         return regclass;
   21114              :       else
   21115              :         return NO_REGS;
   21116              :     }
   21117              : 
   21118              :   return regclass;
   21119              : }
   21120              : 
   21121              : /* Discourage putting floating-point values in SSE registers unless
   21122              :    SSE math is being used, and likewise for the 387 registers.  Return the class to use for an output reload of X, given REGCLASS.  */
   21123              : static reg_class_t
   21124     74480253 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
   21125              : {
   21126              :   /* Restrict the output reload class to the register bank that we are doing
   21127              :      math on.  If we would like not to return a subset of REGCLASS, reject this
   21128              :      alternative: if reload cannot do this, it will still use its choice.  */
   21129     74480253 :   machine_mode mode = GET_MODE (x);
   21130     74480253 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   21131      7229401 :     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
   21132              : 
   21133     67250852 :   if (IS_STACK_MODE (mode))
   21134       207106 :     return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
   21135              : 
   21136              :   return regclass;
   21137              : }
   21138              : /* Return the class of an intermediate register needed to move X in MODE to (IN_P true) or from (IN_P false) a register of RCLASS, or NO_REGS; SRI may receive a special reload insn code and an extra cost.  */
   21139              : static reg_class_t
   21140    385987734 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
   21141              :                        machine_mode mode, secondary_reload_info *sri)
   21142              : {
   21143              :   /* Double-word spills from general registers to non-offsettable memory
   21144              :      references (zero-extended addresses) require special handling.  */
   21145    385987734 :   if (TARGET_64BIT
   21146    333024663 :       && MEM_P (x)
   21147    180891141 :       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
   21148     18952538 :       && INTEGER_CLASS_P (rclass)
   21149    388698330 :       && !offsettable_memref_p (x))
   21150              :     {
   21151      2459670 :       sri->icode = (in_p
   21152      1229835 :                     ? CODE_FOR_reload_noff_load
   21153              :                     : CODE_FOR_reload_noff_store);
   21154              :       /* Add the cost of moving address to a temporary.  */
   21155      1229835 :       sri->extra_cost = 1;
   21156              : 
   21157      1229835 :       return NO_REGS;
   21158              :     }
   21159              : 
   21160              :   /* QImode spills from non-QI registers require an
   21161              :      intermediate register on 32bit targets; so do QImode moves involving mask classes without AVX512DQ.  */
   21162    384757899 :   if (mode == QImode
   21163    384757899 :       && ((!TARGET_64BIT && !in_p
   21164       585977 :            && INTEGER_CLASS_P (rclass)
   21165       585937 :            && MAYBE_NON_Q_CLASS_P (rclass))
   21166     22174181 :           || (!TARGET_AVX512DQ
   21167     21975316 :               && MAYBE_MASK_CLASS_P (rclass))))
   21168              :     {
   21169         6518 :       int regno = true_regnum (x);
   21170              : 
   21171              :       /* Return Q_REGS if the operand is in memory.  */
   21172         6518 :       if (regno == -1)
   21173              :         return Q_REGS;
   21174              : 
   21175              :       return NO_REGS;
   21176              :     }
   21177              : 
   21178              :   /* Without SSE4.1, storing an HFmode, HImode, V2QImode or BFmode value from an SSE register requires movement to a GPR, and then a store to memory.  */
   21179    384751381 :   if ((mode == HFmode || mode == HImode || mode == V2QImode
   21180              :        || mode == BFmode)
   21181      3990267 :       && !TARGET_SSE4_1
   21182      3397744 :       && SSE_CLASS_P (rclass)
   21183       273415 :       && !in_p && MEM_P (x))
   21184              :     {
   21185       168976 :       sri->extra_cost = 1;
   21186       168976 :       return GENERAL_REGS;
   21187              :     }
   21188              : 
   21189              :   /* This condition handles corner case where an expression involving
   21190              :      pointers gets vectorized.  We're trying to use the address of a
   21191              :      stack slot as a vector initializer.
   21192              : 
   21193              :      (set (reg:V2DI 74 [ vect_cst_.2 ])
   21194              :           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
   21195              : 
   21196              :      Eventually frame gets turned into sp+offset like this:
   21197              : 
   21198              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21199              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21200              :                                        (const_int 392 [0x188]))))
   21201              : 
   21202              :      That later gets turned into:
   21203              : 
   21204              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21205              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21206              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
   21207              : 
   21208              :      We'll have the following reload recorded:
   21209              : 
   21210              :      Reload 0: reload_in (DI) =
   21211              :            (plus:DI (reg/f:DI 7 sp)
   21212              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
   21213              :      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21214              :      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
   21215              :      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
   21216              :      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21217              :      reload_reg_rtx: (reg:V2DI 22 xmm1)
   21218              : 
   21219              :      Which isn't going to work since SSE instructions can't handle scalar
   21220              :      additions.  Returning GENERAL_REGS forces the addition into integer
   21221              :      register and reload can handle subsequent reloads without problems.  */
   21222              : 
   21223    221276087 :   if (in_p && GET_CODE (x) == PLUS
   21224            2 :       && SSE_CLASS_P (rclass)
   21225    384582405 :       && SCALAR_INT_MODE_P (mode))
   21226              :     return GENERAL_REGS;
   21227              : 
   21228              :   return NO_REGS;
   21229              : }
   21230              : 
   21231              : /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  Return true exactly for the register classes listed in the switch below.  */
   21232              : 
   21233              : static bool
   21234    716915843 : ix86_class_likely_spilled_p (reg_class_t rclass)
   21235              : {
   21236    706876065 :   switch (rclass)
   21237              :     {
   21238              :       case AREG:
   21239              :       case DREG:
   21240              :       case CREG:
   21241              :       case BREG:
   21242              :       case AD_REGS:
   21243              :       case SIREG:
   21244              :       case DIREG:
   21245              :       case SSE_FIRST_REG:
   21246              :       case FP_TOP_REG:
   21247              :       case FP_SECOND_REG:
   21248              :         return true;
   21249              : 
   21250    685336582 :       default:
   21251    685336582 :         break;
   21252              :     }
   21253              : 
   21254    685336582 :   return false;
   21255              : }
   21256              : 
   21257              : /* Implement TARGET_CALLEE_SAVE_COST.  */
   21258              : 
   21259              : static int
   21260     81881934 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
   21261              :                        unsigned int, int mem_cost, const HARD_REG_SET &, bool)
   21262              : {
   21263              :   /* Account for the fact that push and pop are shorter and do their
   21264              :      own allocation and deallocation.  */
   21265     81881934 :   if (GENERAL_REGNO_P (hard_regno))
   21266              :     {
   21267              :       /* push is 1 byte while typical spill is 4-5 bytes.
   21268              :          ??? We probably should adjust size costs accordingly.
   21269              :          Costs are relative to reg-reg move that has 2 bytes for 32bit
   21270              :          and 3 bytes otherwise.  Be sure that no cost table sets cost
   21271              :          to 2, so we end up with 0.  */
   21272     81872076 :       if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
   21273      3572286 :         return 1;
   21274     78299790 :       return mem_cost - 2;
   21275              :     }
   21276              :   return mem_cost;
   21277              : }
   21278              : 
   21279              : /* Return true if a set of DST by the expression SRC should be allowed.
   21280              :    This prevents complex sets of likely_spilled hard regs before split1.  */
   21281              : 
   21282              : bool
   21283    631686272 : ix86_hardreg_mov_ok (rtx dst, rtx src)
   21284              : {
   21285              :   /* Avoid complex sets of likely_spilled hard registers before reload.  */
   21286    513538201 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   21287    304399638 :       && !REG_P (src) && !MEM_P (src)
   21288     95438421 :       && !(VECTOR_MODE_P (GET_MODE (dst))
   21289     95438421 :            ? standard_sse_constant_p (src, GET_MODE (dst))
   21290     47483673 :            : x86_64_immediate_operand (src, GET_MODE (dst)))
   21291     10039778 :       && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
   21292    640495352 :       && ix86_pre_reload_split ())
   21293              :     return false;
   21294              :   return true;
   21295              : }
   21296              : 
   21297              : /* If we are copying between registers from different register sets
   21298              :    (e.g. FP and integer), we may need a memory location.
   21299              : 
   21300              :    The function can't work reliably when one of the CLASSES is a class
   21301              :    containing registers from multiple sets.  We avoid this by never combining
   21302              :    different sets in a single alternative in the machine description.
   21303              :    Ensure that this constraint holds to avoid unexpected surprises.
   21304              : 
   21305              :    When STRICT is false, we are being called from REGISTER_MOVE_COST,
   21306              :    so do not enforce these sanity checks.
   21307              : 
   21308              :    To optimize register_move_cost performance, define inline variant.  */
   21309              : 
   21310              : static inline bool
   21311   5681960011 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21312              :                                 reg_class_t class2, int strict)
   21313              : {
   21314   5681960011 :   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
   21315              :     return false;
   21316              : 
   21317   5649635239 :   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
   21318   4814465395 :       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
   21319   4111293083 :       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
   21320   3922602603 :       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
   21321   3744032940 :       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
   21322   3744032940 :       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
   21323   3744032940 :       || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
   21324   9223504441 :       || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
   21325              :     {
   21326   2237913128 :       gcc_assert (!strict || lra_in_progress);
   21327              :       return true;
   21328              :     }
   21329              : 
   21330   3411722111 :   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
   21331              :     return true;
   21332              : 
   21333              :   /* ??? This is a lie.  We do have moves between mmx/general, and for
   21334              :      mmx/sse2.  But by saying we need secondary memory we discourage the
   21335              :      register allocator from using the mmx registers unless needed.  */
   21336   3262553528 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21337              :     return true;
   21338              : 
   21339              :   /* Between mask and general, we have moves no larger than word size.  */
   21340   3166237288 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21341              :     {
   21342      2605806 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
   21343      3406186 :           || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   21344       192451 :         return true;
   21345              :     }
   21346              : 
   21347   3166044837 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21348              :     {
   21349              :       /* SSE1 doesn't have any direct moves from other classes.  */
   21350    687844117 :       if (!TARGET_SSE2)
   21351              :         return true;
   21352              : 
   21353    685180385 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
   21354              :         return true;
   21355              : 
   21356              :       /* If the target says that inter-unit moves are more expensive
   21357              :          than moving through memory, then don't generate them.  */
   21358   1027304176 :       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
   21359   1026818332 :           || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
   21360      1321033 :         return true;
   21361              : 
   21362              :       /* With SSE4.1, *mov{ti,di}_internal supports moves between
   21363              :          SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
   21364    683859352 :       if (TARGET_SSE4_1
   21365     36716709 :           && (TARGET_64BIT ? mode == TImode : mode == DImode))
   21366              :         return false;
   21367              : 
   21368    682268072 :       int msize = GET_MODE_SIZE (mode);
   21369              : 
   21370              :       /* Between SSE and general, we have moves no larger than word size.  */
   21371    698632701 :       if (msize > UNITS_PER_WORD)
   21372              :         return true;
   21373              : 
   21374              :       /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
   21375              :          Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
   21376    590174629 :       int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
   21377              : 
   21378    590174629 :       if (msize < minsize)
   21379              :         return true;
   21380              :     }
   21381              : 
   21382              :   return false;
   21383              : }
   21384              : 
   21385              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
   21386              : 
   21387              : static bool
   21388     71088308 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21389              :                               reg_class_t class2)
   21390              : {
   21391     71088308 :   return inline_secondary_memory_needed (mode, class1, class2, true);
   21392              : }
   21393              : 
   21394              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
   21395              : 
   21396              :    get_secondary_mem widens integral modes to BITS_PER_WORD.
   21397              :    There is no need to emit full 64 bit move on 64 bit targets
   21398              :    for integral modes that can be moved using 32 bit move.  */
   21399              : 
   21400              : static machine_mode
   21401        13069 : ix86_secondary_memory_needed_mode (machine_mode mode)
   21402              : {
   21403        26138 :   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
   21404           19 :     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
   21405              :   return mode;
   21406              : }
   21407              : 
   21408              : /* Implement the TARGET_CLASS_MAX_NREGS hook.
   21409              : 
   21410              :    On the 80386, this is the size of MODE in words,
   21411              :    except in the FP regs, where a single reg is always enough.  */
   21412              : 
   21413              : static unsigned char
   21414   5958990220 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
   21415              : {
   21416   5958990220 :   if (MAYBE_INTEGER_CLASS_P (rclass))
   21417              :     {
   21418   4008509247 :       if (mode == XFmode)
   21419    145938113 :         return (TARGET_64BIT ? 2 : 3);
   21420   3862571134 :       else if (mode == XCmode)
   21421    145937744 :         return (TARGET_64BIT ? 4 : 6);
   21422              :       else
   21423   7539180929 :         return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21424              :     }
   21425              :   else
   21426              :     {
   21427   1950480973 :       if (COMPLEX_MODE_P (mode))
   21428              :         return 2;
   21429              :       else
   21430   1665885734 :         return 1;
   21431              :     }
   21432              : }
   21433              : 
   21434              : /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
   21435              : 
   21436              : static bool
   21437     40219858 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
   21438              :                             reg_class_t regclass)
   21439              : {
   21440     40219858 :   if (from == to)
   21441              :     return true;
   21442              : 
   21443              :   /* x87 registers can't do subreg at all, as all values are reformatted
   21444              :      to extended precision.
   21445              : 
   21446              :      ??? middle-end queries mode changes for ALL_REGS and this makes
   21447              :      vec_series_lowpart_p to always return false.  We probably should
   21448              :      restrict this to modes supported by i387 and check if it is enabled.  */
   21449     38818955 :   if (MAYBE_FLOAT_CLASS_P (regclass))
   21450              :     return false;
   21451              : 
   21452     34209572 :   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
   21453              :     {
   21454              :       /* Vector registers do not support QI or HImode loads.  If we don't
   21455              :          disallow a change to these modes, reload will assume it's ok to
   21456              :          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
   21457              :          the vec_dupv4hi pattern.
   21458              :          NB: SSE2 can load 16bit data to sse register via pinsrw.  */
   21459     16494647 :       int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
   21460     16494647 :       if (GET_MODE_SIZE (from) < mov_size
   21461     32988982 :           || GET_MODE_SIZE (to) < mov_size)
   21462              :         return false;
   21463              :     }
   21464              : 
   21465              :   return true;
   21466              : }
   21467              : 
   21468              : /* Return index of MODE in the sse load/store tables.  */
   21469              : 
   21470              : static inline int
   21471    773939717 : sse_store_index (machine_mode mode)
   21472              : {
   21473              :   /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
   21474              :      costs to processor_costs, which requires changes to all entries in
   21475              :      processor cost table.  */
   21476    773939717 :   if (mode == E_HFmode)
   21477    137133734 :     mode = E_SFmode;
   21478              : 
   21479   1547879434 :   switch (GET_MODE_SIZE (mode))
   21480              :     {
   21481              :     case 4:
   21482              :       return 0;
   21483              :     case 8:
   21484              :       return 1;
   21485              :     case 16:
   21486              :       return 2;
   21487              :     case 32:
   21488              :       return 3;
   21489              :     case 64:
   21490              :       return 4;
   21491              :     default:
   21492              :       return -1;
   21493              :     }
   21494              : }
   21495              : 
   21496              : /* Return the cost of moving data of mode M between a
   21497              :    register and memory.  A value of 2 is the default; this cost is
   21498              :    relative to those in `REGISTER_MOVE_COST'.
   21499              : 
   21500              :    This function is used extensively by register_move_cost that is used to
   21501              :    build tables at startup.  Make it inline in this case.
   21502              :    When IN is 2, return maximum of in and out move cost.
   21503              : 
   21504              :    If moving between registers and memory is more expensive than
   21505              :    between two registers, you should define this macro to express the
   21506              :    relative cost.
   21507              : 
   21508              :    Model also increased moving costs of QImode registers in non
   21509              :    Q_REGS classes.
   21510              :  */
   21511              : static inline int
   21512   6915399486 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
   21513              : {
   21514   6915399486 :   int cost;
   21515              : 
   21516   6915399486 :   if (FLOAT_CLASS_P (regclass))
   21517              :     {
   21518    353164850 :       int index;
   21519    353164850 :       switch (mode)
   21520              :         {
   21521              :           case E_SFmode:
   21522              :             index = 0;
   21523              :             break;
   21524              :           case E_DFmode:
   21525              :             index = 1;
   21526              :             break;
   21527              :           case E_XFmode:
   21528              :             index = 2;
   21529              :             break;
   21530              :           default:
   21531              :             return 100;
   21532              :         }
   21533    105567002 :       if (in == 2)
   21534    101627446 :         return MAX (ix86_cost->hard_register.fp_load [index],
   21535              :                     ix86_cost->hard_register.fp_store [index]);
   21536      3939556 :       return in ? ix86_cost->hard_register.fp_load [index]
   21537      3939556 :                 : ix86_cost->hard_register.fp_store [index];
   21538              :     }
   21539   6562234636 :   if (SSE_CLASS_P (regclass))
   21540              :     {
   21541    644867901 :       int index = sse_store_index (mode);
   21542    644867901 :       if (index == -1)
   21543              :         return 100;
   21544    560816394 :       if (in == 2)
   21545    397424637 :         return MAX (ix86_cost->hard_register.sse_load [index],
   21546              :                     ix86_cost->hard_register.sse_store [index]);
   21547    163391757 :       return in ? ix86_cost->hard_register.sse_load [index]
   21548    163391757 :                 : ix86_cost->hard_register.sse_store [index];
   21549              :     }
   21550   5917366735 :   if (MASK_CLASS_P (regclass))
   21551              :     {
   21552    108289175 :       int index;
   21553    216578350 :       switch (GET_MODE_SIZE (mode))
   21554              :         {
   21555              :         case 1:
   21556              :           index = 0;
   21557              :           break;
   21558      8934069 :         case 2:
   21559      8934069 :           index = 1;
   21560      8934069 :           break;
   21561              :         /* DImode loads and stores assumed to cost the same as SImode.  */
   21562     40187470 :         case 4:
   21563     40187470 :         case 8:
   21564     40187470 :           index = 2;
   21565     40187470 :           break;
   21566              :         default:
   21567              :           return 100;
   21568              :         }
   21569              : 
   21570     52699287 :       if (in == 2)
   21571       583719 :         return MAX (ix86_cost->hard_register.mask_load[index],
   21572              :                     ix86_cost->hard_register.mask_store[index]);
   21573     52115568 :       return in ? ix86_cost->hard_register.mask_load[2]
   21574     52115568 :                 : ix86_cost->hard_register.mask_store[2];
   21575              :     }
   21576   5809077560 :   if (MMX_CLASS_P (regclass))
   21577              :     {
   21578    172433024 :       int index;
   21579    344866048 :       switch (GET_MODE_SIZE (mode))
   21580              :         {
   21581              :           case 4:
   21582              :             index = 0;
   21583              :             break;
   21584    101199804 :           case 8:
   21585    101199804 :             index = 1;
   21586    101199804 :             break;
   21587              :           default:
   21588              :             return 100;
   21589              :         }
   21590    138557936 :       if (in == 2)
   21591    118580298 :         return MAX (ix86_cost->hard_register.mmx_load [index],
   21592              :                     ix86_cost->hard_register.mmx_store [index]);
   21593     19977638 :       return in ? ix86_cost->hard_register.mmx_load [index]
   21594     19977638 :                 : ix86_cost->hard_register.mmx_store [index];
   21595              :     }
   21596  11273289072 :   switch (GET_MODE_SIZE (mode))
   21597              :     {
   21598    124774864 :       case 1:
   21599    124774864 :         if (Q_CLASS_P (regclass) || TARGET_64BIT)
   21600              :           {
   21601    122147411 :             if (!in)
   21602     19594300 :               return ix86_cost->hard_register.int_store[0];
   21603    102553111 :             if (TARGET_PARTIAL_REG_DEPENDENCY
   21604    102553111 :                 && optimize_function_for_speed_p (cfun))
   21605     95628137 :               cost = ix86_cost->hard_register.movzbl_load;
   21606              :             else
   21607      6924974 :               cost = ix86_cost->hard_register.int_load[0];
   21608    102553111 :             if (in == 2)
   21609     82930390 :               return MAX (cost, ix86_cost->hard_register.int_store[0]);
   21610              :             return cost;
   21611              :           }
   21612              :         else
   21613              :           {
   21614      2627453 :            if (in == 2)
   21615      1860710 :              return MAX (ix86_cost->hard_register.movzbl_load,
   21616              :                          ix86_cost->hard_register.int_store[0] + 4);
   21617       766743 :            if (in)
   21618       383427 :              return ix86_cost->hard_register.movzbl_load;
   21619              :            else
   21620       383316 :              return ix86_cost->hard_register.int_store[0] + 4;
   21621              :           }
   21622    644077154 :         break;
   21623    644077154 :       case 2:
   21624    644077154 :         {
   21625    644077154 :           int cost;
   21626    644077154 :           if (in == 2)
   21627    544168225 :             cost = MAX (ix86_cost->hard_register.int_load[1],
   21628              :                         ix86_cost->hard_register.int_store[1]);
   21629              :           else
   21630     99908929 :             cost = in ? ix86_cost->hard_register.int_load[1]
   21631              :                       : ix86_cost->hard_register.int_store[1];
   21632              : 
   21633    644077154 :           if (mode == E_HFmode)
   21634              :             {
   21635              :               /* Prefer SSE over GPR for HFmode.  */
   21636    124807302 :               int sse_cost;
   21637    124807302 :               int index = sse_store_index (mode);
   21638    124807302 :               if (in == 2)
   21639    114818432 :                 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
   21640              :                                 ix86_cost->hard_register.sse_store[index]);
   21641              :               else
   21642     19977740 :                 sse_cost = (in
   21643      9988870 :                             ? ix86_cost->hard_register.sse_load [index]
   21644              :                             : ix86_cost->hard_register.sse_store [index]);
   21645    124807302 :               if (sse_cost >= cost)
   21646    124807302 :                 cost = sse_cost + 1;
   21647              :             }
   21648              :           return cost;
   21649              :         }
   21650   4867792518 :       default:
   21651   4867792518 :         if (in == 2)
   21652   3768645032 :           cost = MAX (ix86_cost->hard_register.int_load[2],
   21653              :                       ix86_cost->hard_register.int_store[2]);
   21654   1099147486 :         else if (in)
   21655    549763046 :           cost = ix86_cost->hard_register.int_load[2];
   21656              :         else
   21657    549384440 :           cost = ix86_cost->hard_register.int_store[2];
   21658              :         /* Multiply with the number of GPR moves needed.  */
   21659   9854481525 :         return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21660              :     }
   21661              : }
   21662              : 
   21663              : static int
   21664   1777785732 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
   21665              : {
   21666   2666355348 :   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
   21667              : }
   21668              : 
   21669              : 
   21670              : /* Return the cost of moving data from a register in class CLASS1 to
   21671              :    one in class CLASS2.
   21672              : 
   21673              :    It is not required that the cost always equal 2 when FROM is the same as TO;
   21674              :    on some machines it is expensive to move between registers if they are not
   21675              :    general registers.  */
   21676              : 
   21677              : static int
   21678   5610871703 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
   21679              :                          reg_class_t class2_i)
   21680              : {
   21681   5610871703 :   enum reg_class class1 = (enum reg_class) class1_i;
   21682   5610871703 :   enum reg_class class2 = (enum reg_class) class2_i;
   21683              : 
   21684              :   /* In case we require secondary memory, compute cost of the store followed
   21685              :      by load.  In order to avoid bad register allocation choices, we need
   21686              :      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
   21687              : 
   21688   5610871703 :   if (inline_secondary_memory_needed (mode, class1, class2, false))
   21689              :     {
   21690   2568806877 :       int cost = 1;
   21691              : 
   21692   2568806877 :       cost += inline_memory_move_cost (mode, class1, 2);
   21693   2568806877 :       cost += inline_memory_move_cost (mode, class2, 2);
   21694              : 
   21695              :       /* In case of copying from general_purpose_register we may emit multiple
   21696              :          stores followed by single load causing memory size mismatch stall.
   21697              :          Count this as arbitrarily high cost of 20.  */
   21698   5137613754 :       if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
   21699    768981607 :           && TARGET_MEMORY_MISMATCH_STALL
   21700   4106770091 :           && targetm.class_max_nregs (class1, mode)
   21701    768981607 :              > targetm.class_max_nregs (class2, mode))
   21702    146314735 :         cost += 20;
   21703              : 
   21704              :       /* In the case of FP/MMX moves, the registers actually overlap, and we
   21705              :          have to switch modes in order to treat them differently.  */
   21706     59290185 :       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
   21707   2618759698 :           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
   21708     18674728 :         cost += 20;
   21709              : 
   21710   2568806877 :       return cost;
   21711              :     }
   21712              : 
   21713              :   /* Moves between MMX and non-MMX units require secondary memory.  */
   21714   3042064826 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21715            0 :     gcc_unreachable ();
   21716              : 
   21717   3042064826 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21718    582314087 :     return (SSE_CLASS_P (class1)
   21719    582314087 :             ? ix86_cost->hard_register.sse_to_integer
   21720    582314087 :             : ix86_cost->hard_register.integer_to_sse);
   21721              : 
   21722              :   /* Moves between mask register and GPR.  */
   21723   2459750739 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21724              :     {
   21725      1054174 :       return (MASK_CLASS_P (class1)
   21726      1054174 :               ? ix86_cost->hard_register.mask_to_integer
   21727      1054174 :               : ix86_cost->hard_register.integer_to_mask);
   21728              :     }
   21729              :   /* Moving between mask registers.  */
   21730   2458696565 :   if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
   21731       101240 :     return ix86_cost->hard_register.mask_move;
   21732              : 
   21733   2458595325 :   if (MAYBE_FLOAT_CLASS_P (class1))
   21734     11801615 :     return ix86_cost->hard_register.fp_move;
   21735   2446793710 :   if (MAYBE_SSE_CLASS_P (class1))
   21736              :     {
   21737    229659204 :       if (GET_MODE_BITSIZE (mode) <= 128)
   21738    112357278 :         return ix86_cost->hard_register.xmm_move;
   21739      4944648 :       if (GET_MODE_BITSIZE (mode) <= 256)
   21740      1571466 :         return ix86_cost->hard_register.ymm_move;
   21741       900858 :       return ix86_cost->hard_register.zmm_move;
   21742              :     }
   21743   2331964108 :   if (MAYBE_MMX_CLASS_P (class1))
   21744      2171535 :     return ix86_cost->hard_register.mmx_move;
   21745              :   return 2;
   21746              : }
   21747              : 
   21748              : /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   21749              :    words of a value of mode MODE but can be less for certain modes in
   21750              :    special long registers.
   21751              : 
   21752              :    Actually there are no two word move instructions for consecutive
   21753              :    registers.  And only registers 0-3 may have mov byte instructions
   21754              :    applied to them.  */
   21755              : 
   21756              : static unsigned int
   21757   8877363360 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
   21758              : {
   21759   8877363360 :   if (GENERAL_REGNO_P (regno))
   21760              :     {
   21761   3087778560 :       if (mode == XFmode)
   21762     25379840 :         return TARGET_64BIT ? 2 : 3;
   21763   3062877120 :       if (mode == XCmode)
   21764     25379840 :         return TARGET_64BIT ? 4 : 6;
   21765   6134316160 :       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21766              :     }
   21767   5789584800 :   if (COMPLEX_MODE_P (mode))
   21768              :     return 2;
   21769              :   /* Register pair for mask registers.  */
   21770   5042541600 :   if (mode == P2QImode || mode == P2HImode)
   21771     93380400 :     return 2;
   21772              : 
   21773              :   return 1;
   21774              : }
   21775              : 
   21776              : /* Implement REGMODE_NATURAL_SIZE(MODE).  */
   21777              : unsigned int
   21778    110801320 : ix86_regmode_natural_size (machine_mode mode)
   21779              : {
   21780    110801320 :   if (mode == P2HImode || mode == P2QImode)
   21781         2462 :     return GET_MODE_SIZE (mode) / 2;
   21782    110800089 :   return UNITS_PER_WORD;
   21783              : }
   21784              : 
   21785              : /* Implement TARGET_HARD_REGNO_MODE_OK: return true if REGNO can hold MODE.  */
   21786              : 
   21787              : static bool
   21788  54241960668 : ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
   21789              : {
   21790              :   /* The flags register holds only CC modes, and no other register does.  */
   21791  54241960668 :   if (CC_REGNO_P (regno))
   21792    431911877 :     return GET_MODE_CLASS (mode) == MODE_CC;
   21793  53810048791 :   if (GET_MODE_CLASS (mode) == MODE_CC
   21794              :       || GET_MODE_CLASS (mode) == MODE_RANDOM)
   21795              :     return false;
   21796  48275973560 :   if (STACK_REGNO_P (regno))
   21797   4698471776 :     return VALID_FP_MODE_P (mode);
   21798  43577501784 :   if (MASK_REGNO_P (regno))
   21799              :     {
   21800              :       /* A mask register pair can only start at an even register number.  */
   21801   3669861411 :       if ((mode == P2QImode || mode == P2HImode))
   21802     51199960 :         return MASK_PAIR_REGNO_P(regno);
   21803              : 
   21804   1001212275 :       return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
   21805   4599511211 :               || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
   21806              :     }
   21807              : 
   21808  39907640373 :   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
   21809              :     return false;  /* No register handled below accepts a partial-int mode.  */
   21810              : 
   21811  38928630876 :   if (SSE_REGNO_P (regno))
   21812              :     {
   21813              :       /* We implement the move patterns for all vector modes into and
   21814              :          out of SSE registers, even when no operation instructions
   21815              :          are available.  */
   21816              : 
   21817              :       /* For AVX-512 we allow, regardless of regno:
   21818              :           - XI mode
   21819              :           - any 512-bit wide vector mode
   21820              :           - any scalar mode.  */
   21821  16834935974 :       if (TARGET_AVX512F
   21822              :           && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
   21823              :               || VALID_AVX512F_SCALAR_MODE (mode)))
   21824              :         return true;
   21825              : 
   21826              :       /* TODO check for QI/HI scalars.  */
   21827              :       /* AVX512VL allows SSE registers 16 and up for 128/256-bit modes.  */
   21828  16144574550 :       if (TARGET_AVX512VL
   21829   1751120226 :           && (VALID_AVX256_REG_OR_OI_MODE (mode)
   21830   1538867313 :               || VALID_AVX512VL_128_REG_MODE (mode)))
   21831              :         return true;
   21832              : 
   21833              :       /* xmm16-xmm31 are only available for AVX-512.  */
   21834  15698059518 :       if (EXT_REX_SSE_REGNO_P (regno))
   21835              :         return false;
   21836              : 
   21837              :       /* OImode and AVX modes are available only when AVX is enabled.  */
   21838   9080317741 :       return ((TARGET_AVX
   21839   1925511921 :                && VALID_AVX256_REG_OR_OI_MODE (mode))
   21840              :               || VALID_SSE_REG_MODE (mode)
   21841              :               || VALID_SSE2_REG_MODE (mode)
   21842              :               || VALID_MMX_REG_MODE (mode)
   21843   9080317741 :               || VALID_MMX_REG_MODE_3DNOW (mode));
   21844              :     }
   21845  22093694902 :   if (MMX_REGNO_P (regno))
   21846              :     {
   21847              :       /* We implement the move patterns for 3DNOW modes even in MMX mode,
   21848              :          so if the register is available at all, then we can move data of
   21849              :          the given mode into or out of it.  */
   21850   3938543258 :       return (VALID_MMX_REG_MODE (mode)
   21851              :               || VALID_MMX_REG_MODE_3DNOW (mode));
   21852              :     }
   21853              : 
   21854  18155151644 :   if (mode == QImode)
   21855              :     {
   21856              :       /* Take care with QImode values - they can be in non-QI regs,
   21857              :          but then they do cause partial register stalls.  */
   21858    205822171 :       if (ANY_QI_REGNO_P (regno))
   21859              :         return true;
   21860     14265215 :       if (!TARGET_PARTIAL_REG_STALL)
   21861              :         return true;
   21862              :       /* LRA checks if the hard register is OK for the given mode.
   21863              :          QImode values can live in non-QI regs, so we allow all
   21864              :          registers here.  */
   21865            0 :       if (lra_in_progress)
   21866              :        return true;
   21867            0 :       return !can_create_pseudo_p ();  /* Else only once pseudos can no longer be created.  */
   21868              :     }
   21869              :   /* We handle both integers and floats in the general purpose registers.  */
   21870  17949329473 :   else if (VALID_INT_MODE_P (mode)
   21871  13129317255 :            || VALID_FP_MODE_P (mode))
   21872              :     return true;
   21873              :   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
   21874              :      on to use that value in smaller contexts, this can easily force a
   21875              :      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
   21876              :      supporting DImode, allow it.  */
   21877  12071463717 :   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
   21878              :     return true;
   21879              : 
   21880              :   return false;
   21881              : }
   21882              : 
   21883              : /* Implement TARGET_INSN_CALLEE_ABI: return the callee ABI to use for INSN.  */
   21884              : 
   21885              : const predefined_function_abi &
   21886    247949921 : ix86_insn_callee_abi (const rtx_insn *insn)
   21887              : {
   21888    247949921 :   unsigned int abi_id = 0;  /* Index 0 unless INSN is a vzeroupper.  */
   21889    247949921 :   rtx pat = PATTERN (insn);
   21890    247949921 :   if (vzeroupper_pattern (pat, VOIDmode))
   21891       398632 :     abi_id = ABI_VZEROUPPER;
   21892              : 
   21893    247949921 :   return function_abis[abi_id];
   21894              : }
   21895              : 
   21896              : /* Initialize the function_abis entry for ABI_ID if that has not been done
   21897              :    yet.  Only ABI_VZEROUPPER is handled; any other ABI_ID is asserted against.  */
   21898              : void
   21899        21895 : ix86_initialize_callee_abi (unsigned int abi_id)
   21900              : {
   21901        21895 :   gcc_assert (abi_id == ABI_VZEROUPPER);
   21902        21895 :   predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
   21903        21895 :   if (!vzeroupper_abi.initialized_p ())
   21904              :     {
   21905              :       HARD_REG_SET full_reg_clobbers;
   21906         4274 :       CLEAR_HARD_REG_SET (full_reg_clobbers);  /* Empty full-clobber set.  */
   21907         4274 :       vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
   21908              :     }
   21909        21895 : }
   21910              : 
   21911              : void
   21912        21895 : ix86_expand_avx_vzeroupper (void)
   21913              : {
   21914              :   /* Emit vzeroupper as a call insn; initialize vzeroupper_abi first.  */
   21915        21895 :   ix86_initialize_callee_abi (ABI_VZEROUPPER);
   21916        21895 :   rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
   21917              :   /* This note makes can_nonlocal_goto return false for the insn.  */
   21918        21895 :   make_reg_eh_region_note (insn, 0, INT_MIN);
   21919              :   /* The `used' flag on a call_insn indicates that it is a fake call.  */
   21920        21895 :   RTX_FLAG (insn, used) = 1;
   21921        21895 : }
   21922              : 
   21923              : 
   21924              : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   21925              :    saves SSE registers across calls is Win64 (thus no need to check the
   21926              :    current ABI here), and with AVX enabled Win64 only guarantees that
   21927              :    the low 16 bytes are saved.  */
   21928              : 
   21929              : static bool
   21930   2045700279 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
   21931              :                                      machine_mode mode)
   21932              : {
   21933              :   /* Special ABI for vzeroupper, which only clobbers the upper part of SSE regs.  */
   21934   2045700279 :   if (abi_id == ABI_VZEROUPPER)
   21935     30893696 :       return (GET_MODE_SIZE (mode) > 16
   21936     30893696 :               && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
   21937      4724924 :                   || LEGACY_SSE_REGNO_P (regno)));
   21938              : 
   21939   2651583135 :   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
   21940              : }
   21941              : 
   21942              : /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   21943              :    tieable integer mode.  */
   21944              : 
   21945              : static bool
   21946     52155810 : ix86_tieable_integer_mode_p (machine_mode mode)
   21947              : {
   21948     52155810 :   switch (mode)
   21949              :     {
   21950              :     case E_HImode:
   21951              :     case E_SImode:
   21952              :       return true;
   21953              : 
   21954      5263592 :     case E_QImode:  /* Not on 32-bit targets with partial register stalls.  */
   21955      5263592 :       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
   21956              : 
   21957     10183017 :     case E_DImode:  /* Only on 64-bit targets.  */
   21958     10183017 :       return TARGET_64BIT;
   21959              : 
   21960              :     default:
   21961              :       return false;
   21962              :     }
   21963              : }
   21964              : 
   21965              : /* Implement TARGET_MODES_TIEABLE_P.
   21966              : 
   21967              :    Return true if MODE1 is accessible in a register that can hold MODE2
   21968              :    without copying.  That is, all register classes that can hold MODE2
   21969              :    can also hold MODE1.  */
   21970              : 
   21971              : static bool
   21972     33788604 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
   21973              : {
   21974     33788604 :   if (mode1 == mode2)
   21975              :     return true;
   21976              : 
   21977     33702128 :   if (ix86_tieable_integer_mode_p (mode1)
   21978     33702128 :       && ix86_tieable_integer_mode_p (mode2))
   21979              :     return true;
   21980              : 
   21981              :   /* MODE2 being XFmode implies fp stack or general regs, which means we
   21982              :      can tie any smaller floating point modes to it.  Note that we do not
   21983              :      tie this with TFmode.  */
   21984     24678761 :   if (mode2 == XFmode)
   21985         4314 :     return mode1 == SFmode || mode1 == DFmode;
   21986              : 
   21987              :   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
   21988              :      that we can tie it with SFmode.  */
   21989     24674447 :   if (mode2 == DFmode)
   21990       249977 :     return mode1 == SFmode;
   21991              : 
   21992              :   /* If MODE2 is at least 16 bytes wide and acceptable to an SSE register,
   21993              :      then tie it with any mode of the same size, and with any narrower
   21994              :      vector or scalar floating point mode, that SSE registers accept too.
   21995              :      Narrower scalar integer modes are excluded, so that SUBREGs like
   21996              :         (subreg:QI (reg:TI 99) 0)
   21997              :         (subreg:HI (reg:TI 99) 0)
   21998              :         (subreg:SI (reg:TI 99) 0)
   21999              :         (subreg:DI (reg:TI 99) 0)
   22000              :      do not cause unnecessary moves from SSE to integer registers.  */
   22001     24424470 :   if (GET_MODE_SIZE (mode2) >= 16
   22002     38248558 :       && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
   22003     13474929 :           || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
   22004       484704 :               && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
   22005     30315082 :       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
   22006      5457078 :     return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
   22007              : 
   22008              :   /* If MODE2 is 8 bytes wide and appropriate for an MMX register, then
   22009              :      tie with any other 8-byte mode acceptable to MMX registers.  */
   22010     18967392 :   if (GET_MODE_SIZE (mode2) == 8
   22011     18967392 :       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
   22012      3289403 :     return (GET_MODE_SIZE (mode1) == 8
   22013      3289403 :             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
   22014              : 
   22015              :   /* SCmode and DImode can be tied, but only on 64-bit targets.  */
   22016     15677989 :   if ((mode1 == E_SCmode && mode2 == E_DImode)
   22017     15677989 :       || (mode1 == E_DImode && mode2 == E_SCmode))
   22018          108 :     return TARGET_64BIT;
   22019              : 
   22020              :   /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
   22021     15677881 :   if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
   22022     15677881 :       || (mode1 == E_V2SFmode && mode2 == E_SCmode)
   22023     15677881 :       || (mode1 == E_DCmode && mode2 == E_V2DFmode)
   22024     15677881 :       || (mode1 == E_V2DFmode && mode2 == E_DCmode))
   22025            0 :     return true;
   22026              : 
   22027              :   return false;
   22028              : }
   22029              : 
   22030              : /* Return the cost of moving between two registers of mode MODE.  */
   22031              : 
   22032              : static int
   22033     29251605 : ix86_set_reg_reg_cost (machine_mode mode)
   22034              : {
   22035     29251605 :   unsigned int units = UNITS_PER_WORD;  /* Default: word-sized pieces.  */
   22036              : 
   22037     29251605 :   switch (GET_MODE_CLASS (mode))
   22038              :     {
   22039              :     default:
   22040              :       break;
   22041              : 
   22042              :     case MODE_CC:
   22043     29251605 :       units = GET_MODE_SIZE (CCmode);
   22044              :       break;
   22045              : 
   22046      1165229 :     case MODE_FLOAT:
   22047      1165229 :       if ((TARGET_SSE && mode == TFmode)
   22048       683238 :           || (TARGET_80387 && mode == XFmode)
   22049       210599 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
   22050       142490 :           || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
   22051      2300542 :         units = GET_MODE_SIZE (mode);  /* The whole value moves in one piece.  */
   22052              :       break;
   22053              : 
   22054      1307262 :     case MODE_COMPLEX_FLOAT:
   22055      1307262 :       if ((TARGET_SSE && mode == TCmode)
   22056       876438 :           || (TARGET_80387 && mode == XCmode)
   22057       445492 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
   22058        14518 :           || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
   22059      2608036 :         units = GET_MODE_SIZE (mode);  /* The whole value moves in one piece.  */
   22060              :       break;
   22061              : 
   22062     18707751 :     case MODE_VECTOR_INT:
   22063     18707751 :     case MODE_VECTOR_FLOAT:
   22064     18707751 :       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   22065     18611788 :           || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   22066     18440330 :           || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   22067     15811766 :           || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   22068     14506114 :           || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   22069     14460604 :               && VALID_MMX_REG_MODE (mode)))
   22070      8511284 :         units = GET_MODE_SIZE (mode);  /* The whole value moves in one piece.  */
   22071              :     }
   22072              : 
   22073              :   /* Return the cost of moving between two registers of mode MODE,
   22074              :      assuming that the move will be in pieces of at most UNITS bytes.  */
   22075     29251605 :   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
   22076              : }
   22077              : 
   22078              : /* Return cost of vector operation in MODE given that scalar version has
   22079              :    COST.  COST is scaled up by the split-register checks below.  */
   22080              : 
   22081              : static int
   22082   2831901434 : ix86_vec_cost (machine_mode mode, int cost)
   22083              : {
   22084   2831901434 :   if (!VECTOR_MODE_P (mode))
   22085              :     return cost;
   22086              : 
   22087   2831667923 :   if (GET_MODE_BITSIZE (mode) == 128
   22088   2831667923 :       && TARGET_SSE_SPLIT_REGS)
   22089      2861998 :     return cost * GET_MODE_BITSIZE (mode) / 64;
   22090   2830236924 :   else if (GET_MODE_BITSIZE (mode) > 128
   22091   2830236924 :       && TARGET_AVX256_SPLIT_REGS)
   22092      1674620 :     return cost * GET_MODE_BITSIZE (mode) / 128;
   22093   2829399614 :   else if (GET_MODE_BITSIZE (mode) > 256  /* Only if the test above failed.  */
   22094   2829399614 :       && TARGET_AVX512_SPLIT_REGS)
   22095       224056 :     return cost * GET_MODE_BITSIZE (mode) / 256;
   22096              :   return cost;
   22097              : }
   22098              : 
   22099              : /* Return cost of vec_widen_<s>mult_hi/lo_<mode> using cost table COST;
   22100              :    vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  */
   22101              : static int
   22102         1076 : ix86_widen_mult_cost (const struct processor_costs *cost,
   22103              :                       enum machine_mode mode, bool uns_p)
   22104              : {
   22105         1076 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
   22106         1076 :   int extra_cost = 0;
   22107         1076 :   int basic_cost = 0;
   22108         1076 :   switch (mode)
   22109              :     {
   22110          124 :     case V8HImode:
   22111          124 :     case V16HImode:
   22112          124 :       if (!uns_p || mode == V16HImode)
   22113           53 :         extra_cost = cost->sse_op * 2;
   22114          124 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   22115          124 :       break;
   22116          203 :     case V4SImode:
   22117          203 :     case V8SImode:
   22118              :       /* pmulhw/pmullw can be used.  */
   22119          203 :       basic_cost = cost->mulss * 2 + cost->sse_op * 2;
   22120          203 :       break;
   22121          681 :     case V2DImode:
   22122              :       /* pmuludq under sse2, pmuldq under sse4.1; without sse4.1, sign_extend
   22123              :          requires an extra 4 mul, 4 add, 4 cmp and 2 shift.  */
   22124          681 :       if (!TARGET_SSE4_1 && !uns_p)
   22125          403 :         extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
   22126          403 :                       + cost->sse_op * 2;
   22127              :       /* Fallthru.  */
   22128          737 :     case V4DImode:
   22129          737 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   22130          737 :       break;
   22131              :     default:
   22132              :       /* Not implemented; return a fixed cost of 100.  */
   22133              :       return 100;
   22134              :     }
   22135         1064 :   return ix86_vec_cost (mode, basic_cost + extra_cost);
   22136              : }
   22137              : 
   22138              : /* Return cost of multiplication in MODE, using the cost table COST.  */
   22139              : 
   22140              : static int
   22141   1208506312 : ix86_multiplication_cost (const struct processor_costs *cost,
   22142              :                           enum machine_mode mode)
   22143              : {
   22144   1208506312 :   machine_mode inner_mode = mode;
   22145   1208506312 :   if (VECTOR_MODE_P (mode))
   22146   1207482968 :     inner_mode = GET_MODE_INNER (mode);
   22147              : 
   22148   1208506312 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22149       753467 :     return inner_mode == DFmode ? cost->mulsd : cost->mulss;
   22150   1207752845 :   else if (X87_FLOAT_MODE_P (mode))
   22151       162427 :     return cost->fmul;
   22152   1207590418 :   else if (FLOAT_MODE_P (mode))
   22153       230434 :     return  ix86_vec_cost (mode,
   22154       230434 :                            inner_mode == DFmode ? cost->mulsd : cost->mulss);
   22155   1207359984 :   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22156              :     {
   22157   1207276780 :       int nmults, nops;
   22158              :       /* Cost of reading the memory; added on top of the ix86_vec_cost result.  */
   22159   1207276780 :       int extra;
   22160              : 
   22161   1207276780 :       switch (mode)
   22162              :         {
   22163     19011938 :         case V4QImode:
   22164     19011938 :         case V8QImode:
   22165              :           /* Partial V*QImode is emulated with 4-6 insns.  */
   22166     19011938 :           nmults = 1;
   22167     19011938 :           nops = 3;
   22168     19011938 :           extra = 0;
   22169              : 
   22170     19011938 :           if (TARGET_AVX512BW && TARGET_AVX512VL)
   22171              :             ;
   22172     18902720 :           else if (TARGET_AVX2)
   22173              :             nops += 2;
   22174     18395372 :           else if (TARGET_XOP)
   22175        10216 :             extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22176              :           else
   22177              :             {
   22178     18385156 :               nops += 1;
   22179     18385156 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22180              :             }
   22181     19011938 :           goto do_qimode;
   22182              : 
   22183      9506444 :         case V16QImode:
   22184              :           /* V*QImode is emulated with 4-11 insns.  */
   22185      9506444 :           nmults = 1;
   22186      9506444 :           nops = 3;
   22187      9506444 :           extra = 0;
   22188              : 
   22189      9506444 :           if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
   22190              :             {
   22191       306247 :               if (!(TARGET_AVX512BW && TARGET_AVX512VL))
   22192       251924 :                 nops += 3;
   22193              :             }
   22194      9200197 :           else if (TARGET_XOP)
   22195              :             {
   22196         5552 :               nmults += 1;
   22197         5552 :               nops += 2;
   22198         5552 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22199              :             }
   22200              :           else
   22201              :             {
   22202      9194645 :               nmults += 1;
   22203      9194645 :               nops += 4;
   22204      9194645 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22205              :             }
   22206      9506444 :           goto do_qimode;
   22207              : 
   22208      9504824 :         case V32QImode:
   22209      9504824 :           nmults = 1;
   22210      9504824 :           nops = 3;
   22211      9504824 :           extra = 0;
   22212              : 
   22213      9504824 :           if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
   22214              :             {
   22215      9422224 :               nmults += 1;
   22216      9422224 :               nops += 4;
   22217              :               /* 2 loads, so no division by 2.  */
   22218      9422224 :               extra += COSTS_N_INSNS (cost->sse_load[3]);
   22219              :             }
   22220      9504824 :           goto do_qimode;
   22221              : 
   22222      9504345 :         case V64QImode:
   22223      9504345 :           nmults = 2;
   22224      9504345 :           nops = 9;
   22225              :           /* 2 loads of each size, so no division by 2.  */
   22226      9504345 :           extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
   22227              : 
   22228     47527551 :         do_qimode:  /* Shared by all V*QImode cases above.  */
   22229     47527551 :           return ix86_vec_cost (mode, cost->mulss * nmults
   22230     47527551 :                                 + cost->sse_op * nops) + extra;
   22231              : 
   22232     40647006 :         case V4SImode:
   22233              :           /* pmulld is used in this case.  No emulation is needed.  */
   22234     40647006 :           if (TARGET_SSE4_1)
   22235      2245249 :             goto do_native;
   22236              :           /* V4SImode is emulated with 7 insns.  */
   22237              :           else
   22238     38401757 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
   22239              : 
   22240    164286955 :         case V2DImode:
   22241    164286955 :         case V4DImode:
   22242              :           /* vpmullq is used in this case.  No emulation is needed.  */
   22243    164286955 :           if (TARGET_AVX512DQ && TARGET_AVX512VL)
   22244       590707 :             goto do_native;
   22245              :           /* V*DImode is emulated with 6-8 insns.  */
   22246    163696248 :           else if (TARGET_XOP && mode == V2DImode)
   22247        55860 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
   22248              :           /* FALLTHRU */
   22249    245724302 :         case V8DImode:
   22250              :           /* vpmullq is used in this case.  No emulation is needed.  */
   22251    245724302 :           if (TARGET_AVX512DQ && mode == V8DImode)
   22252       388450 :             goto do_native;
   22253              :           else
   22254    245335852 :             return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
   22255              : 
   22256    875955760 :         default:
   22257    875955760 :         do_native:  /* One multiply, scaled by ix86_vec_cost.  */
   22258    875955760 :           return ix86_vec_cost (mode, cost->mulss);
   22259              :         }
   22260              :     }
   22261              :   else
   22262       166400 :     return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
   22263              : }
   22264              : 
   22265              : /* Return cost of division in MODE, using the cost table COST.  */
   22266              : 
   22267              : static int
   22268     72629201 : ix86_division_cost (const struct processor_costs *cost,
   22269              :                           enum machine_mode mode)
   22270              : {
   22271     72629201 :   machine_mode inner_mode = mode;
   22272     72629201 :   if (VECTOR_MODE_P (mode))
   22273     53583417 :     inner_mode = GET_MODE_INNER (mode);
   22274              : 
   22275     72629201 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22276       248436 :     return inner_mode == DFmode ? cost->divsd : cost->divss;
   22277     72380765 :   else if (X87_FLOAT_MODE_P (mode))
   22278        44842 :     return cost->fdiv;
   22279     72335923 :   else if (FLOAT_MODE_P (mode))
   22280        17522 :     return ix86_vec_cost (mode,
   22281        17522 :                           inner_mode == DFmode ? cost->divsd : cost->divss);
   22282              :   else
   22283     80701128 :     return cost->divide[MODE_INDEX (mode)];  /* All remaining modes.  */
   22284              : }
   22285              : 
   22286              : /* Return cost of shift in MODE.
   22287              :    If CONSTANT_OP1 is true, the op1 value is known and given in OP1_VAL.
   22288              :    AND_IN_OP1 specifies that op1 is the result of an AND, and
   22289              :    SHIFT_AND_TRUNCATE that op1 is the result of a subreg.
   22290              : 
   22291              :    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
   22292              : 
   22293              : static int
   22294    775750941 : ix86_shift_rotate_cost (const struct processor_costs *cost,
   22295              :                         enum rtx_code code,
   22296              :                         enum machine_mode mode, bool constant_op1,
   22297              :                         HOST_WIDE_INT op1_val,
   22298              :                         bool and_in_op1,
   22299              :                         bool shift_and_truncate,
   22300              :                         bool *skip_op0, bool *skip_op1)
   22301              : {
   22302    775750941 :   if (skip_op0)
   22303    775678479 :     *skip_op0 = *skip_op1 = false;
   22304              : 
   22305    775750941 :   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22306              :     {
   22307    398515494 :       int count;
   22308              :       /* Cost of reading the memory.  */
   22309    398515494 :       int extra;
   22310              : 
   22311    398515494 :       switch (mode)
   22312              :         {
   22313      6049261 :         case V4QImode:
   22314      6049261 :         case V8QImode:
   22315      6049261 :           if (TARGET_AVX2)
   22316              :             /* Use vpbroadcast.  */
   22317       196243 :             extra = cost->sse_op;
   22318              :           else
   22319      5853018 :             extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22320              : 
   22321      6049261 :           if (constant_op1)
   22322              :             {
   22323      6049231 :               if (code == ASHIFTRT)
   22324              :                 {
   22325          190 :                   count = 4;
   22326          190 :                   extra *= 2;
   22327              :                 }
   22328              :               else
   22329              :                 count = 2;
   22330              :             }
   22331           30 :           else if (TARGET_AVX512BW && TARGET_AVX512VL)
   22332           30 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22333            0 :           else if (TARGET_SSE4_1)
   22334              :             count = 5;
   22335            0 :           else if (code == ASHIFTRT)
   22336              :             count = 6;
   22337              :           else
   22338            0 :             count = 5;
   22339      6049231 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22340              : 
   22341      3027676 :         case V16QImode:
   22342      3027676 :           if (TARGET_XOP)
   22343              :             {
   22344              :               /* For XOP we use vpshab, which requires a broadcast of the
   22345              :                  value to the variable shift insn.  For constants this
   22346              :                  means a V16Q const in mem; even when we can perform the
   22347              :                  shift with one insn set the cost to prefer paddb.  */
   22348         3601 :               if (constant_op1)
   22349              :                 {
   22350         2642 :                   extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22351         2642 :                   return ix86_vec_cost (mode, cost->sse_op) + extra;
   22352              :                 }
   22353              :               else
   22354              :                 {
   22355          959 :                   count = (code == ASHIFT) ? 3 : 4;
   22356          959 :                   return ix86_vec_cost (mode, cost->sse_op * count);
   22357              :                 }
   22358              :             }
   22359              :           /* FALLTHRU */
   22360      6048400 :         case V32QImode:
   22361      6048400 :           if (TARGET_GFNI && constant_op1)
   22362              :             {
   22363              :               /* Use vgf2p8affine.  One extra load for the mask, but in a loop
   22364              :                  with enough registers it will be moved out.  So for now don't
   22365              :                  account the constant mask load.  This is not quite right
   22366              :                  for non loop vectorization.  */
   22367        11878 :               extra = 0;
   22368        11878 :               return ix86_vec_cost (mode, cost->sse_op) + extra;
   22369              :             }
   22370      6036522 :           if (TARGET_AVX2)
   22371              :             /* Use vpbroadcast.  */
   22372       189190 :             extra = cost->sse_op;
   22373              :           else
   22374      5847332 :             extra = COSTS_N_INSNS (mode == V16QImode
   22375              :                                    ? cost->sse_load[2]
   22376      5847332 :                                    : cost->sse_load[3]) / 2;
   22377              : 
   22378      6036522 :           if (constant_op1)
   22379              :             {
   22380      6036334 :               if (code == ASHIFTRT)
   22381              :                 {
   22382          198 :                   count = 4;
   22383          198 :                   extra *= 2;
   22384              :                 }
   22385              :               else
   22386              :                 count = 2;
   22387              :             }
   22388          188 :           else if (TARGET_AVX512BW
   22389           76 :                    && ((mode == V32QImode && !TARGET_PREFER_AVX256)
   22390           38 :                        || (mode == V16QImode && TARGET_AVX512VL
   22391           38 :                            && !TARGET_PREFER_AVX128)))
   22392           76 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22393          112 :           else if (TARGET_AVX2
   22394            0 :                    && mode == V16QImode && !TARGET_PREFER_AVX128)
   22395              :             count = 6;
   22396          112 :           else if (TARGET_SSE4_1)
   22397              :             count = 9;
   22398          112 :           else if (code == ASHIFTRT)
   22399              :             count = 10;
   22400              :           else
   22401           76 :             count = 9;
   22402      6036446 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22403              : 
   22404      3024574 :         case V64QImode:
   22405              :           /* Ignore the mask load for GF2P8AFFINEQB.  */
   22406      3024574 :           extra = 0;
   22407      3024574 :           return ix86_vec_cost (mode, cost->sse_op) + extra;
   22408              : 
   22409     54523373 :         case V2DImode:
   22410     54523373 :         case V4DImode:
   22411              :           /* V*DImode arithmetic right shift is emulated.  */
   22412     54523373 :           if (code == ASHIFTRT && !TARGET_AVX512VL)
   22413              :             {
   22414         1389 :               if (constant_op1)
   22415              :                 {
   22416          650 :                   if (op1_val == 63)
   22417          440 :                     count = TARGET_SSE4_2 ? 1 : 2;
   22418          509 :                   else if (TARGET_XOP)
   22419              :                     count = 2;
   22420          210 :                   else if (TARGET_SSE4_1)
   22421              :                     count = 3;
   22422              :                   else
   22423          230 :                     count = 4;
   22424              :                 }
   22425          739 :               else if (TARGET_XOP)
   22426              :                 count = 3;
   22427           74 :               else if (TARGET_SSE4_2)
   22428              :                 count = 4;
   22429              :               else
   22430         1389 :                 count = 5;
   22431              : 
   22432         1389 :               return ix86_vec_cost (mode, cost->sse_op * count);
   22433              :             }
   22434              :           /* FALLTHRU */
   22435    383388269 :         default:
   22436    383388269 :           return ix86_vec_cost (mode, cost->sse_op);
   22437              :         }
   22438              :     }
   22439              : 
   22440    763166031 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22441              :     {
   22442    193071946 :       if (constant_op1)
   22443              :         {
   22444    193037309 :           if (op1_val > 32)
   22445    137148697 :             return cost->shift_const + COSTS_N_INSNS (2);
   22446              :           else
   22447     55888612 :             return cost->shift_const * 2;
   22448              :         }
   22449              :       else
   22450              :         {
   22451        34637 :           if (and_in_op1)
   22452           63 :             return cost->shift_var * 2;
   22453              :           else
   22454        34574 :             return cost->shift_var * 6 + COSTS_N_INSNS (2);
   22455              :         }
   22456              :     }
   22457              :   else
   22458              :     {
   22459    184163501 :       if (constant_op1)
   22460    183427052 :         return cost->shift_const;
   22461       736449 :       else if (shift_and_truncate)
   22462              :         {
   22463        22905 :           if (skip_op0)
   22464        22905 :             *skip_op0 = *skip_op1 = true;
   22465              :           /* Return the cost after shift-and truncation.  */
   22466        22905 :           return cost->shift_var;
   22467              :         }
   22468              :       else
   22469       713544 :         return cost->shift_var;
   22470              :     }
   22471              : }
   22472              : 
   22473              : static int
   22474    148822715 : ix86_insn_cost (rtx_insn *insn, bool speed)
   22475              : {
   22476    148822715 :   int insn_cost = 0;
   22477              :   /* Add extra cost so that the post-reload late_combine pass does not
   22478              :      revert the optimization done in pass_rpad.  */
   22479    148822715 :   if (reload_completed
   22480      4580078 :       && ix86_rpad_gate ()
   22481       247264 :       && recog_memoized (insn) >= 0
   22482    149069717 :       && get_attr_avx_partial_xmm_update (insn)
   22483              :       == AVX_PARTIAL_XMM_UPDATE_TRUE)
   22484              :     insn_cost += COSTS_N_INSNS (3);
   22485              : 
   22486    148822715 :   return insn_cost + pattern_cost (PATTERN (insn), speed);
   22487              : }
   22488              : 
   22489              : /* Return cost of an SSE/AVX FP->FP extension or truncation of width SIZE.  */
   22490              : 
   22491              : static int
   22492       757346 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
   22493              : {
   22494       757346 :   if (size < 128)
   22495       752210 :     return cost->cvtss2sd;
   22496         5136 :   else if (size < 256)
   22497              :     {
   22498         2351 :       if (TARGET_SSE_SPLIT_REGS)
   22499            0 :         return cost->cvtss2sd * size / 64;  /* Scale by SIZE / 64.  */
   22500         2351 :       return cost->cvtss2sd;
   22501              :     }
   22502         2785 :   if (size < 512)
   22503         1483 :     return cost->vcvtps2pd256;
   22504              :   else
   22505         1302 :     return cost->vcvtps2pd512;
   22506              : }
   22507              : 
   22508              : /* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
   22509              : 
   22510              : static bool
   22511       271041 : unspec_pcmp_p (rtx x)
   22512              : {
   22513       271041 :   return GET_CODE (x) == UNSPEC
   22514       271041 :          && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
   22515              : }
   22516              : 
   22517              : /* Compute a (partial) cost for rtx X.  Return true if the complete
   22518              :    cost has been computed, and false if subexpressions should be
   22519              :    scanned.  In either case, *TOTAL contains the cost result.  */
   22520              : 
   22521              : static bool
   22522   7707471020 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
   22523              :                 int *total, bool speed)
   22524              : {
   22525   7707471020 :   rtx mask;
   22526   7707471020 :   enum rtx_code code = GET_CODE (x);
   22527   7707471020 :   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
   22528   4126787060 :   const struct processor_costs *cost
   22529   7707471020 :     = speed ? ix86_tune_cost : &ix86_size_cost;
   22530   7707471020 :   int src_cost;
   22531              : 
   22532              :   /* Handle the different vpternlog variants.  */
   22533   7707471020 :   if ((GET_MODE_SIZE (mode) == 64
   22534   7707471020 :        ? TARGET_AVX512F
   22535   6522607366 :        : (TARGET_AVX512VL
   22536   6461000004 :           || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
   22537    177801295 :       && GET_MODE_SIZE (mode) >= 16
   22538    120814674 :       && outer_code_i == SET
   22539   7754140250 :       && ternlog_operand (x, mode))
   22540              :     {
   22541        33628 :       rtx args[3];
   22542              : 
   22543        33628 :       args[0] = NULL_RTX;
   22544        33628 :       args[1] = NULL_RTX;
   22545        33628 :       args[2] = NULL_RTX;
   22546        33628 :       int idx = ix86_ternlog_idx (x, args);
   22547        33628 :       gcc_assert (idx >= 0);
   22548              : 
   22549        33628 :       *total = cost->sse_op;
   22550       134512 :       for (int i = 0; i != 3; i++)
   22551       100884 :         if (args[i])
   22552        71146 :           *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
   22553        33628 :       return true;
   22554              :     }
   22555              : 
   22556              : 
   22557   7707437392 :   switch (code)
   22558              :     {
   22559     47720047 :     case SET:
   22560     47720047 :       if (register_operand (SET_DEST (x), VOIDmode)
   22561     47720047 :           && register_operand (SET_SRC (x), VOIDmode))
   22562              :         {
   22563     29251605 :           *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
   22564     29251605 :           return true;
   22565              :         }
   22566              : 
   22567     18468442 :       if (register_operand (SET_SRC (x), VOIDmode))
   22568              :         /* Avoid potentially incorrect high cost from rtx_costs
   22569              :            for non-tieable SUBREGs.  */
   22570              :         src_cost = 0;
   22571              :       else
   22572              :         {
   22573     15648829 :           src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
   22574              : 
   22575     15648829 :           if (CONSTANT_P (SET_SRC (x)))
   22576              :             /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
   22577              :                a small value, possibly zero for cheap constants.  */
   22578      6992484 :             src_cost += COSTS_N_INSNS (1);
   22579              :         }
   22580              : 
   22581     18468442 :       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
   22582     18468442 :       return true;
   22583              : 
   22584   2838511814 :     case CONST_INT:
   22585   2838511814 :     case CONST:
   22586   2838511814 :     case LABEL_REF:
   22587   2838511814 :     case SYMBOL_REF:
   22588   2838511814 :       if (x86_64_immediate_operand (x, VOIDmode))
   22589   2232646330 :         *total = 0;
   22590    605865484 :       else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
   22591              :         /* Consider the zext constants slightly more expensive, as they
   22592              :            can't appear in most instructions.  */
   22593     28043839 :         *total = 1;
   22594              :       else
   22595              :         /* movabsq is slightly more expensive than a simple instruction. */
   22596    577821645 :         *total = COSTS_N_INSNS (1) + 1;
   22597              :       return true;
   22598              : 
   22599      7534015 :     case CONST_DOUBLE:
   22600      7534015 :       if (IS_STACK_MODE (mode))
   22601      1300526 :         switch (standard_80387_constant_p (x))
   22602              :           {
   22603              :           case -1:
   22604              :           case 0:
   22605              :             break;
   22606       279744 :           case 1: /* 0.0 */
   22607       279744 :             *total = 1;
   22608       279744 :             return true;
   22609       485458 :           default: /* Other constants */
   22610       485458 :             *total = 2;
   22611       485458 :             return true;
   22612              :           }
   22613              :       /* FALLTHRU */
   22614              : 
   22615     14498707 :     case CONST_VECTOR:
   22616     14498707 :       switch (standard_sse_constant_p (x, mode))
   22617              :         {
   22618              :         case 0:
   22619              :           break;
   22620      4199712 :         case 1:  /* 0: xor eliminates false dependency */
   22621      4199712 :           *total = 0;
   22622      4199712 :           return true;
   22623       192049 :         default: /* -1: cmp contains false dependency */
   22624       192049 :           *total = 1;
   22625       192049 :           return true;
   22626              :         }
   22627              :       /* FALLTHRU */
   22628              : 
   22629     11101960 :     case CONST_WIDE_INT:
   22630              :       /* Fall back to (MEM (SYMBOL_REF)), since that's where
   22631              :          it'll probably end up.  Add a penalty for size.  */
   22632     22203920 :       *total = (COSTS_N_INSNS (1)
   22633     21977663 :                 + (!TARGET_64BIT && flag_pic)
   22634     22203920 :                 + (GET_MODE_SIZE (mode) <= 4
   22635     19427353 :                    ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
   22636     11101960 :       return true;
   22637              : 
   22638     22552387 :     case ZERO_EXTEND:
   22639              :       /* Zero extension is often completely free on x86_64, so make
   22640              :          it as cheap as possible.  */
   22641     22552387 :       if (TARGET_64BIT && mode == DImode
   22642      4905648 :           && GET_MODE (XEXP (x, 0)) == SImode)
   22643      3024061 :         *total = 1;
   22644     19528326 :       else if (TARGET_ZERO_EXTEND_WITH_AND)
   22645            0 :         *total = cost->add;
   22646              :       else
   22647     19528326 :         *total = cost->movzx;
   22648              :       return false;
   22649              : 
   22650      2737588 :     case SIGN_EXTEND:
   22651      2737588 :       *total = cost->movsx;
   22652      2737588 :       return false;
   22653              : 
   22654    638249435 :     case ASHIFT:
   22655    638249435 :       if (SCALAR_INT_MODE_P (mode)
   22656    246669624 :           && GET_MODE_SIZE (mode) < UNITS_PER_WORD
   22657    681384407 :           && CONST_INT_P (XEXP (x, 1)))
   22658              :         {
   22659     42956183 :           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22660     42956183 :           if (value == 1)
   22661              :             {
   22662      2478499 :               *total = cost->add;
   22663      2478499 :               return false;
   22664              :             }
   22665     40477684 :           if ((value == 2 || value == 3)
   22666      4548597 :               && cost->lea <= cost->shift_const)
   22667              :             {
   22668      2148308 :               *total = cost->lea;
   22669      2148308 :               return false;
   22670              :             }
   22671              :         }
   22672              :       /* FALLTHRU */
   22673              : 
   22674    775678479 :     case ROTATE:
   22675    775678479 :     case ASHIFTRT:
   22676    775678479 :     case LSHIFTRT:
   22677    775678479 :     case ROTATERT:
   22678    775678479 :       bool skip_op0, skip_op1;
   22679    775678479 :       *total = ix86_shift_rotate_cost (cost, code, mode,
   22680    775678479 :                                        CONSTANT_P (XEXP (x, 1)),
   22681              :                                        CONST_INT_P (XEXP (x, 1))
   22682              :                                          ? INTVAL (XEXP (x, 1)) : -1,
   22683              :                                        GET_CODE (XEXP (x, 1)) == AND,
   22684    775678479 :                                        SUBREG_P (XEXP (x, 1))
   22685    775678479 :                                        && GET_CODE (XEXP (XEXP (x, 1),
   22686              :                                                           0)) == AND,
   22687              :                                        &skip_op0, &skip_op1);
   22688    775678479 :       if (skip_op0 || skip_op1)
   22689              :         {
   22690        22905 :           if (!skip_op0)
   22691            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   22692        22905 :           if (!skip_op1)
   22693            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
   22694        22905 :           return true;
   22695              :         }
   22696              :       return false;
   22697              : 
   22698       228862 :     case FMA:
   22699       228862 :       {
   22700       228862 :         rtx sub;
   22701              : 
   22702       228862 :         gcc_assert (FLOAT_MODE_P (mode));
   22703       228862 :         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
   22704              : 
   22705       457724 :         *total = ix86_vec_cost (mode,
   22706       228862 :                                 GET_MODE_INNER (mode) == SFmode
   22707              :                                 ? cost->fmass : cost->fmasd);
   22708       228862 :         *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
   22709              : 
   22710              :         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
   22711       228862 :         sub = XEXP (x, 0);
   22712       228862 :         if (GET_CODE (sub) == NEG)
   22713        50851 :           sub = XEXP (sub, 0);
   22714       228862 :         *total += rtx_cost (sub, mode, FMA, 0, speed);
   22715              : 
   22716       228862 :         sub = XEXP (x, 2);
   22717       228862 :         if (GET_CODE (sub) == NEG)
   22718        40517 :           sub = XEXP (sub, 0);
   22719       228862 :         *total += rtx_cost (sub, mode, FMA, 2, speed);
   22720       228862 :         return true;
   22721              :       }
   22722              : 
   22723   1760206957 :     case MULT:
   22724   1760206957 :       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
   22725              :         {
   22726    551953968 :           rtx op0 = XEXP (x, 0);
   22727    551953968 :           rtx op1 = XEXP (x, 1);
   22728    551953968 :           int nbits;
   22729    551953968 :           if (CONST_INT_P (XEXP (x, 1)))
   22730              :             {
   22731    533826256 :               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22732   1083612276 :               for (nbits = 0; value != 0; value &= value - 1)
   22733    549786020 :                 nbits++;
   22734              :             }
   22735              :           else
   22736              :             /* This is arbitrary.  */
   22737              :             nbits = 7;
   22738              : 
   22739              :           /* Compute costs correctly for widening multiplication.  */
   22740    551953968 :           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
   22741    557491983 :               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
   22742      5538015 :                  == GET_MODE_SIZE (mode))
   22743              :             {
   22744      5524216 :               int is_mulwiden = 0;
   22745      5524216 :               machine_mode inner_mode = GET_MODE (op0);
   22746              : 
   22747      5524216 :               if (GET_CODE (op0) == GET_CODE (op1))
   22748      5423575 :                 is_mulwiden = 1, op1 = XEXP (op1, 0);
   22749       100641 :               else if (CONST_INT_P (op1))
   22750              :                 {
   22751        90643 :                   if (GET_CODE (op0) == SIGN_EXTEND)
   22752        40493 :                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
   22753        40493 :                                   == INTVAL (op1);
   22754              :                   else
   22755        50150 :                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
   22756              :                 }
   22757              : 
   22758      5514218 :               if (is_mulwiden)
   22759      5514218 :                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
   22760              :             }
   22761              : 
   22762    551953968 :           int mult_init;
   22763              :           // Double word multiplication requires 3 mults and 2 adds.
   22764   1119562846 :           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22765              :             {
   22766    332248210 :               mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
   22767    332248210 :                           + 2 * cost->add;
   22768    332248210 :               nbits *= 3;
   22769              :             }
   22770    378932076 :           else mult_init = cost->mult_init[MODE_INDEX (mode)];
   22771              : 
   22772   1103907936 :           *total = (mult_init
   22773    551953968 :                     + nbits * cost->mult_bit
   22774    551953968 :                     + rtx_cost (op0, mode, outer_code, opno, speed)
   22775    551953968 :                     + rtx_cost (op1, mode, outer_code, opno, speed));
   22776              : 
   22777    551953968 :           return true;
   22778              :         }
   22779   1208252989 :       *total = ix86_multiplication_cost (cost, mode);
   22780   1208252989 :       return false;
   22781              : 
   22782     72615174 :     case DIV:
   22783     72615174 :     case UDIV:
   22784     72615174 :     case MOD:
   22785     72615174 :     case UMOD:
   22786     72615174 :       *total = ix86_division_cost (cost, mode);
   22787     72615174 :       return false;
   22788              : 
   22789    690037337 :     case PLUS:
   22790    690037337 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22791    946411052 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   22792              :         {
   22793    142718762 :           if (GET_CODE (XEXP (x, 0)) == PLUS
   22794      3824437 :               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   22795       844672 :               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
   22796       844647 :               && CONSTANT_P (XEXP (x, 1)))
   22797              :             {
   22798       844590 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
   22799       844590 :               if (val == 2 || val == 4 || val == 8)
   22800              :                 {
   22801       844486 :                   *total = cost->lea;
   22802       844486 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22803              :                                       outer_code, opno, speed);
   22804       844486 :                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
   22805              :                                       outer_code, opno, speed);
   22806       844486 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22807              :                                       outer_code, opno, speed);
   22808       844486 :                   return true;
   22809              :                 }
   22810              :             }
   22811    141874172 :           else if (GET_CODE (XEXP (x, 0)) == MULT
   22812     52441287 :                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
   22813              :             {
   22814     52381386 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
   22815     52381386 :               if (val == 2 || val == 4 || val == 8)
   22816              :                 {
   22817      8032178 :                   *total = cost->lea;
   22818      8032178 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22819              :                                       outer_code, opno, speed);
   22820      8032178 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22821              :                                       outer_code, opno, speed);
   22822      8032178 :                   return true;
   22823              :                 }
   22824              :             }
   22825     89492786 :           else if (GET_CODE (XEXP (x, 0)) == PLUS)
   22826              :             {
   22827      2979847 :               rtx op = XEXP (XEXP (x, 0), 0);
   22828              : 
   22829              :               /* Add with carry, ignore the cost of adding a carry flag.  */
   22830      2979847 :               if (ix86_carry_flag_operator (op, mode)
   22831      2979847 :                   || ix86_carry_flag_unset_operator (op, mode))
   22832        70586 :                 *total = cost->add;
   22833              :               else
   22834              :                 {
   22835      2909261 :                   *total = cost->lea;
   22836      2909261 :                   *total += rtx_cost (op, mode,
   22837              :                                       outer_code, opno, speed);
   22838              :                 }
   22839              : 
   22840      2979847 :               *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22841              :                                   outer_code, opno, speed);
   22842      2979847 :               *total += rtx_cost (XEXP (x, 1), mode,
   22843              :                                   outer_code, opno, speed);
   22844      2979847 :               return true;
   22845              :             }
   22846              :         }
   22847              :       /* FALLTHRU */
   22848              : 
   22849   1837853101 :     case MINUS:
   22850              :       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
   22851   1837853101 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22852    519787902 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
   22853    234985286 :           && GET_CODE (XEXP (x, 0)) == MINUS
   22854   1837893181 :           && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
   22855        15161 :               || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
   22856              :         {
   22857        24919 :           *total = cost->add;
   22858        24919 :           *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22859              :                               outer_code, opno, speed);
   22860        24919 :           *total += rtx_cost (XEXP (x, 1), mode,
   22861              :                               outer_code, opno, speed);
   22862        24919 :           return true;
   22863              :         }
   22864              : 
   22865   1837828182 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22866      2410034 :         *total = cost->addss;
   22867   1835418148 :       else if (X87_FLOAT_MODE_P (mode))
   22868       220181 :         *total = cost->fadd;
   22869   1835197967 :       else if (FLOAT_MODE_P (mode))
   22870       441234 :         *total = ix86_vec_cost (mode, cost->addss);
   22871   1834756733 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22872   1208067601 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22873   1292749157 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22874    331249325 :         *total = cost->add * 2;
   22875              :       else
   22876    295439807 :         *total = cost->add;
   22877              :       return false;
   22878              : 
   22879      3936105 :     case IOR:
   22880      3936105 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22881      3690273 :           || SSE_FLOAT_MODE_P (mode))
   22882              :         {
   22883              :           /* (ior (not ...) ...) can be a single insn in AVX512.  */
   22884          480 :           if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
   22885       255426 :               && (GET_MODE_SIZE (mode) == 64
   22886            0 :                   || (TARGET_AVX512VL
   22887            0 :                       && (GET_MODE_SIZE (mode) == 32
   22888            0 :                           || GET_MODE_SIZE (mode) == 16))))
   22889              :             {
   22890            0 :               rtx right = GET_CODE (XEXP (x, 1)) != NOT
   22891            0 :                           ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
   22892              : 
   22893            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22894            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22895              :                                    outer_code, opno, speed)
   22896            0 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22897            0 :               return true;
   22898              :             }
   22899       255426 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22900       255426 :         }
   22901      3680679 :       else if (TARGET_64BIT
   22902      3388661 :                && mode == TImode
   22903      1687567 :                && GET_CODE (XEXP (x, 0)) == ASHIFT
   22904       252336 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
   22905       250340 :                && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
   22906       250340 :                && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   22907       250340 :                && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
   22908       250340 :                && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
   22909       228364 :                && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
   22910              :         {
   22911              :           /* *concatditi3 is cheap.  */
   22912       228364 :           rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
   22913       228364 :           rtx op1 = XEXP (XEXP (x, 1), 0);
   22914         1386 :           *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
   22915       228364 :                    ? COSTS_N_INSNS (1)    /* movq.  */
   22916       226978 :                    : set_src_cost (op0, DImode, speed);
   22917         2348 :           *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
   22918       228364 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22919       226029 :                     : set_src_cost (op1, DImode, speed);
   22920       228364 :           return true;
   22921              :         }
   22922      3452315 :       else if (TARGET_64BIT
   22923      3160297 :                && mode == TImode
   22924      1459203 :                && GET_CODE (XEXP (x, 0)) == AND
   22925      1399244 :                && REG_P (XEXP (XEXP (x, 0), 0))
   22926      1394040 :                && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
   22927      1391362 :                && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
   22928      1391362 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
   22929       909072 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
   22930       909072 :                && GET_CODE (XEXP (x, 1)) == ASHIFT
   22931       906922 :                && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
   22932       906922 :                && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
   22933       906922 :                && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   22934      4359237 :                && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
   22935              :         {
   22936              :           /* *insvti_highpart is cheap.  */
   22937       906922 :           rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
   22938       906922 :           *total = COSTS_N_INSNS (1) + 1;
   22939         1393 :           *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
   22940       906922 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22941       906019 :                     : set_src_cost (op, DImode, speed);
   22942       906922 :           return true;
   22943              :         }
   22944      5382804 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22945       745611 :         *total = cost->add * 2;
   22946              :       else
   22947      1799782 :         *total = cost->add;
   22948              :       return false;
   22949              : 
   22950       570157 :     case XOR:
   22951       570157 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22952       436954 :           || SSE_FLOAT_MODE_P (mode))
   22953       133203 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22954       933524 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22955        16507 :         *total = cost->add * 2;
   22956              :       else
   22957       420447 :         *total = cost->add;
   22958              :       return false;
   22959              : 
   22960      7052716 :     case AND:
   22961      7052716 :       if (address_no_seg_operand (x, mode))
   22962              :         {
   22963        15686 :           *total = cost->lea;
   22964        15686 :           return true;
   22965              :         }
   22966      7037030 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22967      6634042 :                || SSE_FLOAT_MODE_P (mode))
   22968              :         {
   22969              :           /* pandn is a single instruction.  */
   22970       436495 :           if (GET_CODE (XEXP (x, 0)) == NOT)
   22971              :             {
   22972        55705 :               rtx right = XEXP (x, 1);
   22973              : 
   22974              :               /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
   22975          362 :               if (GET_CODE (right) == NOT && TARGET_AVX512F
   22976        55705 :                   && (GET_MODE_SIZE (mode) == 64
   22977            0 :                       || (TARGET_AVX512VL
   22978            0 :                           && (GET_MODE_SIZE (mode) == 32
   22979            0 :                               || GET_MODE_SIZE (mode) == 16))))
   22980            0 :                 right = XEXP (right, 0);
   22981              : 
   22982        55705 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22983        55705 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22984              :                                    outer_code, opno, speed)
   22985        55705 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22986        55705 :               return true;
   22987              :             }
   22988       380790 :           else if (GET_CODE (XEXP (x, 1)) == NOT)
   22989              :             {
   22990          740 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22991          740 :                        + rtx_cost (XEXP (x, 0), mode,
   22992              :                                    outer_code, opno, speed)
   22993          740 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22994              :                                    outer_code, opno, speed);
   22995          740 :               return true;
   22996              :             }
   22997       380050 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22998       380050 :         }
   22999     13902262 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23000              :         {
   23001      1131709 :           if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   23002              :             {
   23003         1670 :               *total = cost->add * 2
   23004          835 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23005              :                                    outer_code, opno, speed)
   23006          835 :                        + rtx_cost (XEXP (x, 1), mode,
   23007              :                                    outer_code, opno, speed);
   23008          835 :               return true;
   23009              :             }
   23010      1130874 :           else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
   23011              :             {
   23012            0 :               *total = cost->add * 2
   23013            0 :                        + rtx_cost (XEXP (x, 0), mode,
   23014              :                                    outer_code, opno, speed)
   23015            0 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   23016              :                                    outer_code, opno, speed);
   23017            0 :               return true;
   23018              :             }
   23019      1130874 :           *total = cost->add * 2;
   23020              :         }
   23021      5468826 :       else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   23022              :         {
   23023         7578 :           *total = cost->add
   23024         3789 :                    + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23025              :                                outer_code, opno, speed)
   23026         3789 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23027         3789 :           return true;
   23028              :         }
   23029      5465037 :       else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
   23030              :         {
   23031          112 :           *total = cost->add
   23032           56 :                    + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23033           56 :                    + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   23034              :                                outer_code, opno, speed);
   23035           56 :           return true;
   23036              :         }
   23037              :       else
   23038      5464981 :         *total = cost->add;
   23039              :       return false;
   23040              : 
   23041       516574 :     case NOT:
   23042       516574 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23043              :         {
   23044              :           /* (not (xor ...)) can be a single insn in AVX512.  */
   23045            0 :           if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
   23046        11016 :               && (GET_MODE_SIZE (mode) == 64
   23047            0 :                   || (TARGET_AVX512VL
   23048            0 :                       && (GET_MODE_SIZE (mode) == 32
   23049            0 :                           || GET_MODE_SIZE (mode) == 16))))
   23050              :             {
   23051            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   23052            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23053              :                                    outer_code, opno, speed)
   23054            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   23055              :                                    outer_code, opno, speed);
   23056            0 :               return true;
   23057              :             }
   23058              : 
   23059              :           // vnot is pxor -1.
   23060        11016 :           *total = ix86_vec_cost (mode, cost->sse_op) + 1;
   23061              :         }
   23062      1156999 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23063        45905 :         *total = cost->add * 2;
   23064              :       else
   23065       459653 :         *total = cost->add;
   23066              :       return false;
   23067              : 
   23068     18212345 :     case NEG:
   23069     18212345 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23070        51184 :         *total = cost->sse_op;
   23071     18161161 :       else if (X87_FLOAT_MODE_P (mode))
   23072        15090 :         *total = cost->fchs;
   23073     18146071 :       else if (FLOAT_MODE_P (mode))
   23074        27070 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23075     18119001 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23076     13404576 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23077      9579721 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23078      1765058 :         *total = cost->add * 3;
   23079              :       else
   23080      2949367 :         *total = cost->add;
   23081              :       return false;
   23082              : 
   23083     53976519 :     case COMPARE:
   23084     53976519 :       rtx op0, op1;
   23085     53976519 :       op0 = XEXP (x, 0);
   23086     53976519 :       op1 = XEXP (x, 1);
   23087     53976519 :       if (GET_CODE (op0) == ZERO_EXTRACT
   23088       167335 :           && XEXP (op0, 1) == const1_rtx
   23089       150103 :           && CONST_INT_P (XEXP (op0, 2))
   23090       150067 :           && op1 == const0_rtx)
   23091              :         {
   23092              :           /* This kind of construct is implemented using test[bwl].
   23093              :              Treat it as if we had an AND.  */
   23094       150067 :           mode = GET_MODE (XEXP (op0, 0));
   23095       300134 :           *total = (cost->add
   23096       150067 :                     + rtx_cost (XEXP (op0, 0), mode, outer_code,
   23097              :                                 opno, speed)
   23098       150067 :                     + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
   23099       150067 :           return true;
   23100              :         }
   23101              : 
   23102     53826452 :       if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
   23103              :         {
   23104              :           /* This is an overflow detection, count it as a normal compare.  */
   23105       143162 :           *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
   23106       143162 :           return true;
   23107              :         }
   23108              : 
   23109     53683290 :       rtx geu;
   23110              :       /* Match x
   23111              :          (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   23112              :                       (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
   23113     53683290 :       if (mode == CCCmode
   23114       293077 :           && GET_CODE (op0) == NEG
   23115         7879 :           && GET_CODE (geu = XEXP (op0, 0)) == GEU
   23116         7876 :           && REG_P (XEXP (geu, 0))
   23117         7876 :           && (GET_MODE (XEXP (geu, 0)) == CCCmode
   23118          759 :               || GET_MODE (XEXP (geu, 0)) == CCmode)
   23119         7876 :           && REGNO (XEXP (geu, 0)) == FLAGS_REG
   23120         7876 :           && XEXP (geu, 1) == const0_rtx
   23121         7876 :           && GET_CODE (op1) == LTU
   23122         7876 :           && REG_P (XEXP (op1, 0))
   23123         7876 :           && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   23124         7876 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23125     53691166 :           && XEXP (op1, 1) == const0_rtx)
   23126              :         {
   23127              :           /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
   23128         7876 :           *total = 0;
   23129         7876 :           return true;
   23130              :         }
   23131              :       /* Match x
   23132              :          (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   23133              :                       (geu:QI (reg:CCC FLAGS_REG) (const_int 0)))  */
   23134     53675414 :       if (mode == CCCmode
   23135       285201 :           && GET_CODE (op0) == NEG
   23136            3 :           && GET_CODE (XEXP (op0, 0)) == LTU
   23137            3 :           && REG_P (XEXP (XEXP (op0, 0), 0))
   23138            3 :           && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   23139            3 :           && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
   23140            3 :           && XEXP (XEXP (op0, 0), 1) == const0_rtx
   23141            3 :           && GET_CODE (op1) == GEU
   23142            3 :           && REG_P (XEXP (op1, 0))
   23143            3 :           && GET_MODE (XEXP (op1, 0)) == CCCmode
   23144            3 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23145     53675417 :           && XEXP (op1, 1) == const0_rtx)
   23146              :         {
   23147              :           /* This is *x86_cmc.  */
   23148            3 :           if (!speed)
   23149            0 :             *total = COSTS_N_BYTES (1);
   23150            3 :           else if (TARGET_SLOW_STC)
   23151            0 :             *total = COSTS_N_INSNS (2);
   23152              :           else
   23153            3 :             *total = COSTS_N_INSNS (1);
   23154            3 :           return true;
   23155              :         }
   23156              : 
   23157     53675411 :       if (SCALAR_INT_MODE_P (GET_MODE (op0))
   23158    111809523 :           && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
   23159              :         {
   23160       757633 :           if (op1 == const0_rtx)
   23161       218208 :             *total = cost->add
   23162       109104 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
   23163              :           else
   23164      1297058 :             *total = 3*cost->add
   23165       648529 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
   23166       648529 :                      + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
   23167       757633 :           return true;
   23168              :         }
   23169              : 
   23170              :       /* The embedded comparison operand is completely free.  */
   23171     52917778 :       if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
   23172       377366 :         *total = 0;
   23173              : 
   23174              :       return false;
   23175              : 
   23176      1369818 :     case FLOAT_EXTEND:
   23177              :       /* x87 represents all values extended to 80bit.  */
   23178      1369818 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23179       665041 :         *total = 0;
   23180              :       else
   23181      1409554 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23182              :       return false;
   23183              : 
   23184        83815 :     case FLOAT_TRUNCATE:
   23185        83815 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23186        58216 :         *total = cost->fadd;
   23187              :       else
   23188        51198 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23189              :       return false;
   23190       683371 :     case FLOAT:
   23191       683371 :     case UNSIGNED_FLOAT:
   23192       683371 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23193              :         /* TODO: We do not have cost tables for x87.  */
   23194        93499 :         *total = cost->fadd;
   23195       589872 :       else if (VECTOR_MODE_P (mode))
   23196            0 :         *total = ix86_vec_cost (mode, cost->cvtpi2ps);
   23197              :       else
   23198       589872 :         *total = cost->cvtsi2ss;
   23199              :       return false;
   23200              : 
   23201       285311 :     case FIX:
   23202       285311 :     case UNSIGNED_FIX:
   23203       285311 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23204              :         /* TODO: We do not have cost tables for x87.  */
   23205       285311 :         *total = cost->fadd;
   23206            0 :       else if (VECTOR_MODE_P (mode))
   23207            0 :         *total = ix86_vec_cost (mode, cost->cvtps2pi);
   23208              :       else
   23209            0 :         *total = cost->cvtss2si;
   23210              :       return false;
   23211              : 
   23212       371416 :     case ABS:
   23213              :       /* SSE requires memory load for the constant operand. It may make
   23214              :          sense to account for this.  Of course the constant operand may or
   23215              :          may not be reused. */
   23216       371416 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23217       264477 :         *total = cost->sse_op;
   23218       106939 :       else if (X87_FLOAT_MODE_P (mode))
   23219        31496 :         *total = cost->fabs;
   23220        75443 :       else if (FLOAT_MODE_P (mode))
   23221        25859 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23222        49584 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23223         6330 :         *total = cost->sse_op;
   23224              :       return false;
   23225              : 
   23226        28721 :     case SQRT:
   23227        28721 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23228        18364 :         *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
   23229        10357 :       else if (X87_FLOAT_MODE_P (mode))
   23230         4315 :         *total = cost->fsqrt;
   23231         6042 :       else if (FLOAT_MODE_P (mode))
   23232         6042 :         *total = ix86_vec_cost (mode,
   23233              :                                 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
   23234              :       return false;
   23235              : 
   23236      3943489 :     case UNSPEC:
   23237      3943489 :       switch (XINT (x, 1))
   23238              :         {
   23239       126039 :         case UNSPEC_TP:
   23240       126039 :           *total = 0;
   23241       126039 :           break;
   23242              : 
   23243         5210 :         case UNSPEC_VTERNLOG:
   23244         5210 :           *total = cost->sse_op;
   23245         5210 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23246          720 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23247         5210 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23248          694 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23249         5210 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23250          733 :             *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
   23251              :           return true;
   23252              : 
   23253        95135 :         case UNSPEC_PTEST:
   23254        95135 :           {
   23255        95135 :             *total = cost->sse_op;
   23256        95135 :             rtx test_op0 = XVECEXP (x, 0, 0);
   23257        95135 :             if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
   23258              :               return false;
   23259        94487 :             if (GET_CODE (test_op0) == AND)
   23260              :               {
   23261           23 :                 rtx and_op0 = XEXP (test_op0, 0);
   23262           23 :                 if (GET_CODE (and_op0) == NOT)
   23263            0 :                   and_op0 = XEXP (and_op0, 0);
   23264           23 :                 *total += rtx_cost (and_op0, GET_MODE (and_op0),
   23265              :                                     AND, 0, speed)
   23266           23 :                           + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
   23267              :                                       AND, 1, speed);
   23268              :              }
   23269              :             else
   23270        94464 :               *total = rtx_cost (test_op0, GET_MODE (test_op0),
   23271              :                                  UNSPEC, 0, speed);
   23272              :           }
   23273              :           return true;
   23274              : 
   23275        20568 :         case UNSPEC_BLENDV:
   23276        20568 :           *total = cost->sse_op;
   23277        20568 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23278         8409 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23279        20568 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23280         9988 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23281        20568 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23282              :             {
   23283        12710 :               rtx cond = XVECEXP (x, 0, 2);
   23284        12710 :               if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
   23285          773 :                   && CONST_VECTOR_P (XEXP (cond, 1)))
   23286              :                 {
   23287              :                   /* avx2_blendvpd256_gt and friends.  */
   23288          153 :                   if (!REG_P (XEXP (cond, 0)))
   23289           70 :                     *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
   23290              :                 }
   23291              :               else
   23292        12557 :                 *total += rtx_cost (cond, mode, code, 2, speed);
   23293              :             }
   23294              :           return true;
   23295              : 
   23296        28353 :         case UNSPEC_MOVMSK:
   23297        28353 :           *total = cost->sse_op;
   23298        28353 :           return true;
   23299              : 
   23300              :         default:
   23301              :           break;
   23302              :         }
   23303              :       return false;
   23304              : 
   23305      2018590 :     case VEC_CONCAT:
   23306              :       /* ??? Assume all of these vector manipulation patterns are
   23307              :          recognizable.  In which case they all pretty much have the
   23308              :          same cost.
   23309              :          ??? We should still recurse when computing cost.  */
   23310      2018590 :      *total = cost->sse_op;
   23311      2018590 :      return true;
   23312              : 
   23313      2428541 :     case VEC_SELECT:
   23314              :      /* Special case extracting lower part from the vector.
   23315              :         This by itself needs no code and most of SSE/AVX instructions have
   23316              :         packed and single forms where the single form may be represented
   23317              :         by such VEC_SELECT.
   23318              : 
   23319              :         Use cost 1 (despite the fact that functionally equivalent SUBREG has
   23320              :         cost 0).  Making VEC_SELECT completely free, for example instructs CSE
   23321              :         to forward propagate VEC_SELECT into
   23322              : 
   23323              :            (set (reg eax) (reg src))
   23324              : 
   23325              :         which then prevents fwprop and combining. See i.e.
   23326              :         gcc.target/i386/pr91103-1.c.
   23327              : 
   23328              :         ??? rtvec_series_p test should be, for valid patterns, equivalent to
   23329              :         vec_series_lowpart_p but is not, since the latter calls
   23330              :         can_change_mode_class on ALL_REGS and this returns false since x87 does
   23331              :         not support subregs at all.  */
   23332      2428541 :      if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
   23333       761356 :        *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   23334       761356 :                           outer_code, opno, speed) + 1;
   23335              :      else
   23336              :        /* ??? We should still recurse when computing cost.  */
   23337      1667185 :        *total = cost->sse_op;
   23338              :      return true;
   23339              : 
   23340      1222314 :     case VEC_DUPLICATE:
   23341      2444628 :       *total = rtx_cost (XEXP (x, 0),
   23342      1222314 :                          GET_MODE (XEXP (x, 0)),
   23343              :                          VEC_DUPLICATE, 0, speed);
   23344              :       /* It's broadcast instruction, not embedded broadcasting.  */
   23345      1222314 :       if (outer_code == SET)
   23346      1174031 :         *total += cost->sse_op;
   23347              : 
   23348              :      return true;
   23349              : 
   23350       722832 :     case VEC_MERGE:
   23351       722832 :       mask = XEXP (x, 2);
   23352              :       /* Scalar versions of SSE instructions may be represented as:
   23353              : 
   23354              :          (vec_merge (vec_duplicate (operation ....))
   23355              :                      (register or memory)
   23356              :                      (const_int 1))
   23357              : 
   23358              :          In this case vec_merge and vec_duplicate are free.
   23359              :          Just recurse into operation and second operand.  */
   23360       722832 :       if (mask == const1_rtx
   23361       212737 :           && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
   23362              :         {
   23363        75683 :           *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23364              :                              outer_code, opno, speed)
   23365        75683 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23366        75683 :           return true;
   23367              :         }
   23368              :       /* This is masked instruction, assume the same cost,
   23369              :          as nonmasked variant.  */
   23370       647149 :       else if (TARGET_AVX512F
   23371       647149 :                && (register_operand (mask, GET_MODE (mask))
   23372              :                    /* Redundant clean up of high bits for kmask with VL=2/4
   23373              :                       i.e. (vec_merge op0, op1, (and op3 15)).  */
   23374       120092 :                    || (GET_CODE (mask) == AND
   23375          372 :                        && register_operand (XEXP (mask, 0), GET_MODE (mask))
   23376          372 :                        && CONST_INT_P (XEXP (mask, 1))
   23377          372 :                        && ((INTVAL (XEXP (mask, 1)) == 3
   23378          131 :                             && GET_MODE_NUNITS (mode) == 2)
   23379          241 :                            || (INTVAL (XEXP (mask, 1)) == 15
   23380          241 :                                && GET_MODE_NUNITS (mode) == 4)))))
   23381              :         {
   23382       373858 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23383       373858 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23384       373858 :           return true;
   23385              :         }
   23386              :       /* Combination of the two above:
   23387              : 
   23388              :          (vec_merge (vec_merge (vec_duplicate (operation ...))
   23389              :                        (register or memory)
   23390              :                        (reg:QI mask))
   23391              :                     (register or memory)
   23392              :                     (const_int 1))
   23393              : 
   23394              :          i.e. avx512fp16_vcvtss2sh_mask.  */
   23395       273291 :       else if (TARGET_AVX512F
   23396       119720 :                && mask == const1_rtx
   23397        46523 :                && GET_CODE (XEXP (x, 0)) == VEC_MERGE
   23398        27158 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
   23399       275553 :                && register_operand (XEXP (XEXP (x, 0), 2),
   23400         2262 :                                     GET_MODE (XEXP (XEXP (x, 0), 2))))
   23401              :         {
   23402         2250 :           *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
   23403              :                              mode, outer_code, opno, speed)
   23404         2250 :                    + rtx_cost (XEXP (XEXP (x, 0), 1),
   23405              :                                mode, outer_code, opno, speed)
   23406         2250 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23407         2250 :           return true;
   23408              :         }
   23409              :       /* vcmp.  */
   23410       271041 :       else if (unspec_pcmp_p (mask)
   23411       271041 :                || (GET_CODE (mask) == NOT
   23412            0 :                    && unspec_pcmp_p (XEXP (mask, 0))))
   23413              :         {
   23414         1950 :           rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
   23415         1950 :           rtx unsop0 = XVECEXP (uns, 0, 0);
   23416              :           /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
   23417              :              cost the same as register.
   23418              :              This is used by avx_cmp<mode>3_ltint_not.  */
   23419         1950 :           if (SUBREG_P (unsop0))
   23420          417 :             unsop0 = XEXP (unsop0, 0);
   23421         1950 :           if (GET_CODE (unsop0) == NOT)
   23422           18 :             unsop0 = XEXP (unsop0, 0);
   23423         1950 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23424         1950 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
   23425         1950 :                    + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
   23426         1950 :                    + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
   23427         1950 :                    + cost->sse_op;
   23428         1950 :           return true;
   23429              :         }
   23430              :       else
   23431       269091 :         *total = cost->sse_op;
   23432       269091 :       return false;
   23433              : 
   23434    106906249 :     case MEM:
   23435              :       /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
   23436              :          or variants in ix86_vector_duplicate_simode_const.  */
   23437              : 
   23438    106906249 :       if (GET_MODE_SIZE (mode) >= 16
   23439     18132281 :           && VECTOR_MODE_P (mode)
   23440     12149520 :           && SYMBOL_REF_P (XEXP (x, 0))
   23441      2207900 :           && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
   23442    108884910 :           && ix86_broadcast_from_constant (mode, x))
   23443              :         {
   23444       489636 :           *total = COSTS_N_INSNS (2) + speed;
   23445       489636 :           return true;
   23446              :         }
   23447              : 
   23448              :       /* An insn that accesses memory is slightly more expensive
   23449              :          than one that does not.  */
   23450    106416613 :       if (speed)
   23451              :         {
   23452     95187471 :           *total += 1;
   23453     95187471 :           rtx addr = XEXP (x, 0);
   23454              :           /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
   23455              :              so for MEM (reg) and MEM (reg + 4), the former costs 5,
   23456              :              the latter costs 9, it is not accurate for x86. Ideally
   23457              :              address_cost should be used, but it reduces cost too much.
   23458              :              So the current solution is to make constant disp as cheap as possible.  */
   23459     95187471 :           if (GET_CODE (addr) == PLUS
   23460     77648239 :               && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
   23461              :               /* Only handle (reg + disp) since other forms of addr are mostly LEA,
   23462              :                  there's no additional cost for the plus of disp.  */
   23463    167253568 :               && register_operand (XEXP (addr, 0), Pmode))
   23464              :             {
   23465     55981716 :               *total += 1;
   23466     68838212 :               *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
   23467     55981716 :               return true;
   23468              :             }
   23469              :         }
   23470              : 
   23471              :       return false;
   23472              : 
   23473        52521 :     case ZERO_EXTRACT:
   23474        52521 :       if (XEXP (x, 1) == const1_rtx
   23475        11449 :           && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
   23476            0 :           && GET_MODE (XEXP (x, 2)) == SImode
   23477            0 :           && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
   23478              :         {
   23479              :           /* Ignore cost of zero extension and masking of last argument.  */
   23480            0 :           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23481            0 :           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23482            0 :           *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
   23483            0 :           return true;
   23484              :         }
   23485              :       return false;
   23486              : 
   23487     29127102 :     case IF_THEN_ELSE:
   23488     29127102 :       if (TARGET_XOP
   23489        25126 :           && VECTOR_MODE_P (mode)
   23490     29132493 :           && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
   23491              :         {
   23492              :           /* vpcmov.  */
   23493         4823 :           *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
   23494         4823 :           if (!REG_P (XEXP (x, 0)))
   23495         4663 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23496         4823 :           if (!REG_P (XEXP (x, 1)))
   23497         4630 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23498         4823 :           if (!REG_P (XEXP (x, 2)))
   23499         4632 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23500         4823 :           return true;
   23501              :         }
   23502            0 :       else if (TARGET_CMOVE
   23503     29122279 :                && SCALAR_INT_MODE_P (mode)
   23504     31512079 :                && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   23505              :         {
   23506              :           /* cmov.  */
   23507      2194828 :           *total = COSTS_N_INSNS (1);
   23508      2194828 :           if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
   23509            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23510      2194828 :           if (!REG_P (XEXP (x, 1)))
   23511       114188 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23512      2194828 :           if (!REG_P (XEXP (x, 2)))
   23513       705676 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23514      2194828 :           return true;
   23515              :         }
   23516              :       return false;
   23517              : 
   23518     18177926 :     case EQ:
   23519     18177926 :     case GT:
   23520     18177926 :     case GTU:
   23521     18177926 :     case LT:
   23522     18177926 :     case LTU:
   23523     18177926 :       if (TARGET_SSE2
   23524     18174728 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23525     18510841 :           && GET_MODE_SIZE (mode) >= 8)
   23526              :         {
   23527              :           /* vpcmpeq */
   23528       328350 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
   23529       328350 :           if (!REG_P (XEXP (x, 0)))
   23530        62557 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23531       328350 :           if (!REG_P (XEXP (x, 1)))
   23532       125533 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23533       328350 :           return true;
   23534              :         }
   23535     17849576 :       if (TARGET_XOP
   23536        12261 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23537     17849684 :           && GET_MODE_SIZE (mode) <= 16)
   23538              :         {
   23539              :           /* vpcomeq */
   23540          108 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23541          108 :           if (!REG_P (XEXP (x, 0)))
   23542            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23543          108 :           if (!REG_P (XEXP (x, 1)))
   23544            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23545          108 :           return true;
   23546              :         }
   23547              :       return false;
   23548              : 
   23549     16001796 :     case NE:
   23550     16001796 :     case GE:
   23551     16001796 :     case GEU:
   23552     16001796 :       if (TARGET_XOP
   23553        21866 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23554     16008550 :           && GET_MODE_SIZE (mode) <= 16)
   23555              :         {
   23556              :           /* vpcomneq */
   23557         6754 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23558         6754 :           if (!REG_P (XEXP (x, 0)))
   23559         1401 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23560         6754 :           if (!REG_P (XEXP (x, 1)))
   23561         5734 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23562         6754 :           return true;
   23563              :         }
   23564     15995042 :       if (TARGET_SSE2
   23565     15992935 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23566     15996351 :           && GET_MODE_SIZE (mode) >= 8)
   23567              :         {
   23568         1333 :           if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
   23569              :             /* vpcmpeq + vpternlog */
   23570           40 :             *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
   23571              :           else
   23572              :             /* vpcmpeq + pxor + vpcmpeq */
   23573         1267 :             *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
   23574         1285 :           if (!REG_P (XEXP (x, 0)))
   23575           28 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23576         1285 :           if (!REG_P (XEXP (x, 1)))
   23577           28 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23578         1285 :           return true;
   23579              :         }
   23580              :       return false;
   23581              : 
   23582              :     default:
   23583              :       return false;
   23584              :     }
   23585              : }
   23586              : 
   23587              : #if TARGET_MACHO
   23588              : 
   23589              : static int current_machopic_label_num;  /* Incremented once per stub; numbers the L<n>$lz and LPC$<n> labels.  */
   23590              : 
   23591              : /* Given a symbol name and its associated stub, write out the
   23592              :    definition of the stub.
                      :    FILE is the stream the assembly text is written to, SYMB is the
                      :    symbol name (its name encoding is stripped first) and STUB is
                      :    the label of the stub.  The stub label and an .indirect_symbol
                      :    directive are always emitted.  With MACHOPIC_ATT_STUB the stub
                      :    body is five hlt instructions and nothing further is written.
                      :    Otherwise the stub jumps through the lazy pointer (loaded
                      :    relative to %ecx when MACHOPIC_PURE), and is followed by the
                      :    binder, which pushes the lazy pointer address and jumps to
                      :    dyld_stub_binding_helper, and by the lazy pointer itself, whose
                      :    ASM_LONG entry names the binder.  Asserts !TARGET_64BIT.  */
   23593              : 
   23594              : void
   23595              : machopic_output_stub (FILE *file, const char *symb, const char *stub)
   23596              : {
   23597              :   unsigned int length;
   23598              :   char *binder_name, *symbol_name, lazy_ptr_name[32];
   23599              :   int label = ++current_machopic_label_num;
   23600              : 
   23601              :   /* For 64-bit we shouldn't get here.  */
   23602              :   gcc_assert (!TARGET_64BIT);
   23603              : 
   23604              :   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
   23605              :   symb = targetm.strip_name_encoding (symb);
   23606              : 
   23607              :   length = strlen (stub);
   23608              :   binder_name = XALLOCAVEC (char, length + 32);
   23609              :   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
   23610              : 
   23611              :   length = strlen (symb);
   23612              :   symbol_name = XALLOCAVEC (char, length + 32);
   23613              :   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
   23614              : 
   23615              :   sprintf (lazy_ptr_name, "L%d$lz", label);  /* "L" + int + "$lz" stays well below the 32-byte buffer.  */
   23616              : 
   23617              :   if (MACHOPIC_ATT_STUB)
   23618              :     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
   23619              :   else if (MACHOPIC_PURE)
   23620              :     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
   23621              :   else
   23622              :     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
   23623              : 
   23624              :   fprintf (file, "%s:\n", stub);
   23625              :   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
   23626              : 
   23627              :   if (MACHOPIC_ATT_STUB)
   23628              :     {
   23629              :       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
   23630              :     }
   23631              :   else if (MACHOPIC_PURE)
   23632              :     {
   23633              :       /* PIC stub.  */
   23634              :       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
   23635              :       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
   23636              :       output_set_got (tmp, NULL_RTX);   /* "CALL ___<cpu>.get_pc_thunk.cx".  */
   23637              :       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
   23638              :                label, lazy_ptr_name, label);
   23639              :       fprintf (file, "\tjmp\t*%%ecx\n");
   23640              :     }
   23641              :   else
   23642              :     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
   23643              : 
   23644              :   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
   23645              :      it needs no stub-binding-helper.  */
   23646              :   if (MACHOPIC_ATT_STUB)
   23647              :     return;
   23648              : 
   23649              :   fprintf (file, "%s:\n", binder_name);
   23650              : 
   23651              :   if (MACHOPIC_PURE)
   23652              :     {
   23653              :       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
   23654              :       fprintf (file, "\tpushl\t%%ecx\n");
   23655              :     }
   23656              :   else
   23657              :     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
   23658              : 
   23659              :   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
   23660              : 
   23661              :   /* N.B. Keep the correspondence of these
   23662              :      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
   23663              :      old-pic/new-pic/non-pic stubs; altering this will break
   23664              :      compatibility with existing dylibs.  */
   23665              :   if (MACHOPIC_PURE)
   23666              :     {
   23667              :       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
   23668              :       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
   23669              :     }
   23670              :   else
   23671              :     /* 16-byte -mdynamic-no-pic stub.  */
   23672              :     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
   23673              : 
   23674              :   fprintf (file, "%s:\n", lazy_ptr_name);
   23675              :   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
   23676              :   fprintf (file, ASM_LONG "%s\n", binder_name);
   23677              : }
   23678              : #endif /* TARGET_MACHO */
   23679              : 
   23680              : /* Order the registers for register allocator.
                      :    Fills reg_alloc_order[] in this sequence: general registers that
                      :    are call-used or fixed, the remaining general registers, the
                      :    x87 stack registers when !TARGET_SSE_MATH, the SSE, REX SSE and
                      :    extended REX SSE registers, the mask registers, the x87 stack
                      :    registers when TARGET_SSE_MATH, and finally the MMX registers.
                      :    Slots left over below FIRST_PSEUDO_REGISTER are set to 0.  */
   23681              : 
   23682              : void
   23683       217148 : x86_order_regs_for_local_alloc (void)
   23684              : {
   23685       217148 :    int pos = 0;  /* Next slot of reg_alloc_order to fill.  */
   23686       217148 :    int i;
   23687              : 
   23688              :    /* First allocate the local general purpose registers.  */
   23689     20194764 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23690     26926352 :      if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
   23691      5654824 :         reg_alloc_order [pos++] = i;
   23692              : 
   23693              :    /* Global general purpose registers.  */
   23694     20194764 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23695     23191000 :      if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
   23696      1293912 :         reg_alloc_order [pos++] = i;
   23697              : 
   23698              :    /* x87 registers come first in case we are doing FP math
   23699              :       using them.  */
   23700       217148 :    if (!TARGET_SSE_MATH)
   23701        57663 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23702        51256 :        reg_alloc_order [pos++] = i;
   23703              : 
   23704              :    /* SSE registers.  */
   23705      1954332 :    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
   23706      1737184 :      reg_alloc_order [pos++] = i;
   23707      1954332 :    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
   23708      1737184 :      reg_alloc_order [pos++] = i;
   23709              : 
   23710              :    /* Extended REX SSE registers.  */
   23711      3691516 :    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
   23712      3474368 :      reg_alloc_order [pos++] = i;
   23713              : 
   23714              :    /* Mask register.  */
   23715      1954332 :    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
   23716      1737184 :      reg_alloc_order [pos++] = i;
   23717              : 
   23718              :    /* x87 registers.  */
   23719       217148 :    if (TARGET_SSE_MATH)
   23720      1896669 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23721      1685928 :        reg_alloc_order [pos++] = i;
   23722              : 
   23723      1954332 :    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)  /* MMX registers go last.  */
   23724      1737184 :      reg_alloc_order [pos++] = i;
   23725              : 
   23726              :    /* Initialize the rest of array as we do not allocate some registers
   23727              :       at all.  */
   23728      1085740 :    while (pos < FIRST_PSEUDO_REGISTER)
   23729       868592 :      reg_alloc_order [pos++] = 0;
   23730       217148 : }
   23731              : /* Return true if the MS bit-field layout applies to RECORD_TYPE:
                      :    either TARGET_MS_BITFIELD_LAYOUT is set and the type does not
                      :    carry the "gcc_struct" attribute, or the type carries the
                      :    "ms_struct" attribute.  */
   23732              : static bool
   23733    245481131 : ix86_ms_bitfield_layout_p (const_tree record_type)
   23734              : {
   23735    245481131 :   return ((TARGET_MS_BITFIELD_LAYOUT
   23736          215 :            && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
   23737    245481131 :           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
   23738              : }
   23739              : 
   23740              : /* Returns an expression indicating where the this parameter is
   23741              :    located on entry to the FUNCTION.
                      :    For 64-bit targets the result is a Pmode register taken from the
                      :    integer parameter register table selected by the function type
                      :    (preserve_none attribute, MS ABI, or the default table), at
                      :    index 0, or index 1 when aggregate_value_p reports that the
                      :    return value is an aggregate value.  For 32-bit targets with
                      :    register parameters and no stdarg, the result is ECX/EDX for
                      :    fastcall, ECX for thiscall, or EAX/EDX otherwise, falling back
                      :    to the stack slot at 4(%esp) in the aggregate cases handled
                      :    below.  In all other cases it is the SImode stack slot at
                      :    4(%esp), or 8(%esp) for an aggregate return.  */
   23742              : 
   23743              : static rtx
   23744         1761 : x86_this_parameter (tree function)
   23745              : {
   23746         1761 :   tree type = TREE_TYPE (function);
   23747         1761 :   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;  /* Presumably a hidden return-slot argument then precedes THIS -- confirm.  */
   23748         1761 :   int nregs;
   23749              : 
   23750         1761 :   if (TARGET_64BIT)
   23751              :     {
   23752         1759 :       const int *parm_regs;
   23753              : 
   23754         1759 :       if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
   23755              :         parm_regs = x86_64_preserve_none_int_parameter_registers;
   23756         1759 :       else if (ix86_function_type_abi (type) == MS_ABI)
   23757              :         parm_regs = x86_64_ms_abi_int_parameter_registers;
   23758              :       else
   23759         1759 :         parm_regs = x86_64_int_parameter_registers;
   23760         1759 :       return gen_rtx_REG (Pmode, parm_regs[aggr]);  /* AGGR (0 or 1) selects the first or second table entry.  */
   23761              :     }
   23762              : 
   23763            2 :   nregs = ix86_function_regparm (type, function);
   23764              : 
   23765            2 :   if (nregs > 0 && !stdarg_p (type))
   23766              :     {
   23767            0 :       int regno;
   23768            0 :       unsigned int ccvt = ix86_get_callcvt (type);
   23769              : 
   23770            0 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   23771            0 :         regno = aggr ? DX_REG : CX_REG;
   23772            0 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   23773              :         {
   23774            0 :           regno = CX_REG;
   23775            0 :           if (aggr)
   23776            0 :             return gen_rtx_MEM (SImode,
   23777            0 :                                 plus_constant (Pmode, stack_pointer_rtx, 4));
   23778              :         }
   23779              :       else
   23780              :         {
   23781            0 :           regno = AX_REG;
   23782            0 :           if (aggr)
   23783              :             {
   23784            0 :               regno = DX_REG;
   23785            0 :               if (nregs == 1)  /* Only one register parameter: THIS is taken from the stack instead.  */
   23786            0 :                 return gen_rtx_MEM (SImode,
   23787            0 :                                     plus_constant (Pmode,
   23788              :                                                    stack_pointer_rtx, 4));
   23789              :             }
   23790              :         }
   23791            0 :       return gen_rtx_REG (SImode, regno);
   23792              :     }
   23793              : 
   23794            4 :   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
   23795            4 :                                              aggr ? 8 : 4));
   23796              : }
   23797              : 
   23798              : /* Determine whether x86_output_mi_thunk can succeed.
                      :    Only VCALL_OFFSET and FUNCTION are examined; the first two
                      :    (unnamed) parameters are ignored.  Returns true for 64-bit
                      :    targets and for 32-bit functions using fewer than three register
                      :    parameters; otherwise returns false when a vcall offset is
                      :    present or when PIC code must reach a FUNCTION that does not
                      :    bind locally.  */
   23799              : 
   23800              : static bool
   23801         4907 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
   23802              :                          const_tree function)
   23803              : {
   23804              :   /* 64-bit can handle anything.  */
   23805         4907 :   if (TARGET_64BIT)
   23806              :     return true;
   23807              : 
   23808              :   /* For 32-bit, everything's fine if we have one free register.  */
   23809           76 :   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
   23810              :     return true;
   23811              : 
   23812              :   /* Need a free register for vcall_offset.  */
   23813            0 :   if (vcall_offset)
   23814              :     return false;
   23815              : 
   23816              :   /* Need a free register for GOT references.  */
   23817            0 :   if (flag_pic && !targetm.binds_local_p (function))
   23818              :     return false;
   23819              : 
   23820              :   /* Otherwise ok.  */
   23821              :   return true;
   23822              : }
   23823              : 
   23824              : /* Output the assembler code for a thunk function.  THUNK_DECL is the
   23825              :    declaration for the thunk function itself, FUNCTION is the decl for
   23826              :    the target function.  DELTA is an immediate constant offset to be
   23827              :    added to THIS.  If VCALL_OFFSET is nonzero, the word at
   23828              :    *(*this + vcall_offset) should be added to THIS.
                      :    The thunk is built as RTL insns (THIS adjustment followed by a
                      :    sibling call or indirect jump to FUNCTION) and then run through
                      :    shorten_branches and final to write the assembly to FILE.  The
                      :    scratch register is R10 for 64-bit; for 32-bit it is EAX for
                      :    fastcall, EDX for thiscall and ECX otherwise.
                      :    flag_force_indirect_call is cleared for 32-bit PIC while the
                      :    thunk is generated and restored before returning.  */
   23829              : 
   23830              : static void
   23831         1761 : x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
   23832              :                      HOST_WIDE_INT vcall_offset, tree function)
   23833              : {
   23834         1761 :   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
   23835         1761 :   rtx this_param = x86_this_parameter (function);
   23836         1761 :   rtx this_reg, tmp, fnaddr;
   23837         1761 :   unsigned int tmp_regno;
   23838         1761 :   rtx_insn *insn;
   23839         1761 :   int saved_flag_force_indirect_call = flag_force_indirect_call;  /* Restored at the end of this function.  */
   23840              : 
   23841         1761 :   if (TARGET_64BIT)
   23842              :     tmp_regno = R10_REG;
   23843              :   else
   23844              :     {
   23845            2 :       unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
   23846            2 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   23847              :         tmp_regno = AX_REG;
   23848            2 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   23849              :         tmp_regno = DX_REG;
   23850              :       else
   23851            2 :         tmp_regno = CX_REG;
   23852              : 
   23853            2 :       if (flag_pic)
   23854            2 :   flag_force_indirect_call = 0;  /* Temporarily disabled for 32-bit PIC thunks.  */
   23855              :     }
   23856              : 
   23857         1761 :   emit_note (NOTE_INSN_PROLOGUE_END);
   23858              : 
   23859              :   /* CET is enabled, insert ENDBR instruction.  */
   23860         1761 :   if ((flag_cf_protection & CF_BRANCH))
   23861           20 :     emit_insn (gen_nop_endbr ());
   23862              : 
   23863              :   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
   23864              :      pull it in now and let DELTA benefit.  */
   23865         1761 :   if (REG_P (this_param))
   23866              :     this_reg = this_param;
   23867            2 :   else if (vcall_offset)
   23868              :     {
   23869              :       /* Put the this parameter into %eax.  */
   23870            2 :       this_reg = gen_rtx_REG (Pmode, AX_REG);
   23871            1 :       emit_move_insn (this_reg, this_param);
   23872              :     }
   23873              :   else
   23874              :     this_reg = NULL_RTX;
   23875              : 
   23876              :   /* Adjust the this parameter by a fixed constant.  */
   23877         1761 :   if (delta)
   23878              :     {
   23879          826 :       rtx delta_rtx = GEN_INT (delta);
   23880          826 :       rtx delta_dst = this_reg ? this_reg : this_param;
   23881              : 
   23882          826 :       if (TARGET_64BIT)
   23883              :         {
   23884          825 :           if (!x86_64_general_operand (delta_rtx, Pmode))
   23885              :             {
   23886            0 :               tmp = gen_rtx_REG (Pmode, tmp_regno);
   23887            0 :               emit_move_insn (tmp, delta_rtx);
   23888            0 :               delta_rtx = tmp;
   23889              :             }
   23890              :         }
   23891              : 
   23892          827 :       ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
   23893              :     }
   23894              : 
   23895              :   /* Adjust the this parameter by a value stored in the vtable.  */
   23896         1761 :   if (vcall_offset)
   23897              :     {
   23898          986 :       rtx vcall_addr, vcall_mem, this_mem;
   23899              : 
   23900          987 :       tmp = gen_rtx_REG (Pmode, tmp_regno);
   23901              : 
   23902          986 :       this_mem = gen_rtx_MEM (ptr_mode, this_reg);
   23903          987 :       if (Pmode != ptr_mode)
   23904            0 :         this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
   23905          986 :       emit_move_insn (tmp, this_mem);
   23906              : 
   23907              :       /* Adjust the this parameter.  */
   23908          987 :       vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
   23909          986 :       if (TARGET_64BIT
   23910          986 :           && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
   23911              :         {
   23912            0 :           rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
   23913            0 :           emit_move_insn (tmp2, GEN_INT (vcall_offset));
   23914            0 :           vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
   23915              :         }
   23916              : 
   23917          986 :       vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
   23918          987 :       if (Pmode != ptr_mode)
   23919            0 :         emit_insn (gen_addsi_1_zext (this_reg,
   23920              :                                      gen_rtx_REG (ptr_mode,
   23921              :                                                   REGNO (this_reg)),
   23922              :                                      vcall_mem));
   23923              :       else
   23924          986 :         ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
   23925              :     }
   23926              : 
   23927              :   /* If necessary, drop THIS back to its stack slot.  */
   23928         1761 :   if (this_reg && this_reg != this_param)
   23929            1 :     emit_move_insn (this_param, this_reg);
   23930              : 
   23931         1761 :   fnaddr = XEXP (DECL_RTL (function), 0);
   23932         1761 :   if (TARGET_64BIT)
   23933              :     {
   23934           25 :       if (!flag_pic || targetm.binds_local_p (function)
   23935         1784 :           || TARGET_PECOFF)
   23936              :         ;
   23937              :       else
   23938              :         {
   23939            0 :           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
   23940            0 :           tmp = gen_rtx_CONST (Pmode, tmp);
   23941            0 :           fnaddr = gen_const_mem (Pmode, tmp);
   23942              :         }
   23943              :     }
   23944              :   else
   23945              :     {
   23946            2 :       if (!flag_pic || targetm.binds_local_p (function))
   23947              :         ;
   23948              : #if TARGET_MACHO
   23949              :       else if (TARGET_MACHO)
   23950              :         {
   23951              :           fnaddr = machopic_indirect_call_target (DECL_RTL (function));
   23952              :           fnaddr = XEXP (fnaddr, 0);
   23953              :         }
   23954              : #endif /* TARGET_MACHO */
   23955              :       else
   23956              :         {
   23957            0 :           tmp = gen_rtx_REG (Pmode, CX_REG);
   23958            0 :           output_set_got (tmp, NULL_RTX);
   23959              : 
   23960            0 :           fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
   23961            0 :           fnaddr = gen_rtx_CONST (Pmode, fnaddr);
   23962            0 :           fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
   23963            0 :           fnaddr = gen_const_mem (Pmode, fnaddr);
   23964              :         }
   23965              :     }
   23966              : 
   23967              :   /* Our sibling call patterns do not allow memories, because we have no
   23968              :      predicate that can distinguish between frame and non-frame memory.
   23969              :      For our purposes here, we can get away with (ab)using a jump pattern,
   23970              :      because we're going to do no optimization.  */
   23971         1761 :   if (MEM_P (fnaddr))
   23972              :     {
   23973            0 :       if (sibcall_insn_operand (fnaddr, word_mode))
   23974              :         {
   23975            0 :           fnaddr = XEXP (DECL_RTL (function), 0);
   23976            0 :           tmp = gen_rtx_MEM (QImode, fnaddr);
   23977            0 :           tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
   23978            0 :           tmp = emit_call_insn (tmp);
   23979            0 :           SIBLING_CALL_P (tmp) = 1;
   23980              :         }
   23981              :       else
   23982            0 :         emit_jump_insn (gen_indirect_jump (fnaddr));
   23983              :     }
   23984              :   else
   23985              :     {
   23986         1761 :       if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
   23987              :         {
   23988              :           // CM_LARGE_PIC always uses pseudo PIC register which is
   23989              :           // uninitialized.  Since FUNCTION is local and calling it
   23990              :           // doesn't go through PLT, we use scratch register %r11 as
   23991              :           // PIC register and initialize it here.
   23992            3 :           pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
   23993            3 :           ix86_init_large_pic_reg (tmp_regno);
   23994            3 :           fnaddr = legitimize_pic_address (fnaddr,
   23995            3 :                                            gen_rtx_REG (Pmode, tmp_regno));
   23996              :         }
   23997              : 
   23998         1761 :       if (!sibcall_insn_operand (fnaddr, word_mode))
   23999              :         {
   24000            9 :           tmp = gen_rtx_REG (word_mode, tmp_regno);
   24001            9 :           if (GET_MODE (fnaddr) != word_mode)
   24002            0 :             fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
   24003            9 :           emit_move_insn (tmp, fnaddr);
   24004            9 :           fnaddr = tmp;
   24005              :         }
   24006              : 
   24007         1761 :       tmp = gen_rtx_MEM (QImode, fnaddr);
   24008         1761 :       tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
   24009         1761 :       tmp = emit_call_insn (tmp);
   24010         1761 :       SIBLING_CALL_P (tmp) = 1;
   24011              :     }
   24012         1761 :   emit_barrier ();
   24013              : 
   24014              :   /* Emit just enough of rest_of_compilation to get the insns emitted.  */
   24015         1761 :   insn = get_insns ();
   24016         1761 :   shorten_branches (insn);
   24017         1761 :   assemble_start_function (thunk_fndecl, fnname);
   24018         1761 :   final_start_function (insn, file, 1);
   24019         1761 :   final (insn, file, 1);
   24020         1761 :   final_end_function ();
   24021         1761 :   assemble_end_function (thunk_fndecl, fnname);
   24022              : 
   24023         1761 :   flag_force_indirect_call = saved_flag_force_indirect_call;
   24024         1761 : }
   24025              : /* Write the start-of-file text to asm_out_file.  Calls
                      :    default_file_start first, then emits .code16gcc for TARGET_16BIT,
                      :    calls darwin_file_start when built with TARGET_MACHO, emits the
                      :    .version and __fltused directives when the corresponding
                      :    X86_FILE_START_* macros are nonzero, and emits
                      :    ".intel_syntax noprefix" when ix86_asm_dialect is ASM_INTEL.  */
   24026              : static void
   24027       273142 : x86_file_start (void)
   24028              : {
   24029       273142 :   default_file_start ();
   24030       273142 :   if (TARGET_16BIT)
   24031            6 :     fputs ("\t.code16gcc\n", asm_out_file);
   24032              : #if TARGET_MACHO
   24033              :   darwin_file_start ();
   24034              : #endif
   24035       273142 :   if (X86_FILE_START_VERSION_DIRECTIVE)
   24036              :     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
   24037       273142 :   if (X86_FILE_START_FLTUSED)
   24038              :     fputs ("\t.global\t__fltused\n", asm_out_file);
   24039       273142 :   if (ix86_asm_dialect == ASM_INTEL)
   24040           68 :     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
   24041       273142 : }
   24042              : 
                        /* Return the alignment to use for a field of type TYPE whose
                           alignment has already been computed as COMPUTED.

                           COMPUTED is returned unchanged for TARGET_64BIT or
                           TARGET_ALIGN_DOUBLE, and TARGET_IAMCU defers to iamcu_alignment.
                           Otherwise, looking through array types, fields whose mode is
                           DFmode, DCmode, or of class MODE_INT or MODE_COMPLEX_INT are capped
                           at 32.  The exception is an _Atomic type with COMPUTED above 32:
                           it keeps COMPUTED and, when warn_psabi is set, triggers a note
                           about the GCC 11.1 change.  */
   24043              : int
   24044    102384857 : x86_field_alignment (tree type, int computed)
   24045              : {
   24046    102384857 :   machine_mode mode;
   24047              : 
   24048    102384857 :   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
   24049              :     return computed;
   24050      9121225 :   if (TARGET_IAMCU)
   24051            0 :     return iamcu_alignment (type, computed);
   24052      9121225 :   type = strip_array_types (type);
   24053      9121225 :   mode = TYPE_MODE (type);
   24054      9121225 :   if (mode == DFmode || mode == DCmode
   24055      9015376 :       || GET_MODE_CLASS (mode) == MODE_INT
   24056      3016898 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
   24057              :     {
   24058      6104327 :       if (TYPE_ATOMIC (type) && computed > 32)
   24059              :         {
                                  /* The static flag limits the note to its first occurrence.  */
   24060            0 :           static bool warned;
   24061              : 
   24062            0 :           if (!warned && warn_psabi)
   24063              :             {
   24064            0 :               const char *url
   24065              :                 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
   24066              : 
   24067            0 :               warned = true;
   24068            0 :               inform (input_location, "the alignment of %<_Atomic %T%> "
   24069              :                                       "fields changed in %{GCC 11.1%}",
   24070            0 :                       TYPE_MAIN_VARIANT (type), url);
   24071              :             }
                                  /* No return here: control falls through to the final
                                     "return computed" below.  */
   24072              :         }
   24073              :       else
   24074      6104327 :       return MIN (32, computed);
   24075              :     }
   24076              :   return computed;
   24077              : }
   24078              : 
   24079              : /* Print call to TARGET to FILE.  */
                        /* LABEL is printed at the start of the emitted line in every case.
                           When flag_nop_mcount is set or TARGET is the string "nop", the
                           raw bytes of a no-op are emitted instead of a call: 3 bytes for
                           TARGET_16BIT, 5 bytes otherwise.  With flag_pic on a non-PECOFF
                           target the call is made through the PLT, which requires flag_plt
                           (asserted).  Any other case is a plain direct call.  */
   24080              : 
   24081              : static void
   24082          295 : x86_print_call_or_nop (FILE *file, const char *target,
   24083              :                        const char *label)
   24084              : {
   24085          295 :   if (flag_nop_mcount || !strcmp (target, "nop"))
   24086              :     {
   24087            9 :       if (TARGET_16BIT)
   24088              :         /* 3 byte no-op: lea 0(%si), %si */
   24089            1 :         fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
   24090              :       else
   24091              :         /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
   24092            8 :         fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
   24093              :                  label);
   24094              :     }
   24095          286 :   else if (!TARGET_PECOFF && flag_pic)
   24096              :     {
   24097            8 :       gcc_assert (flag_plt);
   24098              : 
   24099            8 :       fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
   24100              :     }
   24101              :   else
   24102          278 :     fprintf (file, "%s\tcall\t%s\n", label, target);
   24103          295 : }
   24104              : 
                        /* If current_function_decl carries a "fentry_name" attribute, store
                           the string of its first argument in *NAME and return true.
                           Otherwise return false and leave *NAME untouched.  */
   24105              : static bool
   24106          315 : current_fentry_name (const char **name)
   24107              : {
   24108          315 :   tree attr = lookup_attribute ("fentry_name",
   24109          315 :                                 DECL_ATTRIBUTES (current_function_decl));
   24110          315 :   if (!attr)
   24111              :     return false;
   24112            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24113            2 :   return true;
   24114              : }
   24115              : 
                        /* If current_function_decl carries a "fentry_section" attribute,
                           store the string of its first argument in *NAME and return true.
                           Otherwise return false and leave *NAME untouched.  */
   24116              : static bool
   24117           16 : current_fentry_section (const char **name)
   24118              : {
   24119           16 :   tree attr = lookup_attribute ("fentry_section",
   24120           16 :                                 DECL_ATTRIBUTES (current_function_decl));
   24121           16 :   if (!attr)
   24122              :     return false;
   24123            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24124            2 :   return true;
   24125              : }
   24126              : 
   24127              : /* Return a caller-saved register which isn't live or a callee-saved
   24128              :    register which has been saved on stack in the prologue at entry for
   24129              :    profile.  */
                        /* R11_OK says whether %r11 may be picked.  It is consulted only when
                           NO_PROFILE_COUNTERS is defined; otherwise %r11 is always excluded.
                           If no register qualifies, sorry () is reported and R10_REG is
                           returned anyway.  */
   24130              : 
   24131              : static int
   24132           17 : x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
   24133              : {
   24134              :   /* Use %r10 if the profiler is emitted before the prologue or it isn't
   24135              :      used by DRAP.  */
   24136           17 :   if (ix86_profile_before_prologue ()
   24137            4 :       || !crtl->drap_reg
   24138           17 :       || REGNO (crtl->drap_reg) != R10_REG)
   24139              :     return R10_REG;
   24140              : 
   24141              :   /* The profiler is emitted after the prologue.  If there is a
   24142              :      caller-saved register which isn't live or a callee-saved
   24143              :      register saved on stack in the prologue, use it.  */
   24144              : 
   24145            0 :   bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
   24146              : 
                          /* Return the first general register other than %r10 (and %r11
                             unless allowed) that is accessible and either satisfies
                             ix86_save_reg or is a non-fixed call-used register absent from
                             REG_LIVE.  */
   24147            0 :   int i;
   24148            0 :   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   24149            0 :     if (GENERAL_REGNO_P (i)
   24150            0 :         && i != R10_REG
   24151              : #ifdef NO_PROFILE_COUNTERS
   24152            0 :         && (r11_ok || i != R11_REG)
   24153              : #else
   24154              :         && i != R11_REG
   24155              : #endif
   24156            0 :         && TEST_HARD_REG_BIT (accessible_reg_set, i)
   24157            0 :         && (ix86_save_reg (i, true, true)
   24158            0 :             || (call_used_regs[i]
   24159            0 :                 && !fixed_regs[i]
   24160            0 :                 && !REGNO_REG_SET_P (reg_live, i))))
   24161            0 :       return i;
   24162              : 
   24163            0 :   sorry ("no register available for profiling %<-mcmodel=large%s%>",
   24164            0 :          ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
   24165              : 
   24166            0 :   return R10_REG;
   24167              : }
   24168              : 
   24169              : /* Output assembler code to FILE to increment profiler label # LABELNO
   24170              :    for profiling a function entry.  */
                        /* The output has up to four parts, in this order:
                           1. any instruction queued for the function entrance (ENDBR) and
                              the rest of the patchable area;
                           2. when NO_PROFILE_COUNTERS is not defined, a load of the address
                              of the counter label LPREFIX "P" LABELNO;
                           3. the call to the profiling routine, whose form depends on
                              TARGET_64BIT, ix86_cmodel, flag_pic, flag_plt and the
                              assembler dialect;
                           4. when flag_record_mcount is set or the function has a
                              "fentry_section" attribute, a record of the call's address in
                              a separate section.  */
   24171              : void
   24172          315 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
   24173              : {
   24174          315 :   if (cfun->machine->insn_queued_at_entrance)
   24175              :     {
   24176            7 :       if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
   24177            6 :         fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
                              /* Output the part of the patchable area beyond
                                 patch_area_entry, if there is any.  */
   24178            7 :       unsigned int patch_area_size
   24179            7 :         = crtl->patch_area_size - crtl->patch_area_entry;
   24180            7 :       if (patch_area_size)
   24181            2 :         ix86_output_patchable_area (patch_area_size,
   24182              :                                     crtl->patch_area_entry == 0);
   24183              :     }
   24184              : 
   24185          315 :   const char *mcount_name = MCOUNT_NAME;
   24186              : 
   24187          315 :   bool fentry_section_p
   24188          315 :     = (flag_record_mcount
   24189          615 :        || lookup_attribute ("fentry_section",
   24190          300 :                             DECL_ATTRIBUTES (current_function_decl)));
   24191              : 
                          /* The call is labelled "1:" so that the section record emitted at
                             the end can refer to it as "1b".  */
   24192              :   const char *label = fentry_section_p ? "1:" : "";
   24193              : 
                          /* Choose the routine to call.  The "fentry_name" attribute takes
                             precedence, then the fentry_name variable, then
                             MCOUNT_NAME_BEFORE_PROLOGUE under flag_fentry; otherwise
                             MCOUNT_NAME is kept.  */
   24194          315 :   if (current_fentry_name (&mcount_name))
   24195              :     ;
   24196          313 :   else if (fentry_name)
   24197            1 :     mcount_name = fentry_name;
   24198          312 :   else if (flag_fentry)
   24199          300 :     mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
   24200              : 
   24201          315 :   if (TARGET_64BIT)
   24202              :     {
   24203              : #ifndef NO_PROFILE_COUNTERS
   24204              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24205              :         fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
   24206              :       else
   24207              :         fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
   24208              : #endif
   24209              : 
   24210          314 :       int scratch;
   24211          314 :       const char *reg;
   24212          314 :       char legacy_reg[4] = { 0 };
   24213              : 
   24214          314 :       if (!TARGET_PECOFF)
   24215              :         {
   24216          314 :           switch (ix86_cmodel)
   24217              :             {
                                /* Large model: load the absolute address of the routine
                                   into a scratch register and call through it.  */
   24218            7 :             case CM_LARGE:
   24219            7 :               scratch = x86_64_select_profile_regnum (true);
   24220            7 :               reg = hi_reg_name[scratch];
                                  /* For a legacy integer register, build the name as 'r'
                                     followed by the first two characters from hi_reg_name.
                                     NOTE(review): presumably that table holds the short
                                     names (e.g. "ax") for these registers -- confirm.  */
   24221            7 :               if (LEGACY_INT_REGNO_P (scratch))
   24222              :                 {
   24223            0 :                   legacy_reg[0] = 'r';
   24224            0 :                   legacy_reg[1] = reg[0];
   24225            0 :                   legacy_reg[2] = reg[1];
   24226            0 :                   reg = legacy_reg;
   24227              :                 }
   24228            7 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   24229            1 :                 fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
   24230              :                                "\tcall\t%s\n", label, reg, mcount_name,
   24231              :                                reg);
   24232              :               else
   24233            6 :                 fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
   24234              :                          label, mcount_name, reg, reg);
   24235              :               break;
                                /* Large PIC model: form the GOT address in REG (offset of
                                   the GOT from local label 1, plus the address of label 1),
                                   add the routine's @PLTOFF offset, and call through REG.
                                   %r11 is the temporary, so it is not offered as a scratch
                                   register.  This path always prints its own "1:" label
                                   rather than LABEL.  */
   24236           10 :             case CM_LARGE_PIC:
   24237              : #ifdef NO_PROFILE_COUNTERS
   24238           10 :               scratch = x86_64_select_profile_regnum (false);
   24239           10 :               reg = hi_reg_name[scratch];
   24240           10 :               if (LEGACY_INT_REGNO_P (scratch))
   24241              :                 {
   24242            0 :                   legacy_reg[0] = 'r';
   24243            0 :                   legacy_reg[1] = reg[0];
   24244            0 :                   legacy_reg[2] = reg[1];
   24245            0 :                   reg = legacy_reg;
   24246              :                 }
   24247           10 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   24248              :                 {
   24249            1 :                   fprintf (file, "1:movabs\tr11, "
   24250              :                                  "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
   24251            1 :                   fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
   24252            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   24253            1 :                   fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
   24254              :                            mcount_name);
   24255            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   24256            1 :                   fprintf (file, "\tcall\t%s\n", reg);
   24257            1 :                   break;
   24258              :                 }
   24259            9 :               fprintf (file,
   24260              :                        "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
   24261            9 :               fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
   24262            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   24263            9 :               fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
   24264            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   24265            9 :               fprintf (file, "\tcall\t*%%%s\n", reg);
   24266              : #else
   24267              :               sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
   24268              : #endif
   24269            9 :               break;
                                /* Small/medium PIC without a PLT: call indirectly through
                                   the routine's GOT entry.  With a PLT, fall through to
                                   the default handling.  */
   24270           12 :             case CM_SMALL_PIC:
   24271           12 :             case CM_MEDIUM_PIC:
   24272           12 :               if (!flag_plt)
   24273              :                 {
   24274            3 :                   if (ASSEMBLER_DIALECT == ASM_INTEL)
   24275            0 :                     fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
   24276              :                              label, mcount_name);
   24277              :                   else
   24278            3 :                     fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
   24279              :                              label, mcount_name);
   24280              :                   break;
   24281              :                 }
   24282              :               /* fall through */
   24283          294 :             default:
   24284          294 :               x86_print_call_or_nop (file, mcount_name, label);
   24285          294 :               break;
   24286              :             }
   24287              :         }
   24288              :       else
   24289              :         x86_print_call_or_nop (file, mcount_name, label);
   24290              :     }
                          /* 32-bit PIC: addresses are formed relative to %ebx.  */
   24291            1 :   else if (flag_pic)
   24292              :     {
   24293              : #ifndef NO_PROFILE_COUNTERS
   24294              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24295              :         fprintf (file,
   24296              :                  "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
   24297              :                  LPREFIX, labelno);
   24298              :       else
   24299              :         fprintf (file,
   24300              :                  "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
   24301              :                  LPREFIX, labelno);
   24302              : #endif
   24303            0 :       if (flag_plt)
   24304            0 :         x86_print_call_or_nop (file, mcount_name, label);
   24305            0 :       else if (ASSEMBLER_DIALECT == ASM_INTEL)
   24306            0 :         fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
   24307              :                  label, mcount_name);
   24308              :       else
   24309            0 :         fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
   24310              :                  label, mcount_name);
   24311              :     }
                          /* 32-bit non-PIC: absolute counter address and a direct call.  */
   24312              :   else
   24313              :     {
   24314              : #ifndef NO_PROFILE_COUNTERS
   24315              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24316              :         fprintf (file,
   24317              :                  "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
   24318              :                  LPREFIX, labelno);
   24319              :       else
   24320              :         fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
   24321              :                  LPREFIX, labelno);
   24322              : #endif
   24323            1 :       x86_print_call_or_nop (file, mcount_name, label);
   24324              :     }
   24325              : 
                          /* Record the address of label 1 in a dedicated section.  The
                             section name is "__mcount_loc" unless overridden by the
                             "fentry_section" attribute or, failing that, by the
                             fentry_section variable.  */
   24326          315 :   if (fentry_section_p)
   24327              :     {
   24328           16 :       const char *sname = "__mcount_loc";
   24329              : 
   24330           16 :       if (current_fentry_section (&sname))
   24331              :         ;
   24332           14 :       else if (fentry_section)
   24333            1 :         sname = fentry_section;
   24334              : 
   24335           16 :       fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
   24336           16 :       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   24337           16 :       fprintf (file, "\t.previous\n");
   24338              :     }
   24339          315 : }
   24340              : 
   24341              : /* We don't have exact information about the insn sizes, but we may assume
   24342              :    quite safely that we are informed about all 1 byte insns and memory
   24343              :    address sizes.  This is enough to eliminate unnecessary padding in
   24344              :    99% of cases.  */
                        /* Return the estimated minimum size of INSN.  The result is 0 for
                           anything that is not an active insn, for UNSPECV_ALIGN markers and
                           for asm statements; 5 for non-sibling calls that mention a symbolic
                           reference; and otherwise derived from get_attr_length and
                           get_attr_length_address as described below.  */
   24345              : 
   24346              : int
   24347    384213981 : ix86_min_insn_size (rtx_insn *insn)
   24348              : {
   24349    384213981 :   int l = 0, len;
   24350              : 
   24351    384213981 :   if (!INSN_P (insn) || !active_insn_p (insn))
   24352       500384 :     return 0;
   24353              : 
   24354              :   /* Discard alignments we've emitted and jump instructions.  */
   24355    383713597 :   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
   24356    383713597 :       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
   24357              :     return 0;
   24358              : 
   24359              :   /* Important case - calls are always 5 bytes.
   24360              :      It is common to have many calls in the row.  */
   24361    383713591 :   if (CALL_P (insn)
   24362      9151193 :       && symbolic_reference_mentioned_p (PATTERN (insn))
   24363    392534796 :       && !SIBLING_CALL_P (insn))
   24364              :     return 5;
   24365    375129579 :   len = get_attr_length (insn);
   24366    375129579 :   if (len <= 1)
   24367              :     return 1;
   24368              : 
   24369              :   /* For normal instructions we rely on get_attr_length being exact,
   24370              :      with a few exceptions.  */
   24371    366506349 :   if (!JUMP_P (insn))
   24372              :     {
   24373    361175379 :       enum attr_type type = get_attr_type (insn);
   24374              : 
   24375    361175379 :       switch (type)
   24376              :         {
   24377        95376 :         case TYPE_MULTI:
                                  /* Asm statements are estimated at 0.  */
   24378        95376 :           if (GET_CODE (PATTERN (insn)) == ASM_INPUT
   24379        95376 :               || asm_noperands (PATTERN (insn)) >= 0)
   24380          527 :             return 0;
   24381              :           break;
   24382              :         case TYPE_OTHER:
   24383              :         case TYPE_FCMP:
   24384              :           break;
   24385              :         default:
   24386              :           /* Otherwise trust get_attr_length.  */
   24387              :           return len;
   24388              :         }
   24389              : 
                              /* For the exceptional types, estimate from the address length,
                                 using at least 4 when a symbolic reference is mentioned.  */
   24390       474423 :       l = get_attr_length_address (insn);
   24391       474423 :       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
   24392              :         l = 4;
   24393              :     }
                          /* L is still 0 here for jumps, which are therefore estimated
                             at 2.  */
   24394       383933 :   if (l)
   24395        90490 :     return 1+l;
   24396              :   else
   24397      5714903 :     return 2;
   24398              : }
   24399              : 
   24400              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
   24401              : 
   24402              : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   24403              :    window.  */
                        /* Scan the current function's insns with a sliding window bounded by
                           START and INSN, and emit a max_skip_align insn before INSN whenever
                           it would be the fourth jump or call within fewer than 16 estimated
                           bytes.  Sizes come from ix86_min_insn_size.  */
   24404              : 
   24405              : static void
   24406        45424 : ix86_avoid_jump_mispredicts (void)
   24407              : {
   24408        45424 :   rtx_insn *insn, *start = get_insns ();
   24409        45424 :   int nbytes = 0, njumps = 0;
                          /* ISJUMP records whether the insn most recently dropped from the
                             front of the window was a jump or call.  */
   24410        45424 :   bool isjump = false;
   24411              : 
   24412              :   /* Look for all minimal intervals of instructions containing 4 jumps.
   24413              :      The intervals are bounded by START and INSN.  NBYTES is the total
   24414              :      size of instructions in the interval including INSN and not including
   24415              :      START.  When the NBYTES is smaller than 16 bytes, it is possible
   24416              :      that the end of START and INSN ends up in the same 16byte page.
   24417              : 
   24418              :      The smallest offset in the page INSN can start is the case where START
   24419              :      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
   24420              :      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
   24421              : 
   24422              :      Don't consider asm goto as jump, while it can contain a jump, it doesn't
   24423              :      have to, control transfer to label(s) can be performed through other
   24424              :      means, and also we estimate minimum length of all asm stmts as 0.  */
   24425       700820 :   for (insn = start; insn; insn = NEXT_INSN (insn))
   24426              :     {
   24427       655396 :       int min_size;
   24428              : 
                              /* A label may carry its own alignment.  Take the padding it
                                 can introduce (capped at 15) into account by shrinking the
                                 window from the front.  */
   24429       655396 :       if (LABEL_P (insn))
   24430              :         {
   24431          961 :           align_flags alignment = label_to_alignment (insn);
   24432          961 :           int align = alignment.levels[0].log;
   24433          961 :           int max_skip = alignment.levels[0].maxskip;
   24434              : 
   24435          961 :           if (max_skip > 15)
   24436              :             max_skip = 15;
   24437              :           /* If align > 3, only up to 16 - max_skip - 1 bytes can be
   24438              :              already in the current 16 byte page, because otherwise
   24439              :              ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
   24440              :              bytes to reach 16 byte boundary.  */
   24441          961 :           if (align <= 0
   24442          328 :               || (align <= 3 && max_skip != (1 << align) - 1))
   24443          961 :             max_skip = 0;
   24444          961 :           if (dump_file)
   24445            0 :             fprintf (dump_file, "Label %i with max_skip %i\n",
   24446            0 :                      INSN_UID (insn), max_skip);
   24447          961 :           if (max_skip)
   24448              :             {
   24449         6278 :               while (nbytes + max_skip >= 16)
   24450              :                 {
   24451         5950 :                   start = NEXT_INSN (start);
   24452          310 :                   if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
   24453         5967 :                       || CALL_P (start))
   24454          350 :                     njumps--, isjump = true;
   24455              :                   else
   24456              :                     isjump = false;
   24457         5950 :                   nbytes -= ix86_min_insn_size (start);
   24458              :                 }
   24459              :             }
   24460          961 :           continue;
   24461          961 :         }
   24462              : 
   24463       654435 :       min_size = ix86_min_insn_size (insn);
   24464       654435 :       nbytes += min_size;
   24465       654435 :       if (dump_file)
   24466            0 :         fprintf (dump_file, "Insn %i estimated to %i bytes\n",
   24467            0 :                  INSN_UID (insn), min_size);
                              /* Only jumps that are not asm statements, and calls, are
                                 counted; any other insn just extends the window.  */
   24468        46586 :       if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
   24469       654455 :           || CALL_P (insn))
   24470        47601 :         njumps++;
   24471              :       else
   24472       606834 :         continue;
   24473              : 
                              /* Advance START until the window holds at most 3 jumps.  */
   24474        55999 :       while (njumps > 3)
   24475              :         {
   24476         8398 :           start = NEXT_INSN (start);
   24477          549 :           if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
   24478         8398 :               || CALL_P (start))
   24479         1251 :             njumps--, isjump = true;
   24480              :           else
   24481              :             isjump = false;
   24482         8398 :           nbytes -= ix86_min_insn_size (start);
   24483              :         }
   24484        47601 :       gcc_assert (njumps >= 0);
   24485        47601 :       if (dump_file)
   24486            0 :         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
   24487            0 :                  INSN_UID (start), INSN_UID (insn), nbytes);
   24488              : 
                              /* START itself is a jump and three more follow it within
                                 fewer than 16 bytes: pad before INSN.
                                 NOTE(review): the GEN_INT (4) operand is presumably the
                                 log2 of the 16 byte alignment -- confirm against the
                                 max_skip_align pattern.  */
   24489        47601 :       if (njumps == 3 && isjump && nbytes < 16)
   24490              :         {
   24491           40 :           int padsize = 15 - nbytes + ix86_min_insn_size (insn);
   24492              : 
   24493           40 :           if (dump_file)
   24494            0 :             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
   24495            0 :                      INSN_UID (insn), padsize);
   24496           40 :           emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
   24497              :         }
   24498              :     }
   24499        45424 : }
   24500              : #endif
   24501              : 
   24502              : /* AMD Athlon works faster
   24503              :    when RET is not destination of conditional jump or directly preceded
   24504              :    by other jump instruction.  We avoid the penalty by inserting NOP just
   24505              :    before the RET instructions in such cases.  */
                        /* Implementation note: for every predecessor of the exit block that
                           ends in a return, decide whether the return needs treatment and,
                           if so, emit the insn from gen_simple_return_internal_long in front
                           of it and delete the original return.  */
   24506              : static void
   24507        45144 : ix86_pad_returns (void)
   24508              : {
   24509        45144 :   edge e;
   24510        45144 :   edge_iterator ei;
   24511              : 
   24512        90312 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24513              :     {
   24514        45168 :       basic_block bb = e->src;
   24515        45168 :       rtx_insn *ret = BB_END (bb);
   24516        45168 :       rtx_insn *prev;
   24517        45168 :       bool replace = false;
   24518              : 
                              /* Skip blocks that do not end in a return jump, and blocks
                                 optimized for size.  */
   24519        45158 :       if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
   24520        90326 :           || optimize_bb_for_size_p (bb))
   24521           23 :         continue;
                              /* Find the nearest preceding active insn or label.  */
   24522       179724 :       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
   24523       134161 :         if (active_insn_p (prev) || LABEL_P (prev))
   24524              :           break;
                              /* The return directly follows a label.  Replace it if BB is
                                 reached by a non-fallthru edge with non-zero frequency
                                 whose source block has a non-negative index.  */
   24525        45145 :       if (prev && LABEL_P (prev))
   24526              :         {
                                  /* These intentionally shadow the outer E and EI.  */
   24527           43 :           edge e;
   24528           43 :           edge_iterator ei;
   24529              : 
   24530           56 :           FOR_EACH_EDGE (e, ei, bb->preds)
   24531          146 :             if (EDGE_FREQUENCY (e) && e->src->index >= 0
   24532           97 :                 && !(e->flags & EDGE_FALLTHRU))
   24533              :               {
   24534              :                 replace = true;
   24535              :                 break;
   24536              :               }
   24537              :         }
   24538           43 :       if (!replace)
   24539              :         {
                                  /* Otherwise replace when the previous active insn is a
                                     conditional jump or a call.  */
   24540        45109 :           prev = prev_active_insn (ret);
   24541        45109 :           if (prev
   24542        45109 :               && ((JUMP_P (prev) && any_condjump_p (prev))
   24543        44673 :                   || CALL_P (prev)))
   24544              :             replace = true;
   24545              :           /* Empty functions get branch mispredict even when
   24546              :              the jump destination is not visible to us.  */
   24547        45109 :           if (!prev && !optimize_function_for_size_p (cfun))
   24548              :             replace = true;
   24549              :         }
   24550        44691 :       if (replace)
   24551              :         {
   24552          489 :           emit_jump_insn_before (gen_simple_return_internal_long (), ret);
   24553          489 :           delete_insn (ret);
   24554              :         }
   24555              :     }
   24556        45144 : }
   24557              : 
   24558              : /* Count the minimum number of instructions in BB.  Return 4 if the
   24559              :    number of instructions >= 4.  */
                        /* Counting stops at the first return jump.  Debug insns and insns
                           whose pattern is a USE or a CLOBBER are not counted.  */
   24560              : 
   24561              : static int
   24562           42 : ix86_count_insn_bb (basic_block bb)
   24563              : {
   24564           42 :   rtx_insn *insn;
   24565           42 :   int insn_count = 0;
   24566              : 
   24567              :   /* Count number of instructions in this block.  Return 4 if the number
   24568              :      of instructions >= 4.  */
   24569          297 :   FOR_BB_INSNS (bb, insn)
   24570              :     {
   24571              :       /* Only happen in exit blocks.  */
   24572          291 :       if (JUMP_P (insn)
   24573          291 :           && ANY_RETURN_P (PATTERN (insn)))
   24574              :         break;
   24575              : 
   24576          267 :       if (NONDEBUG_INSN_P (insn)
   24577          102 :           && GET_CODE (PATTERN (insn)) != USE
   24578          351 :           && GET_CODE (PATTERN (insn)) != CLOBBER)
   24579              :         {
   24580           84 :           insn_count++;
                                  /* The count saturates at 4, so stop scanning early.  */
   24581           84 :           if (insn_count >= 4)
   24582              :             return insn_count;
   24583              :         }
   24584              :     }
   24585              : 
   24586              :   return insn_count;
   24587              : }
   24588              : 
   24589              : 
   24590              : /* Count the minimum number of instructions in code path in BB.
   24591              :    Return 4 if the number of instructions >= 4.  */
                        /* The result is the count for BB itself plus the smallest count
                           among those predecessors of BB that are entered from the entry
                           block.  It is 4 when no such short path exists, and BB is then not
                           counted at all.  The sum is not clamped, so it can exceed 4.  */
   24592              : 
   24593              : static int
   24594           62 : ix86_count_insn (basic_block bb)
   24595              : {
   24596           62 :   edge e;
   24597           62 :   edge_iterator ei;
   24598           62 :   int min_prev_count;
   24599              : 
   24600              :   /* Only bother counting instructions along paths with no
   24601              :      more than 2 basic blocks between entry and exit.  Given
   24602              :      that BB has an edge to exit, determine if a predecessor
   24603              :      of BB has an edge from entry.  If so, compute the number
   24604              :      of instructions in the predecessor block.  If there
   24605              :      happen to be multiple such blocks, compute the minimum.  */
   24606           62 :   min_prev_count = 4;
   24607          145 :   FOR_EACH_EDGE (e, ei, bb->preds)
   24608              :     {
   24609          109 :       edge prev_e;
   24610          109 :       edge_iterator prev_ei;
   24611              : 
                              /* BB is entered straight from the entry block, so nothing
                                 precedes it on that path.  */
   24612          109 :       if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
   24613              :         {
   24614           26 :           min_prev_count = 0;
   24615           26 :           break;
   24616              :         }
                              /* Count E->src only if it has an edge from the entry block;
                                 one such edge is enough.  */
   24617          182 :       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
   24618              :         {
   24619          109 :           if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
   24620              :             {
   24621           10 :               int count = ix86_count_insn_bb (e->src);
   24622           10 :               if (count < min_prev_count)
   24623           83 :                 min_prev_count = count;
   24624              :               break;
   24625              :             }
   24626              :         }
   24627              :     }
   24628              : 
                          /* Add BB's own insns only when a short path was found.  */
   24629           62 :   if (min_prev_count < 4)
   24630           32 :     min_prev_count += ix86_count_insn_bb (bb);
   24631              : 
   24632           62 :   return min_prev_count;
   24633              : }
   24634              : 
   24635              : /* Pad short function to 4 instructions.
                      : 
                      :    Every predecessor of the exit block that ends in a return jump is
                      :    examined.  When ix86_count_insn reports fewer than 4 instructions
                      :    for it, 2 * (4 - count) NOPs are emitted before the
                      :    NOTE_INSN_EPILOGUE_BEG note found by walking backwards from the
                      :    return, or before the return itself if no such note is found.  */
   24636              : 
   24637              : static void
   24638           63 : ix86_pad_short_function (void)
   24639              : {
   24640           63 :   edge e;
   24641           63 :   edge_iterator ei;
   24642              : 
   24643          128 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24644              :     {
   24645           65 :       rtx_insn *ret = BB_END (e->src);
   24646           65 :       if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
   24647              :         {
   24648           62 :           int insn_count = ix86_count_insn (e->src);
   24649              : 
   24650              :           /* Pad short function.  */
   24651           62 :           if (insn_count < 4)
   24652              :             {
   24653              :               rtx_insn *insn = ret;
   24654              : 
   24655              :               /* Find epilogue: walk backwards from the return until the
                      :                  NOTE_INSN_EPILOGUE_BEG note or the start of the insn
                      :                  chain is reached.  */
   24656              :               while (insn
   24657           60 :                      && (!NOTE_P (insn)
   24658           26 :                          || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
   24659           37 :                 insn = PREV_INSN (insn);
   24660              : 
   24661           23 :               if (!insn) /* No epilogue note found: pad right before the return.  */
   24662            0 :                 insn = ret;
   24663              : 
   24664              :               /* Two NOPs count as one instruction.  */
   24665           23 :               insn_count = 2 * (4 - insn_count);
   24666           23 :               emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
   24667              :             }
   24668              :         }
   24669              :     }
   24670           63 : }
   24671              : 
   24672              : /* Fix up a Windows system unwinder issue.  If an EH region falls through into
   24673              :    the epilogue, the Windows system unwinder will apply epilogue logic and
   24674              :    produce incorrect offsets.  This can be avoided by adding a nop between
   24675              :    the last insn that can throw and the first insn of the epilogue.
                      : 
                      :    The check is made once per predecessor edge of the exit block.  Edges
                      :    whose source block has no NOTE_INSN_EPILOGUE_BEG note before its end,
                      :    or whose last active insn before that note cannot throw internally,
                      :    are left untouched.  */
   24676              : 
   24677              : static void
   24678            0 : ix86_seh_fixup_eh_fallthru (void)
   24679              : {
   24680            0 :   edge e;
   24681            0 :   edge_iterator ei;
   24682              : 
   24683            0 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24684              :     {
   24685            0 :       rtx_insn *insn, *next;
   24686              : 
   24687              :       /* Find the beginning of the epilogue.  */
   24688            0 :       for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
   24689            0 :         if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
   24690              :           break;
   24691            0 :       if (insn == NULL)
   24692            0 :         continue;
   24693              : 
   24694              :       /* We only care about preceding insns that can throw.  */
   24695            0 :       insn = prev_active_insn (insn);
   24696            0 :       if (insn == NULL || !can_throw_internal (insn))
   24697            0 :         continue;
   24698              : 
   24699              :       /* Do not separate calls from their debug information: advance INSN
                      :          over any NOTE_INSN_VAR_LOCATION notes that directly follow it so
                      :          that the nop is emitted after them.  */
   24700            0 :       for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
   24701            0 :         if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
   24702            0 :           insn = next;
   24703              :         else
   24704              :           break;
   24705              : 
   24706            0 :       emit_insn_after (gen_nops (const1_rtx), insn);
   24707              :     }
   24708            0 : }
   24709              : /* Split vector load from parm_decl to elemental loads to avoid STLF
   24710              :    stalls.
                      : 
                      :    The insn chain is scanned from get_insns ().  Scanning stops once
                      :    more than x86_stlf_window_ninsns non-debug insns have been seen, or
                      :    at the first unconditional jump, return or call.  Within that
                      :    window, a single-set insn whose source is a V2DFmode MEM with a
                      :    MEM_EXPR based on a PARM_DECL is replaced by two DFmode loads: a
                      :    loadlpd of the low half emitted before the insn and a loadhpd of
                      :    the high half that takes over the original insn.  */
   24711              : static void
   24712       978962 : ix86_split_stlf_stall_load ()
   24713              : {
   24714       978962 :   rtx_insn* insn, *start = get_insns ();
   24715       978962 :   unsigned window = 0;
   24716              : 
   24717     26916436 :   for (insn = start; insn; insn = NEXT_INSN (insn))
   24718              :     {
   24719     26915586 :       if (!NONDEBUG_INSN_P (insn))
   24720     15266812 :         continue;
   24721     11648774 :       window++;
   24722              :       /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
   24723              :          other, just emulate for pipeline) before stalled load, stlf stall
   24724              :          case is as fast as no stall cases on CLX.
   24725              :          Since CFG is freed before machine_reorg, just do a rough
   24726              :          calculation of the window according to the layout.  */
   24727     11648774 :       if (window > (unsigned) x86_stlf_window_ninsns)
   24728              :         return;
   24729              : 
   24730     11630798 :       if (any_uncondjump_p (insn)
   24731     11595036 :           || ANY_RETURN_P (PATTERN (insn))
   24732     22849311 :           || CALL_P (insn))
   24733              :         return;
   24734              : 
   24735     10670662 :       rtx set = single_set (insn);
   24736     10670662 :       if (!set)
   24737       435241 :         continue;
   24738     10235421 :       rtx src = SET_SRC (set);
   24739     20470490 :       if (!MEM_P (src)
   24740              :           /* Only handle V2DFmode load since it doesn't need any scratch
   24741              :              register.  */
   24742      1462559 :           || GET_MODE (src) != E_V2DFmode
   24743         5462 :           || !MEM_EXPR (src)
   24744     10239370 :           || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
   24745     10235069 :         continue;
   24746              : 
   24747          352 :       rtx zero = CONST0_RTX (V2DFmode);
   24748          352 :       rtx dest = SET_DEST (set);
   24749          352 :       rtx m = adjust_address (src, DFmode, 0); /* Low DFmode half of the source.  */
   24750          352 :       rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
   24751          352 :       emit_insn_before (loadlpd, insn);
   24752          352 :       m = adjust_address (src, DFmode, 8); /* High DFmode half, 8 bytes further on.  */
   24753          352 :       rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
   24754          352 :       if (dump_file && (dump_flags & TDF_DETAILS))
   24755              :         {
   24756            0 :           fputs ("Due to potential STLF stall, split instruction:\n",
   24757              :                  dump_file);
   24758            0 :           print_rtl_single (dump_file, insn);
   24759            0 :           fputs ("To:\n", dump_file);
   24760            0 :           print_rtl_single (dump_file, loadlpd);
   24761            0 :           print_rtl_single (dump_file, loadhpd);
   24762              :         }
   24763          352 :       PATTERN (insn) = loadhpd; /* The original insn becomes the high-half load.  */
   24764          352 :       INSN_CODE (insn) = -1; /* Drop the cached code so the new pattern is re-recognized.  */
   24765          352 :       gcc_assert (recog_memoized (insn) != -1);
   24766              :     }
   24767              : }
   24768              : 
   24769              : /* Implement machine specific optimizations.  We implement padding of returns
   24770              :    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.
                      : 
                      :    Order of work: recompute the insn -> basic block mapping; apply the
                      :    SEH fall-through fixup when TARGET_SEH is set and the function has
                      :    exception handlers; then, only when optimizing the function for
                      :    speed, split STLF-stalling loads (TARGET_SSE2), pad short functions
                      :    or else pad returns, and finally, if ASM_OUTPUT_MAX_SKIP_ALIGN is
                      :    defined, run the four-jump-limit pass.  */
   24771              : static void
   24772      1481484 : ix86_reorg (void)
   24773              : {
   24774              :   /* We are freeing block_for_insn in the toplev to keep compatibility
   24775              :      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
   24776      1481484 :   compute_bb_for_insn ();
   24777              : 
   24778      1481484 :   if (TARGET_SEH && current_function_has_exception_handlers ())
   24779              :     ix86_seh_fixup_eh_fallthru ();
   24780              : 
   24781      1481484 :   if (optimize && optimize_function_for_speed_p (cfun))
   24782              :     {
   24783       981264 :       if (TARGET_SSE2)
   24784       978962 :         ix86_split_stlf_stall_load ();
   24785       981264 :       if (TARGET_PAD_SHORT_FUNCTION)
   24786           63 :         ix86_pad_short_function ();
   24787       981201 :       else if (TARGET_PAD_RETURNS) /* Return padding is skipped when short functions are padded.  */
   24788        45144 :         ix86_pad_returns ();
   24789              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
   24790       981264 :       if (TARGET_FOUR_JUMP_LIMIT)
   24791        45424 :         ix86_avoid_jump_mispredicts ();
   24792              : #endif
   24793              :     }
   24794      1481484 : }
   24795              : 
   24796              : /* Return true when a QImode register that must be represented via REX prefix
   24797              :    is used, i.e. when some operand of INSN (as extracted into recog_data)
                      :    is a general register for which QI_REGNO_P is false.  */
   24798              : bool
   24799      8996484 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
   24800              : {
   24801      8996484 :   int i;
   24802      8996484 :   extract_insn_cached (insn);
   24803     34100383 :   for (i = 0; i < recog_data.n_operands; i++)
   24804      4618044 :     if (GENERAL_REG_P (recog_data.operand[i])
   24805     22293411 :         && !QI_REGNO_P (REGNO (recog_data.operand[i])))
   24806              :        return true;
   24807              :   return false;
   24808              : }
   24809              : 
   24810              : /* Return true when INSN mentions a register that must be encoded using a REX
   24811              :    prefix.  INSN may be an insn, in which case its PATTERN is searched, or
                      :    any other rtx, which is searched directly.  A register matches when
                      :    it satisfies REX_INT_REGNO_P, REX_SSE_REGNO_P or REX2_INT_REGNO_P.  */
   24812              : bool
   24813    196639509 : x86_extended_reg_mentioned_p (rtx insn)
   24814              : {
   24815    196639509 :   subrtx_iterator::array_type array;
   24816   1030255745 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24817              :     {
   24818    881829807 :       const_rtx x = *iter;
   24819    881829807 :       if (REG_P (x)
   24820    881829807 :           && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
   24821    253328702 :               || REX2_INT_REGNO_P (REGNO (x))))
   24822     48213571 :         return true;
   24823              :     }
   24824    148425938 :   return false;
   24825    196639509 : }
   24826              : 
   24827              : /* Return true when INSN mentions a register that must be encoded using a REX2
   24828              :    prefix, i.e. a register satisfying REX2_INT_REGNO_P.  INSN may be an
                      :    insn, in which case its PATTERN is searched, or any other rtx.  */
   24829              : bool
   24830      2069860 : x86_extended_rex2reg_mentioned_p (rtx insn)
   24831              : {
   24832      2069860 :   subrtx_iterator::array_type array;
   24833      9635336 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24834              :     {
   24835      7566151 :       const_rtx x = *iter;
   24836      7566151 :       if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
   24837          675 :         return true;
   24838              :     }
   24839      2069185 :   return false;
   24840      2069860 : }
   24841              : 
   24842              : /* Return true when any of the first NOPS entries of OPERANDS mentions a
   24843              :    register that must be encoded using an EVEX prefix: either the operand
                      :    satisfies EXT_REX_SSE_REG_P or x86_extended_rex2reg_mentioned_p is
                      :    true for it.  */
   24844              : bool
   24845           10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
   24846              : {
   24847           10 :   int i;
   24848           28 :   for (i = 0; i < nops; i++)
   24849           22 :     if (EXT_REX_SSE_REG_P (operands[i])
   24850           40 :         || x86_extended_rex2reg_mentioned_p (operands[i]))
   24851            4 :       return true;
   24852              :   return false;
   24853              : }
   24854              : 
   24855              : /* If profitable, negate (without causing overflow) integer constant
   24856              :    of mode MODE at location LOC.  Return true in this case.
                      : 
                      :    *LOC is left untouched and false is returned when it is not a
                      :    CONST_INT, when it is the sign-bit constant of the mode, or when
                      :    negation is not profitable.  MODE must be DImode, SImode, HImode or
                      :    QImode; any other mode hits gcc_unreachable.  */
   24857              : bool
   24858      5905940 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
   24859              : {
   24860      5905940 :   HOST_WIDE_INT val;
   24861              : 
   24862      5905940 :   if (!CONST_INT_P (*loc))
   24863              :     return false;
   24864              : 
   24865      4980409 :   switch (mode)
   24866              :     {
   24867      2829283 :     case E_DImode:
   24868              :       /* DImode x86_64 constants must fit in 32 bits.  */
   24869      2829283 :       gcc_assert (x86_64_immediate_operand (*loc, mode));
   24870              : 
   24871              :       mode = SImode; /* So the sign-bit test below is done in SImode.  */
   24872              :       break;
   24873              : 
   24874              :     case E_SImode:
   24875              :     case E_HImode:
   24876              :     case E_QImode:
   24877              :       break;
   24878              : 
   24879            0 :     default:
   24880            0 :       gcc_unreachable ();
   24881              :     }
   24882              : 
   24883              :   /* Avoid overflows: the sign-bit constant of MODE is never negated.  */
   24884      4980409 :   if (mode_signbit_p (mode, *loc))
   24885              :     return false;
   24886              : 
   24887      4979888 :   val = INTVAL (*loc);
   24888              : 
   24889              :   /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
   24890              :      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
   24891      4979888 :   if ((val < 0 && val != -128)
   24892      3272732 :       || val == 128)
   24893              :     {
   24894      1718364 :       *loc = GEN_INT (-val);
   24895      1718364 :       return true;
   24896              :     }
   24897              : 
   24898              :   return false;
   24899              : }
   24900              : 
   24901              : /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   24902              :    optabs would emit if we didn't have TFmode patterns.
                      : 
                      :    OPERANDS[0] is the destination, whose mode selects the FP mode, and
                      :    OPERANDS[1] is the SImode or DImode input.  The emitted sequence
                      :    compares the input against zero with a signed LT test.  If it is
                      :    not negative it is converted directly.  Otherwise it is shifted
                      :    right by one with the shifted-out low bit ORed back in, converted,
                      :    and the result is added to itself.  */
   24903              : 
   24904              : void
   24905         4512 : x86_emit_floatuns (rtx operands[2])
   24906              : {
   24907         4512 :   rtx_code_label *neglab, *donelab;
   24908         4512 :   rtx i0, i1, f0, in, out;
   24909         4512 :   machine_mode mode, inmode;
   24910              : 
   24911         4512 :   inmode = GET_MODE (operands[1]);
   24912         4512 :   gcc_assert (inmode == SImode || inmode == DImode);
   24913              : 
   24914         4512 :   out = operands[0];
   24915         4512 :   in = force_reg (inmode, operands[1]);
   24916         4512 :   mode = GET_MODE (out);
   24917         4512 :   neglab = gen_label_rtx ();
   24918         4512 :   donelab = gen_label_rtx ();
   24919         4512 :   f0 = gen_reg_rtx (mode);
   24920              : 
   24921         4512 :   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); /* in < 0 as signed: go to NEGLAB.  */
   24922              : 
   24923         4512 :   expand_float (out, in, 0); /* Non-negative input: convert it directly.  */
   24924              : 
   24925         4512 :   emit_jump_insn (gen_jump (donelab));
   24926         4512 :   emit_barrier ();
   24927              : 
   24928         4512 :   emit_label (neglab);
   24929              : 
   24930         4512 :   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, /* i0 = in >> 1 (logical).  */
   24931              :                             1, OPTAB_DIRECT);
   24932         4512 :   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, /* i1 = in & 1, the bit shifted out.  */
   24933              :                             1, OPTAB_DIRECT);
   24934         4512 :   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); /* Fold that bit back into i0.  */
   24935              : 
   24936         4512 :   expand_float (f0, i0, 0);
   24937              : 
   24938         4512 :   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); /* out = f0 + f0 undoes the halving.  */
   24939              : 
   24940         4512 :   emit_label (donelab);
   24941         4512 : }
   24942              : 
   24943              : /* Return the diagnostic message string if conversion from FROMTYPE to
   24944              :    TOTYPE is not allowed, NULL otherwise.
                      : 
                      :    Only conversions whose element modes differ are inspected.  Without
                      :    TARGET_SSE2, any such conversion to or from BFmode/HFmode is
                      :    rejected.  With TARGET_AVX512BF16 or TARGET_AVXNECONVERT, a
                      :    conversion between a BFmode type and an HImode type triggers a
                      :    warning but is still allowed (NULL is returned).  */
   24945              : 
   24946              : static const char *
   24947   1081008935 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
   24948              : {
   24949   1081008935 :   machine_mode from_mode = element_mode (fromtype);
   24950   1081008935 :   machine_mode to_mode = element_mode (totype);
   24951              : 
   24952   1081008935 :   if (!TARGET_SSE2 && from_mode != to_mode)
   24953              :     {
   24954              :       /* Do not allow conversions to/from BFmode/HFmode scalar types
   24955              :          when TARGET_SSE2 is not available.  */
   24956       468009 :       if (from_mode == BFmode)
   24957              :         return N_("invalid conversion from type %<__bf16%> "
   24958              :                   "without option %<-msse2%>");
   24959       468008 :       if (from_mode == HFmode)
   24960              :         return N_("invalid conversion from type %<_Float16%> "
   24961              :                   "without option %<-msse2%>");
   24962       468008 :       if (to_mode == BFmode)
   24963              :         return N_("invalid conversion to type %<__bf16%> "
   24964              :                   "without option %<-msse2%>");
   24965       468008 :       if (to_mode == HFmode)
   24966              :         return N_("invalid conversion to type %<_Float16%> "
   24967              :                   "without option %<-msse2%>");
   24968              :     }
   24969              : 
   24970              :   /* Warn for silent implicit conversion between __bf16 and short,
   24971              :      since __bfloat16 is redefined as real __bf16 instead of short
   24972              :      since GCC13.  */
   24973   1081008933 :   if (element_mode (fromtype) != element_mode (totype)
   24974   1081008933 :       && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
   24975              :     {
   24976              :       /* Warn for silent implicit conversion where user may expect
   24977              :          a bitcast.  The check uses TYPE_MODE, not the element mode.  */
   24978      7766483 :       if ((TYPE_MODE (fromtype) == BFmode
   24979          279 :            && TYPE_MODE (totype) == HImode)
   24980      7766761 :           || (TYPE_MODE (totype) == BFmode
   24981          423 :               && TYPE_MODE (fromtype) == HImode))
   24982            1 :         warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
   24983              :                 "to real %<__bf16%> since GCC 13.1, be careful of "
   24984              :                  "implicit conversion between %<__bf16%> and %<short%>; "
   24985              :                  "an explicit bitcast may be needed here");
   24986              :     }
   24987              : 
   24988              :   /* Conversion allowed.  */
   24989              :   return NULL;
   24990              : }
   24991              : 
   24992              : /* Return the diagnostic message string if the unary operation OP is
   24993              :    not permitted on TYPE, NULL otherwise.  The decision is based on the
                      :    element mode of TYPE, and nothing is rejected when TARGET_SSE2 is
                      :    available.  */
   24994              : 
   24995              : static const char *
   24996     90909272 : ix86_invalid_unary_op (int op, const_tree type)
   24997              : {
   24998     90909272 :   machine_mode mmode = element_mode (type);
   24999              :   /* Reject all single-operand operations on BFmode/HFmode except for &
   25000              :      when TARGET_SSE2 is not available.  */
   25001     90909272 :   if (!TARGET_SSE2 && op != ADDR_EXPR)
   25002              :     {
   25003       111098 :       if (mmode == BFmode)
   25004              :         return N_("operation not permitted on type %<__bf16%> "
   25005              :                   "without option %<-msse2%>");
   25006       111098 :       if (mmode == HFmode)
   25007            0 :         return N_("operation not permitted on type %<_Float16%> "
   25008              :                   "without option %<-msse2%>");
   25009              :     }
   25010              : 
   25011              :   /* Operation allowed.  */
   25012              :   return NULL;
   25013              : }
   25014              : 
   25015              : /* Return the diagnostic message string if the binary operation OP is
   25016              :    not permitted on TYPE1 and TYPE2, NULL otherwise.  OP itself is not
                      :    examined; only the element modes of the two types matter, and
                      :    BFmode is checked before HFmode.  */
   25017              : 
   25018              : static const char *
   25019    160518388 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
   25020              :                         const_tree type2)
   25021              : {
   25022    160518388 :   machine_mode type1_mode = element_mode (type1);
   25023    160518388 :   machine_mode type2_mode = element_mode (type2);
   25024              :   /* Reject all 2-operand operations on BFmode or HFmode
   25025              :      when TARGET_SSE2 is not available.  */
   25026    160518388 :   if (!TARGET_SSE2)
   25027              :     {
   25028      1008820 :       if (type1_mode == BFmode || type2_mode == BFmode)
   25029              :         return N_("operation not permitted on type %<__bf16%> "
   25030              :                   "without option %<-msse2%>");
   25031              : 
   25032      1008820 :       if (type1_mode == HFmode || type2_mode == HFmode)
   25033            0 :         return N_("operation not permitted on type %<_Float16%> "
   25034              :                   "without option %<-msse2%>");
   25035              :     }
   25036              : 
   25037              :   /* Operation allowed.  */
   25038              :   return NULL;
   25039              : }
   25040              : 
   25041              : 
   25042              : /* Target hook for scalar_mode_supported_p.  Decimal float modes defer to
                      :    default_decimal_float_supported_p; TFmode, HFmode and BFmode are
                      :    always reported as supported; every other mode defers to
                      :    default_scalar_mode_supported_p.  */
   25043              : static bool
   25044      4583380 : ix86_scalar_mode_supported_p (scalar_mode mode)
   25045              : {
   25046      4583380 :   if (DECIMAL_FLOAT_MODE_P (mode))
   25047       631001 :     return default_decimal_float_supported_p ();
   25048      3952379 :   else if (mode == TFmode)
   25049              :     return true;
   25050      3629887 :   else if (mode == HFmode || mode == BFmode)
   25051              :     return true;
   25052              :   else
   25053      2986886 :     return default_scalar_mode_supported_p (mode);
   25054              : }
   25055              : 
   25056              : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   25057              :    if MODE is HFmode or BFmode, and punt to the generic implementation
                      :    otherwise.  */
   25058              : 
   25059              : static bool
   25060      2214613 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
   25061              : {
   25062              :   /* NB: Always return TRUE for HFmode so that the _Float16 type will
   25063              :      be defined by the C front-end for AVX512FP16 intrinsics.  We will
   25064              :      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
   25065              :      enabled.  BFmode is accepted unconditionally in the same way.  */
   25066      1893588 :   return ((mode == HFmode || mode == BFmode)
   25067      3787176 :           ? true
   25068      1572563 :           : default_libgcc_floating_mode_supported_p (mode));
   25069              : }
   25070              : 
   25071              : /* Implements target hook vector_mode_supported_p.  MODE is accepted when
                      :    it is valid for one of the enabled register classes tested below
                      :    (SSE, SSE2, AVX 256-bit, AVX512F, MMX, 3DNow!), or when it is
                      :    V2QImode, which is accepted regardless of the enabled ISA.  */
   25072              : static bool
   25073   1345721517 : ix86_vector_mode_supported_p (machine_mode mode)
   25074              : {
   25075              :   /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
   25076              :      either.  */
   25077   1482728891 :   if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
   25078              :     return false;
   25079   1345721119 :   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   25080              :     return true;
   25081   1131608173 :   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   25082              :     return true;
   25083    507241341 :   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   25084              :     return true;
   25085    366713585 :   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   25086              :     return true;
   25087    232509567 :   if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   25088    232453211 :       && VALID_MMX_REG_MODE (mode))
   25089              :     return true;
   25090     33475189 :   if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
   25091     32839907 :       && VALID_MMX_REG_MODE_3DNOW (mode))
   25092              :     return true;
   25093     22351555 :   if (mode == V2QImode)
   25094        24808 :     return true;
   25095              :   return false;
   25096              : }
   25097              : 
   25098              : /* Target hook for c_mode_for_suffix.  Map the constant suffix 'q' to
                      :    TFmode and 'w' to XFmode; any other suffix yields VOIDmode.  */
   25099              : static machine_mode
   25100       196240 : ix86_c_mode_for_suffix (char suffix)
   25101              : {
   25102       196240 :   if (suffix == 'q')
   25103              :     return TFmode;
   25104           37 :   if (suffix == 'w')
   25105              :     return XFmode;
   25106              : 
   25107            0 :   return VOIDmode;
   25108              : }
   25109              : 
   25110              : /* Helper function to map common constraints to non-EGPR ones.
   25111              :    All related constraints have j prefix, and j plus Upper letter
   25112              :    means the constraint is strictly EGPR enabled, while j plus
   25113              :    lower letter indicates the constraint is strictly gpr16 only.
                      :    NOTE(review): this comment used to say "h" prefix, but the table
                      :    and the code below use "j" - confirm against constraints.md.
   25114              : 
   25115              :    Specially for "g" constraint, split it to rmi as there is
   25116              :    no corresponding general constraint define for backend.
   25117              : 
   25118              :    Here is the full list to map constraints that may involve
   25119              :    gpr to j prefixed.
   25120              : 
   25121              :    "g" -> "jrjmi"
   25122              :    "r" -> "jr"
   25123              :    "m" -> "jm"
   25124              :    "<" -> "j<"
   25125              :    ">" -> "j>"
   25126              :    "o" -> "jo"
   25127              :    "V" -> "jV"
   25128              :    "p" -> "jp"
   25129              :    "Bm" -> "ja"
                      : 
                      :    Each entry of CONSTRAINTS is replaced in place by a freshly
                      :    xstrdup'ed string, except entries starting with "=@cc", which
                      :    are left untouched.  The replaced strings are not freed here.
   25130              : */
   25131              : 
   25132           43 : static void map_egpr_constraints (vec<const char *> &constraints)
   25133              : {
   25134           53 :   for (size_t i = 0; i < constraints.length(); i++)
   25135              :     {
   25136           10 :       const char *cur = constraints[i];
   25137              : 
   25138           10 :       if (startswith (cur, "=@cc")) /* Flag outputs are not register constraints.  */
   25139            0 :         continue;
   25140              : 
   25141           10 :       int len = strlen (cur);
   25142           10 :       auto_vec<char> buf;
   25143              : 
   25144           24 :       for (int j = 0; j < len; j++)
   25145              :         {
   25146           14 :           switch (cur[j])
   25147              :             {
   25148            2 :             case 'g':
   25149            2 :               buf.safe_push ('j');
   25150            2 :               buf.safe_push ('r');
   25151            2 :               buf.safe_push ('j');
   25152            2 :               buf.safe_push ('m');
   25153            2 :               buf.safe_push ('i');
   25154            2 :               break;
   25155            8 :             case 'r':
   25156            8 :             case 'm':
   25157            8 :             case '<':
   25158            8 :             case '>':
   25159            8 :             case 'o':
   25160            8 :             case 'V':
   25161            8 :             case 'p':
   25162            8 :               buf.safe_push ('j');
   25163            8 :               buf.safe_push (cur[j]);
   25164            8 :               break;
   25165            0 :             case 'B': /* "Bm" becomes "ja"; any other B-prefixed pair is copied as is.  */
   25166            0 :               if (cur[j + 1] == 'm')
   25167              :                 {
   25168            0 :                   buf.safe_push ('j');
   25169            0 :                   buf.safe_push ('a');
   25170            0 :                   j++;
   25171              :                 }
   25172              :               else
   25173              :                 {
   25174            0 :                   buf.safe_push (cur[j]);
   25175            0 :                   buf.safe_push (cur[j + 1]);
   25176            0 :                   j++;
   25177              :                 }
   25178              :               break;
   25179            0 :             case 'T': /* Two-character constraints: copy both characters unchanged.  */
   25180            0 :             case 'Y':
   25181            0 :             case 'W':
   25182            0 :             case 'j':
   25183            0 :               buf.safe_push (cur[j]);
   25184            0 :               buf.safe_push (cur[j + 1]);
   25185            0 :               j++;
   25186            0 :               break;
   25187            0 :             case '{': /* Copy a "{...}" group verbatim, up to and including '}'.
                      :                          NOTE(review): when the loop below exits, J already
                      :                          indexes the character after '}', and the enclosing
                      :                          for loop then increments J again, so that character
                      :                          looks like it is skipped.  The loop also has no
                      :                          bound check if '}' is missing.  Confirm whether
                      :                          both are intended.  */
   25188            0 :               do
   25189              :                 {
   25190            0 :                   buf.safe_push (cur[j]);
   25191            0 :                 } while (cur[j++] != '}');
   25192              :               break;
   25193            4 :             default:
   25194            4 :               buf.safe_push (cur[j]);
   25195            4 :               break;
   25196              :             }
   25197              :         }
   25198           10 :       buf.safe_push ('\0');
   25199           20 :       constraints[i] = xstrdup (buf.address ());
   25200           10 :     }
   25201           43 : }
   25202              : 
   25203              : /* Worker function for TARGET_MD_ASM_ADJUST.  We implement asm flag
   25204              :    outputs (=@cc<cond>), and maintain source compatibility with the
   25205              :    old cc0-based compiler.  Returns the insns that set the flag outputs,
   25206              :    or NULL (after adding a FLAGS_REG clobber) if none was recognized.  */
   25207              : 
   25208              : static rtx_insn *
   25209       108265 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
   25210              :                     vec<machine_mode> & /*input_modes*/,
   25211              :                     vec<const char *> &constraints, vec<rtx> &/*uses*/,
   25212              :                     vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
   25213              :                     location_t loc)
   25214              : {
   25215       108265 :   bool saw_asm_flag = false;
   25216              :   /* Collect the flag-setting insns in their own sequence.  */
   25217       108265 :   start_sequence ();
   25218              : 
   25219       108265 :   if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
   25220           43 :     map_egpr_constraints (constraints);
   25221              : 
   25222       292349 :   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
   25223              :     {
   25224        76686 :       const char *con = constraints[i];
   25225        76686 :       if (!startswith (con, "=@cc"))
   25226        76598 :         continue;
   25227           88 :       con += 4;
   25228           88 :       if (strchr (con, ',') != NULL)
   25229              :         {
   25230            1 :           error_at (loc, "alternatives not allowed in %<asm%> flag output");
   25231            1 :           continue;
   25232              :         }
   25233              :       /* A leading n inverts the condition named by the rest.  */
   25234           87 :       bool invert = false;
   25235           87 :       if (con[0] == 'n')
   25236           19 :         invert = true, con++;
   25237              :       /* CODE stays UNKNOWN unless the suffix is recognized below.  */
   25238           87 :       machine_mode mode = CCmode;
   25239           87 :       rtx_code code = UNKNOWN;
   25240              : 
   25241           87 :       switch (con[0])
   25242              :         {
   25243           15 :         case 'a':
   25244           15 :           if (con[1] == 0)
   25245              :             mode = CCAmode, code = EQ;
   25246            4 :           else if (con[1] == 'e' && con[2] == 0)
   25247              :             mode = CCCmode, code = NE;
   25248              :           break;
   25249           11 :         case 'b':
   25250           11 :           if (con[1] == 0)
   25251              :             mode = CCCmode, code = EQ;
   25252            6 :           else if (con[1] == 'e' && con[2] == 0)
   25253              :             mode = CCAmode, code = NE;
   25254              :           break;
   25255           14 :         case 'c':
   25256           14 :           if (con[1] == 0)
   25257              :             mode = CCCmode, code = EQ;
   25258              :           break;
   25259            8 :         case 'e':
   25260            8 :           if (con[1] == 0)
   25261              :             mode = CCZmode, code = EQ;
   25262              :           break;
   25263           11 :         case 'g':
   25264           11 :           if (con[1] == 0)
   25265              :             mode = CCGCmode, code = GT;
   25266            5 :           else if (con[1] == 'e' && con[2] == 0)
   25267              :             mode = CCGCmode, code = GE;
   25268              :           break;
   25269           10 :         case 'l':
   25270           10 :           if (con[1] == 0)
   25271              :             mode = CCGCmode, code = LT;
   25272            5 :           else if (con[1] == 'e' && con[2] == 0)
   25273              :             mode = CCGCmode, code = LE;
   25274              :           break;
   25275            4 :         case 'o':
   25276            4 :           if (con[1] == 0)
   25277              :             mode = CCOmode, code = EQ;
   25278              :           break;
   25279            4 :         case 'p':
   25280            4 :           if (con[1] == 0)
   25281              :             mode = CCPmode, code = EQ;
   25282              :           break;
   25283            4 :         case 's':
   25284            4 :           if (con[1] == 0)
   25285              :             mode = CCSmode, code = EQ;
   25286              :           break;
   25287            6 :         case 'z':
   25288            6 :           if (con[1] == 0)
   25289              :             mode = CCZmode, code = EQ;
   25290              :           break;
   25291              :         }
   25292            1 :       if (code == UNKNOWN)
   25293              :         {
   25294            1 :           error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
   25295            1 :           continue;
   25296              :         }
   25297           86 :       if (invert)
   25298           19 :         code = reverse_condition (code);
   25299              :       /* Remember the original destination; OUTPUTS[i] is replaced below.  */
   25300           86 :       rtx dest = outputs[i];
   25301           86 :       if (!saw_asm_flag)
   25302              :         {
   25303              :           /* This is the first asm flag output.  Here we put the flags
   25304              :              register in as the real output and adjust the condition to
   25305              :              allow it.  */
   25306           75 :           constraints[i] = "=Bf";
   25307           75 :           outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
   25308           75 :           saw_asm_flag = true;
   25309              :         }
   25310              :       else
   25311              :         {
   25312              :           /* We don't need the flags register as output twice.  */
   25313           11 :           constraints[i] = "=X";
   25314           11 :           outputs[i] = gen_rtx_SCRATCH (SImode);
   25315              :         }
   25316              :       /* Compare the flags register, viewed in MODE, against zero.  */
   25317           86 :       rtx x = gen_rtx_REG (mode, FLAGS_REG);
   25318           86 :       x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
   25319              : 
   25320           86 :       machine_mode dest_mode = GET_MODE (dest);
   25321           86 :       if (!SCALAR_INT_MODE_P (dest_mode))
   25322              :         {
   25323            3 :           error_at (loc, "invalid type for %<asm%> flag output");
   25324            3 :           continue;
   25325              :         }
   25326              :       /* A QImode DEST is set directly; other modes go through a temp.  */
   25327           83 :       if (dest_mode == QImode)
   25328           73 :         emit_insn (gen_rtx_SET (dest, x));
   25329              :       else
   25330              :         {
   25331           10 :           rtx reg = gen_reg_rtx (QImode);
   25332           10 :           emit_insn (gen_rtx_SET (reg, x));
   25333              : 
   25334           10 :           reg = convert_to_mode (dest_mode, reg, 1);
   25335           10 :           emit_move_insn (dest, reg);
   25336              :         }
   25337              :     }
   25338              : 
   25339       108265 :   rtx_insn *seq = end_sequence ();
   25340              : 
   25341       108265 :   if (saw_asm_flag)
   25342              :     return seq;
   25343              :   else
   25344              :     {
   25345              :       /* If we had no asm flag outputs, clobber the flags.  */
   25346       108190 :       clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
   25347       108190 :       SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
   25348       108190 :       return NULL;
   25349              :     }
   25350              : }
   25351              : 
   25352              : /* Implements target vector targetm.asm.encode_section_info.  */
   25353              : 
   25354              : static void ATTRIBUTE_UNUSED
   25355      9919141 : ix86_encode_section_info (tree decl, rtx rtl, int first)
   25356              : {
   25357      9919141 :   default_encode_section_info (decl, rtl, first);
   25358              :   /* Additionally set SYMBOL_FLAG_FAR_ADDR when ix86_in_large_data_p.  */
   25359      9919141 :   if (ix86_in_large_data_p (decl))
   25360           32 :     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
   25361      9919141 : }
   25362              : 
   25363              : /* Worker function for REVERSE_CONDITION; CCFPmode reverses maybe-unordered.  */
   25364              : 
   25365              : enum rtx_code
   25366     31648513 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
   25367              : {
   25368     31648513 :   return (mode == CCFPmode
   25369     31648513 :           ? reverse_condition_maybe_unordered (code)
   25370     27290037 :           : reverse_condition (code));
   25371              : }
   25372              : 
   25373              : /* Output code to perform an x87 FP register move, from OPERANDS[1] to
   25374              :    OPERANDS[0].  A REG_DEAD note on INSN for the source selects a pop.  */
   25375              : 
   25376              : const char *
   25377       648974 : output_387_reg_move (rtx_insn *insn, rtx *operands)
   25378              : {
   25379       648974 :   if (REG_P (operands[0]))
   25380              :     {
   25381       543915 :       if (REG_P (operands[1])
   25382       543915 :           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25383              :         {
   25384       295699 :           if (REGNO (operands[0]) == FIRST_STACK_REG)
   25385       275113 :             return output_387_ffreep (operands, 0);
   25386              :           return "fstp\t%y0";
   25387              :         }
   25388       248216 :       if (STACK_TOP_P (operands[0]))
   25389       248216 :         return "fld%Z1\t%y1";
   25390              :       return "fst\t%y0";
   25391              :     }
   25392       105059 :   else if (MEM_P (operands[0]))
   25393              :     {
   25394       105059 :       gcc_assert (REG_P (operands[1]));
   25395       105059 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25396              :         return "fstp%Z0\t%y0";
   25397              :       else
   25398              :         {
   25399              :           /* There is no non-popping store to memory for XFmode.
   25400              :              So if we need one, follow the store with a load.  */
   25401         6219 :           if (GET_MODE (operands[0]) == XFmode)
   25402              :             return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
   25403              :           else
   25404         1888 :             return "fst%Z0\t%y0";
   25405              :         }
   25406              :     }
   25407              :   else
   25408            0 :     gcc_unreachable();
   25409              : }
   25410              : #ifdef TARGET_SOLARIS
   25411              : /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
   25412              : 
   25413              : static void
   25414              : i386_solaris_elf_named_section (const char *name, unsigned int flags,
   25415              :                                 tree decl)
   25416              : {
   25417              :   /* With Binutils 2.15, the "@unwind" marker must be specified on
   25418              :      every occurrence of the ".eh_frame" section, not just the first
   25419              :      one.  */
   25420              :   if (TARGET_64BIT
   25421              :       && strcmp (name, ".eh_frame") == 0)
   25422              :     {
   25423              :       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
   25424              :                flags & SECTION_WRITE ? "aw" : "a");
   25425              :       return;
   25426              :     }
   25427              : 
   25428              : #if HAVE_SOLARIS_AS
   25429              :   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
   25430              :     {
   25431              :       solaris_elf_asm_comdat_section (name, flags, decl);
   25432              :       return;
   25433              :     }
   25434              : 
   25435              :   /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
   25436              :      SPARC assembler.  One cannot mix single-letter flags and #exclude, so
   25437              :      only emit the latter here.  */
   25438              :   if (flags & SECTION_EXCLUDE)
   25439              :     {
   25440              :       fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
   25441              :       return;
   25442              :     }
   25443              : #endif
   25444              :   /* No special case applied: defer to the default ELF implementation.  */
   25445              :   default_elf_asm_named_section (name, flags, decl);
   25446              : }
   25447              : #endif /* TARGET_SOLARIS */
   25448              : 
   25449              : /* Return the mangling of TYPE if it is an extended fundamental type.  */
   25450              : 
   25451              : static const char *
   25452   1044940986 : ix86_mangle_type (const_tree type)
   25453              : {
   25454   1044940986 :   type = TYPE_MAIN_VARIANT (type);
   25455              :   /* Only void, boolean, integer and real types are candidates.  */
   25456   1044940986 :   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
   25457              :       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
   25458              :     return NULL;
   25459              :   /* float128_type_node and float64x_type_node get no mangling here.  */
   25460    566870465 :   if (type == float128_type_node || type == float64x_type_node)
   25461              :     return NULL;
   25462              :   /* Otherwise the mangling is chosen from the machine mode alone.  */
   25463    566182858 :   switch (TYPE_MODE (type))
   25464              :     {
   25465              :     case E_BFmode:
   25466              :       return "DF16b";
   25467       301154 :     case E_HFmode:
   25468              :       /* _Float16 is "DF16_".
   25469              :          Align with clang's decision in https://reviews.llvm.org/D33719. */
   25470       301154 :       return "DF16_";
   25471       639048 :     case E_TFmode:
   25472              :       /* __float128 is "g".  */
   25473       639048 :       return "g";
   25474      7854653 :     case E_XFmode:
   25475              :       /* "long double" or __float80 is "e".  */
   25476      7854653 :       return "e";
   25477              :     default:
   25478              :       return NULL;
   25479              :     }
   25480              : }
   25481              : 
   25482              : /* Create C++ tinfo symbols for only conditionally available fundamental
   25483              :    types.  */
   25484              : 
   25485              : static void
   25486            5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
   25487              : {
   25488            5 :   extern tree ix86_float16_type_node;
   25489            5 :   extern tree ix86_bf16_type_node;
   25490              :   /* Without SSE2, install the ix86 nodes if unset, emit, then clear.  */
   25491            5 :   if (!TARGET_SSE2)
   25492              :     {
   25493            0 :       if (!float16_type_node)
   25494            0 :         float16_type_node = ix86_float16_type_node;
   25495            0 :       if (!bfloat16_type_node)
   25496            0 :         bfloat16_type_node = ix86_bf16_type_node;
   25497            0 :       callback (float16_type_node);
   25498            0 :       callback (bfloat16_type_node);
   25499            0 :       float16_type_node = NULL_TREE;
   25500            0 :       bfloat16_type_node = NULL_TREE;
   25501              :     }
   25502            5 : }
   25503              : 
   25504              : static GTY(()) tree ix86_tls_stack_chk_guard_decl;
   25505              : /* Return the guard object; address-space qualified if TARGET_SSP_TLS_GUARD.  */
   25506              : static tree
   25507          330 : ix86_stack_protect_guard (void)
   25508              : {
   25509          330 :   if (TARGET_SSP_TLS_GUARD)
   25510              :     {
   25511          254 :       tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
   25512          254 :       int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
   25513          254 :       tree type = build_qualified_type (type_node, qual);
   25514          254 :       tree t;
   25515              :       /* A user-named guard symbol gets one cached external VAR_DECL.  */
   25516          254 :       if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
   25517              :         {
   25518            1 :           t = ix86_tls_stack_chk_guard_decl;
   25519              : 
   25520            1 :           if (t == NULL)
   25521              :             {
   25522            1 :               rtx x;
   25523              : 
   25524            1 :               t = build_decl
   25525            1 :                 (UNKNOWN_LOCATION, VAR_DECL,
   25526              :                  get_identifier (ix86_stack_protector_guard_symbol_str),
   25527              :                  type);
   25528            1 :               TREE_STATIC (t) = 1;
   25529            1 :               TREE_PUBLIC (t) = 1;
   25530            1 :               DECL_EXTERNAL (t) = 1;
   25531            1 :               TREE_USED (t) = 1;
   25532            1 :               TREE_THIS_VOLATILE (t) = 1;
   25533            1 :               DECL_ARTIFICIAL (t) = 1;
   25534            1 :               DECL_IGNORED_P (t) = 1;
   25535              : 
   25536              :               /* Do not share RTL as the declaration is visible outside of
   25537              :                  current function.  */
   25538            1 :               x = DECL_RTL (t);
   25539            1 :               RTX_FLAG (x, used) = 1;
   25540              : 
   25541            1 :               ix86_tls_stack_chk_guard_decl = t;
   25542              :             }
   25543              :         }
   25544              :       else
   25545              :         {
   25546          253 :           tree asptrtype = build_pointer_type (type);
   25547              :           /* No symbol given: reference the constant guard offset.  */
   25548          253 :           t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
   25549          253 :           t = build2 (MEM_REF, asptrtype, t,
   25550              :                       build_int_cst (asptrtype, 0));
   25551          253 :           TREE_THIS_VOLATILE (t) = 1;
   25552              :         }
   25553              : 
   25554          254 :       return t;
   25555              :     }
   25556              : 
   25557           76 :   return default_stack_protect_guard ();
   25558              : }
   25559              : 
   25560              : /* Implement TARGET_STACK_PROTECT_GUARD_SYMBOL_P: true iff TARGET_SSP_GLOBAL_GUARD.  */
   25561              : 
   25562              : static bool
   25563       210021 : ix86_stack_protect_guard_symbol_p (void)
   25564              : {
   25565       210021 :   return TARGET_SSP_GLOBAL_GUARD;
   25566              : }
   25567              : 
   25568              : static bool
   25569          903 : ix86_stack_protect_runtime_enabled_p (void)
   25570              : {
   25571              :   /* Enabled unless the current function is naked: naked functions should not enable the stack protector.  */
   25572          903 :   return !ix86_function_naked (current_function_decl);
   25573              : }
   25574              : 
   25575              : /* For 32-bit code we can save PIC register setup by using
   25576              :    __stack_chk_fail_local hidden function instead of calling
   25577              :    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   25578              :    register, so it is better to call __stack_chk_fail directly.  */
   25579              : 
   25580              : static tree ATTRIBUTE_UNUSED
   25581          322 : ix86_stack_protect_fail (void)
   25582              : {
   25583          322 :   return TARGET_64BIT
   25584          322 :          ? default_external_stack_protect_fail ()
   25585            1 :          : default_hidden_stack_protect_fail ();
   25586              : }
   25587              : 
   25588              : /* Select a format to encode pointers in exception handling data.  CODE
   25589              :    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   25590              :    true if the symbol may be affected by dynamic relocations.
   25591              : 
   25592              :    ??? All x86 object file formats are capable of representing this.
   25593              :    After all, the relocation needed is the same as for the call insn.
   25594              :    Whether or not a particular assembler allows us to enter such, I
   25595              :    guess we'll have to see.  */
   25596              : 
   25597              : int
   25598       787705 : asm_preferred_eh_data_format (int code, int global)
   25599              : {
   25600              :   /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
   25601       787705 :   if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
   25602              :     {
   25603        38900 :       int type = DW_EH_PE_sdata8;
   25604        38900 :       if (ptr_mode == SImode
   25605        24928 :           || ix86_cmodel == CM_SMALL_PIC
   25606        38986 :           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
   25607              :         type = DW_EH_PE_sdata4;
   25608        54465 :       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
   25609              :     }
   25610              :   /* Not pc-relative: udata4 for CM_SMALL and for CM_MEDIUM with CODE set.  */
   25611       748805 :   if (ix86_cmodel == CM_SMALL
   25612        18662 :       || (ix86_cmodel == CM_MEDIUM && code))
   25613       730156 :     return DW_EH_PE_udata4;
   25614              : 
   25615              :   return DW_EH_PE_absptr;
   25616              : }
   25617              : 
   25618              : /* Worker for ix86_builtin_vectorization_cost and the fallback calls
   25619              :    from ix86_vector_costs::add_stmt_cost: cost TYPE_OF_COST in MODE.  */
   25620              : static int
   25621     15280036 : ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
   25622              :                           machine_mode mode)
   25623              : {
   25624     15280036 :   bool fp = FLOAT_MODE_P (mode);
   25625     15280036 :   int index;
   25626     15280036 :   switch (type_of_cost)
   25627              :     {
   25628      1744155 :       case scalar_stmt:
   25629      1744155 :         return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
   25630              : 
   25631      1816823 :       case scalar_load:
   25632              :         /* Load/store costs are relative to a register move, which is 2.  Rescale
   25633              :            them to COSTS_N_INSNS so everything has the same base.  */
   25634      3633646 :         return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
   25635      1816823 :                               : ix86_cost->int_load [2]) / 2;
   25636              : 
   25637      3955936 :       case scalar_store:
   25638      7911872 :         return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
   25639      3955936 :                               : ix86_cost->int_store [2]) / 2;
   25640              : 
   25641      1198915 :       case vector_stmt:
   25642      2397830 :         return ix86_vec_cost (mode,
   25643      2397830 :                               fp ? ix86_cost->addss : ix86_cost->sse_op);
   25644              : 
   25645      1932301 :       case vector_load:
   25646      1932301 :         index = sse_store_index (mode);
   25647              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25648      1932301 :         if (index < 0)
   25649        99592 :           index = 2;
   25650      1932301 :         return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
   25651              : 
   25652       976071 :       case vector_store:
   25653       976071 :         index = sse_store_index (mode);
   25654              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25655       976071 :         if (index < 0)
   25656        91647 :           index = 2;
   25657       976071 :         return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
   25658              : 
   25659       831614 :       case vec_to_scalar:
   25660       831614 :       case scalar_to_vec:
   25661       831614 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25662              : 
   25663              :       /* We should have separate costs for unaligned loads and gather/scatter.
   25664              :          Do that incrementally.  */
   25665       513407 :       case unaligned_load:
   25666       513407 :         index = sse_store_index (mode);
   25667              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25668       513407 :         if (index < 0)
   25669         2708 :           index = 2;
   25670       513407 :         return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
   25671              : 
   25672       841768 :       case unaligned_store:
   25673       841768 :         index = sse_store_index (mode);
   25674              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25675       841768 :         if (index < 0)
   25676        17180 :           index = 2;
   25677       841768 :         return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
   25678              : 
   25679            0 :       case vector_gather_load:
   25680            0 :         return ix86_vec_cost (mode,
   25681            0 :                               COSTS_N_INSNS
   25682              :                                  (ix86_cost->gather_static
   25683              :                                   + ix86_cost->gather_per_elt
   25684            0 :                                     * GET_MODE_NUNITS (mode)) / 2);
   25685              : 
   25686            0 :       case vector_scatter_store:
   25687            0 :         return ix86_vec_cost (mode,
   25688            0 :                               COSTS_N_INSNS
   25689              :                                  (ix86_cost->scatter_static
   25690              :                                   + ix86_cost->scatter_per_elt
   25691            0 :                                     * GET_MODE_NUNITS (mode)) / 2);
   25692              : 
   25693       356788 :       case cond_branch_taken:
   25694       356788 :         return ix86_cost->cond_taken_branch_cost;
   25695              : 
   25696         8538 :       case cond_branch_not_taken:
   25697         8538 :         return ix86_cost->cond_not_taken_branch_cost;
   25698              : 
   25699       285446 :       case vec_perm:
   25700       285446 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25701              : 
   25702        89252 :       case vec_promote_demote:
   25703        89252 :         if (fp)
   25704        11654 :           return vec_fp_conversion_cost (ix86_tune_cost, mode);
   25705        77598 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25706              : 
   25707       729022 :       case vec_construct:
   25708       729022 :         {
   25709       729022 :           int n = GET_MODE_NUNITS (mode);
   25710              :           /* N - 1 element inserts into an SSE vector, the possible
   25711              :              GPR -> XMM move is accounted for in add_stmt_cost.  */
   25712      1458044 :           if (GET_MODE_BITSIZE (mode) <= 128)
   25713       722698 :             return (n - 1) * ix86_cost->sse_op;
   25714              :           /* One vinserti128 for combining two SSE vectors for AVX256.  */
   25715        12648 :           else if (GET_MODE_BITSIZE (mode) == 256)
   25716         5052 :             return ((n - 2) * ix86_cost->sse_op
   25717         5052 :                     + ix86_vec_cost (mode, ix86_cost->sse_op));
   25718              :           /* One vinserti64x4 and two vinserti128 for combining SSE
   25719              :              and AVX256 vectors to AVX512.  */
   25720         2544 :           else if (GET_MODE_BITSIZE (mode) == 512)
   25721              :             {
   25722         1272 :               machine_mode half_mode
   25723         1272 :                 = mode_for_vector (GET_MODE_INNER (mode),
   25724         2544 :                                    GET_MODE_NUNITS (mode) / 2).require ();
   25725         1272 :               return ((n - 4) * ix86_cost->sse_op
   25726         1272 :                       + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
   25727         1272 :                       + ix86_vec_cost (mode, ix86_cost->sse_op));
   25728              :             }
   25729            0 :           gcc_unreachable ();
   25730              :         }
   25731              : 
   25732            0 :       default:
   25733            0 :         gcc_unreachable ();
   25734              :     }
   25735              : }
   25736              : 
   25737              : /* Implement targetm.vectorize.builtin_vectorization_cost; a null VECTYPE is costed as TImode.  */
   25738              : static int
   25739      9184377 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   25740              :                                  tree vectype, int)
   25741              : {
   25742      9184377 :   machine_mode mode = TImode;
   25743      9184377 :   if (vectype != NULL)
   25744      5836730 :     mode = TYPE_MODE (vectype);
   25745      9184377 :   return ix86_default_vector_cost (type_of_cost, mode);
   25746              : }
   25747              : 
   25748              : 
   25749              : /* Return the va_list type node for the calling ABI of FNDECL.
   25750              :    When !TARGET_64BIT this is always va_list_type_node.  */
   25751              : 
   25752              : static tree
   25753        47389 : ix86_fn_abi_va_list (tree fndecl)
   25754              : {
   25755        47389 :   if (!TARGET_64BIT)
   25756          726 :     return va_list_type_node;
   25757        46663 :   gcc_assert (fndecl != NULL_TREE);
   25758              : 
   25759        46663 :   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
   25760        12868 :     return ms_va_list_type_node;
   25761              :   else
   25762        33795 :     return sysv_va_list_type_node;
   25763              : }
   25764              : 
   25765              : /* Returns the canonical va_list type specified by TYPE.  If there
   25766              :    is no valid TYPE provided, it returns NULL_TREE.  */
   25767              : 
   25768              : static tree
   25769       246475 : ix86_canonical_va_list_type (tree type)
   25770              : {
   25771       246475 :   if (TARGET_64BIT)
   25772              :     {
   25773       245973 :       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
   25774         5944 :         return ms_va_list_type_node;
   25775              :       /* A one-element array of, or pointer to, a tagged record is sysv.  */
   25776       240029 :       if ((TREE_CODE (type) == ARRAY_TYPE
   25777        49945 :            && integer_zerop (array_type_nelts_minus_one (type)))
   25778       240029 :           || POINTER_TYPE_P (type))
   25779              :         {
   25780       188197 :           tree elem_type = TREE_TYPE (type);
   25781       188197 :           if (TREE_CODE (elem_type) == RECORD_TYPE
   25782       339650 :               && lookup_attribute ("sysv_abi va_list",
   25783       151453 :                                    TYPE_ATTRIBUTES (elem_type)))
   25784       151453 :             return sysv_va_list_type_node;
   25785              :         }
   25786              : 
   25787        88576 :       return NULL_TREE;
   25788              :     }
   25789              : 
   25790          502 :   return std_canonical_va_list_type (type);
   25791              : }
   25792              : 
   25793              : /* Iterate through the target-specific builtin types for va_list.
   25794              :    IDX denotes the iterator, *PTREE is set to the result type of
   25795              :    the va_list builtin, and *PNAME to its internal type name.
   25796              :    Returns zero if there is no element for this index, otherwise
   25797              :    IDX should be increased upon the next call.
   25798              :    Note, do not iterate a base builtin's name like __builtin_va_list.
   25799              :    Used from c_common_nodes_and_builtins.  */
   25800              : 
   25801              : static int
   25802       619196 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
   25803              : {
   25804       619196 :   if (TARGET_64BIT)
   25805              :     {
   25806       613824 :       switch (idx)
   25807              :         {
   25808              :         default:
   25809              :           break;
   25810              : 
   25811       204608 :         case 0:
   25812       204608 :           *ptree = ms_va_list_type_node;
   25813       204608 :           *pname = "__builtin_ms_va_list";
   25814       204608 :           return 1;
   25815              : 
   25816       204608 :         case 1:
   25817       204608 :           *ptree = sysv_va_list_type_node;
   25818       204608 :           *pname = "__builtin_sysv_va_list";
   25819       204608 :           return 1;
   25820              :         }
   25821              :     }
   25822              :   /* No element for IDX, or not TARGET_64BIT.  */
   25823              :   return 0;
   25824              : }
    25825              : /* Point the TARGET_SCHED_* hook macros at their ix86 implementations.  */
    25826              : #undef TARGET_SCHED_DISPATCH
    25827              : #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
    25828              : #undef TARGET_SCHED_DISPATCH_DO
    25829              : #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
    25830              : #undef TARGET_SCHED_REASSOCIATION_WIDTH
    25831              : #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
    25832              : #undef TARGET_SCHED_REORDER
    25833              : #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
    25834              : #undef TARGET_SCHED_ADJUST_PRIORITY
    25835              : #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
    25836              : #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
    25837              : #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
    25838              :   ix86_dependencies_evaluation_hook
   25839              : 
   25840              : 
    25841              : /* Implementation of reassociation_width target hook used by reassoc
    25842              :    phase to identify parallelism level in reassociated tree.  Statements
    25843              :    tree_code is passed in OP.  Arguments type is passed in MODE.
    25844              :    NOTE(review): OP is compared with PLUS/MINUS, not PLUS_EXPR/MINUS_EXPR; confirm.  */
    25845              : 
    25846              : static int
    25847        28501 : ix86_reassociation_width (unsigned int op, machine_mode mode)
    25848              : {
    25849        28501 :   int width = 1;
    25850              :   /* Vector part.  */
    25851        28501 :   if (VECTOR_MODE_P (mode))
    25852              :     {
    25853         8372 :       int div = 1;
    25854         8372 :       if (INTEGRAL_MODE_P (mode))
    25855         2705 :         width = ix86_cost->reassoc_vec_int;
    25856         5667 :       else if (FLOAT_MODE_P (mode))
    25857         5667 :         width = ix86_cost->reassoc_vec_fp;
    25858              : 
    25859         8372 :       if (width == 1)
    25860              :         return 1;
    25861              : 
    25862              :       /* Znver1-4 Integer vector instructions execute in FP unit
    25863              :          and can execute 3 additions and one multiplication per cycle.  The C86_4G_M4/M6/M7 tunings are handled the same way.  */
    25864         8366 :       if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
    25865         8366 :            || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
    25866         8366 :            || ix86_tune == PROCESSOR_C86_4G_M4
    25867         8366 :            || ix86_tune == PROCESSOR_C86_4G_M6
    25868         8366 :            || ix86_tune == PROCESSOR_C86_4G_M7)
    25869            0 :           && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
    25870              :         return 1;
    25871              :       /* Znver5 can do 2 integer multiplications per cycle with latency
    25872              :          of 3.  The same width is used for Znver6.  */
    25873         8366 :       if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
    25874            0 :           && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
    25875         8366 :         width = 6;
    25876              : 
    25877              :       /* Account for targets that split wide vectors into multiple parts.  */
    25878         8366 :       if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
    25879            0 :         div = GET_MODE_BITSIZE (mode) / 256;
    25880         8366 :       else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
    25881            0 :         div = GET_MODE_BITSIZE (mode) / 128;
    25882         8366 :       else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
    25883            0 :         div = GET_MODE_BITSIZE (mode) / 64;
    25884         8366 :       width = (width + div - 1) / div;
    25885         8366 :     }
    25886              :   /* Scalar part.  */
    25887              :   else if (INTEGRAL_MODE_P (mode))
    25888        14151 :     width = ix86_cost->reassoc_int;
    25889              :   else if (FLOAT_MODE_P (mode))
    25890         5978 :     width = ix86_cost->reassoc_fp;
    25891              : 
    25892              :   /* Avoid using too many registers in 32bit mode.  */
    25893        28495 :   if (!TARGET_64BIT && width > 2)
    25894        28501 :     width = 2;
    25895              :   return width;
    25896              : }
   25897              : 
    25898              : /* Return the vector mode preferred for scalar MODE, or word_mode if none is chosen.
    25899              :    ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions.  */
    25900              : 
    25901              : static machine_mode
    25902      5152797 : ix86_preferred_simd_mode (scalar_mode mode)
    25903              : {
    25904      5152797 :   if (!TARGET_SSE)
    25905          859 :     return word_mode;
    25906              : 
    25907      5151938 :   switch (mode)
    25908              :     {
    25909       417353 :     case E_QImode:
    25910       417353 :       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
    25911              :         return V64QImode;
    25912       405925 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25913              :         return V32QImode;
    25914              :       else
    25915       385750 :         return V16QImode;
    25916              : 
    25917       195980 :     case E_HImode:
    25918       195980 :       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
    25919              :         return V32HImode;
    25920       185510 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25921              :         return V16HImode;
    25922              :       else
    25923       169365 :         return V8HImode;
    25924              : 
    25925      1517797 :     case E_SImode:
    25926      1517797 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25927              :         return V16SImode;
    25928      1450244 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25929              :         return V8SImode;
    25930              :       else
    25931      1297123 :         return V4SImode;
    25932              : 
    25933      1881210 :     case E_DImode:
    25934      1881210 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25935              :         return V8DImode;
    25936      1477122 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25937              :         return V4DImode;
    25938              :       else
    25939      1415192 :         return V2DImode;
    25940              : 
    25941       142648 :     case E_HFmode:
    25942       142648 :       if (TARGET_AVX512FP16)
    25943              :         {
    25944       141902 :           if (TARGET_AVX512VL)
    25945              :             {
    25946        69031 :               if (TARGET_PREFER_AVX128)
    25947              :                 return V8HFmode;
    25948        68809 :               else if (TARGET_PREFER_AVX256)
    25949              :                 return V16HFmode;
    25950              :             }
    25951       139559 :           return V32HFmode;
    25952              :         }
    25953          746 :       return word_mode;
    25954              : 
    25955        62894 :     case E_BFmode:
    25956        62894 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25957              :         return V32BFmode;
    25958        26462 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25959              :         return V16BFmode;
    25960              :       else
    25961        13459 :         return V8BFmode;
    25962              : 
    25963       610034 :     case E_SFmode:
    25964       610034 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25965              :         return V16SFmode;
    25966       409048 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25967              :         return V8SFmode;
    25968              :       else
    25969       342695 :         return V4SFmode;
    25970              : 
    25971       288423 :     case E_DFmode:
    25972       288423 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25973              :         return V8DFmode;
    25974       167933 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    25975              :         return V4DFmode;
    25976       115465 :       else if (TARGET_SSE2)
    25977              :         return V2DFmode;
    25978              :       /* FALLTHRU: DFmode without SSE2 gets word_mode like any other mode.  */
    25979              : 
    25980        35655 :     default:
    25981        35655 :       return word_mode;
    25982              :     }
    25983              : }
   25984              : 
    25985              : /* Push the vector modes to try onto MODES.  If AVX is enabled then try
    25986              :    256bit and 128bit vectors, if AVX512F is enabled 512bit, 256bit and 128bit.  If ALL, a size
    25987              :    excluded by TARGET_PREFER_AVX128/256 is still pushed, last.  Returns VECT_COMPARE_COSTS or 0.  */
    25988              : 
    25989              : static unsigned int
    25990      2197412 : ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
    25991              : {
    25992      2197412 :   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    25993              :     {
    25994        74820 :       modes->safe_push (V64QImode);
    25995        74820 :       modes->safe_push (V32QImode);
    25996        74820 :       modes->safe_push (V16QImode);
    25997              :     }
    25998      2122592 :   else if (TARGET_AVX512F && all)
    25999              :     {
    26000          558 :       modes->safe_push (V32QImode);
    26001          558 :       modes->safe_push (V16QImode);
    26002          558 :       modes->safe_push (V64QImode);
    26003              :     }
    26004      2122034 :   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    26005              :     {
    26006        28728 :       modes->safe_push (V32QImode);
    26007        28728 :       modes->safe_push (V16QImode);
    26008              :     }
    26009      2093306 :   else if (TARGET_AVX && all)
    26010              :     {
    26011           24 :       modes->safe_push (V16QImode);
    26012           24 :       modes->safe_push (V32QImode);
    26013              :     }
    26014      2093282 :   else if (TARGET_SSE2)
    26015      2091016 :     modes->safe_push (V16QImode);
    26016              : 
    26017      2197412 :   if (TARGET_MMX_WITH_SSE)
    26018      1800857 :     modes->safe_push (V8QImode);
    26019              : 
    26020      2197412 :   if (TARGET_SSE2)
    26021      2195146 :     modes->safe_push (V4QImode);
    26022              : 
    26023      2197412 :   return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
    26024              : }
   26025              : 
    26026              : /* Implementation of targetm.vectorize.get_mask_mode for vectors of mode DATA_MODE.  */
    26027              : 
    26028              : static opt_machine_mode
    26029      3365759 : ix86_get_mask_mode (machine_mode data_mode)
    26030              : {
    26031      3365759 :   unsigned vector_size = GET_MODE_SIZE (data_mode);
    26032      3365759 :   unsigned nunits = GET_MODE_NUNITS (data_mode);
    26033      3365759 :   unsigned elem_size = vector_size / nunits;
    26034              : 
    26035              :   /* Scalar mask case: an integer mode with at least NUNITS bits.  */
    26036       479380 :   if ((TARGET_AVX512F && vector_size == 64)
    26037      3250527 :       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
    26038              :       /* AVX512FP16 only supports vector comparison
    26039              :          to kmask for _Float16.  */
    26040      3075961 :       || (TARGET_AVX512VL && TARGET_AVX512FP16
    26041        18345 :           && GET_MODE_INNER (data_mode) == E_HFmode)
    26042      6446393 :       || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    26043              :     {
    26044       292821 :       if (elem_size == 4
    26045       292821 :           || elem_size == 8
    26046       135117 :           || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
    26047       262370 :         return smallest_int_mode_for_size (nunits).require ();
    26048              :     }
    26049              :   /* Otherwise the mask is a vector of integers matching DATA_MODE's element size and count.  */
    26050      3103389 :   scalar_int_mode elem_mode
    26051      3103389 :     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
    26052              : 
    26053      3103389 :   gcc_assert (elem_size * nunits == vector_size);
    26054              : 
    26055      3103389 :   return mode_for_vector (elem_mode, nunits);
    26056              : }
   26057              : 
   26058              : 
   26059              : 
    26060              : /* Return class of registers which could be used for pseudo of MODE
    26061              :    and of class RCLASS for spilling instead of memory.  Return NO_REGS
    26062              :    if it is not possible or non-profitable.  */
    26063              : 
    26064              : /* Disabled by the constant 0 at the start of the condition below, so NO_REGS is always returned; see PRs 70902, 71453, 71555, 71596 and 71657.  */
    26065              : 
    26066              : static reg_class_t
    26067   6246357973 : ix86_spill_class (reg_class_t rclass, machine_mode mode)
    26068              : {
    26069   6246357973 :   if (0 && TARGET_GENERAL_REGS_SSE_SPILL
    26070              :       && TARGET_SSE2
    26071              :       && TARGET_INTER_UNIT_MOVES_TO_VEC
    26072              :       && TARGET_INTER_UNIT_MOVES_FROM_VEC
    26073              :       && (mode == SImode || (TARGET_64BIT && mode == DImode))
    26074              :       && INTEGER_CLASS_P (rclass))
    26075              :     return ALL_SSE_REGS;
    26076   6246357973 :   return NO_REGS;
    26077              : }
   26078              : 
    26079              : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST for edge E.  Like the default implementation,
    26080              :    but returns a lower bound.  An explicitly set param_max_rtl_if_conversion_*_cost is returned as is.  */
    26081              : 
    26082              : static unsigned int
    26083      1856398 : ix86_max_noce_ifcvt_seq_cost (edge e)
    26084              : {
    26085      1856398 :   bool predictable_p = predictable_edge_p (e);
    26086      1856398 :   if (predictable_p)
    26087              :     {
    26088       143887 :       if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
    26089            8 :         return param_max_rtl_if_conversion_predictable_cost;
    26090              :     }
    26091              :   else
    26092              :     {
    26093      1712511 :       if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
    26094           73 :         return param_max_rtl_if_conversion_unpredictable_cost;
    26095              :     }
    26096              : 
    26097              :   /* For modern machines with deeper pipeline, the penalty for branch
    26098              :      misprediction could be higher than before to reset the pipeline
    26099              :      slots.  Add parameter br_mispredict_scale as a factor to describe
    26100              :      the impact of resetting the pipeline.  */
    26101              : 
    26102      1856317 :   return BRANCH_COST (true, predictable_p)
    26103      1856317 :          * ix86_tune_cost->br_mispredict_scale;
    26104              : }
   26105              : 
    26106              : /* Return true if SEQ is a good candidate as a replacement for the
    26107              :    if-convertible sequence described in IF_INFO.  Defers to the default hook unless a case below decides.  */
    26108              : 
    26109              : static bool
    26110       201269 : ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
    26111              : {
    26112       201269 :   if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    26113              :     {
    26114              :       int cmov_cnt = 0;
    26115              :       /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
    26116              :          Maybe we should allow even more conditional moves as long as they
    26117              :          are used far enough not to stall the CPU, or also consider
    26118              :          IF_INFO->TEST_BB succ edge probabilities.  */
    26119          247 :       for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
    26120              :         {
    26121          205 :           rtx set = single_set (insn);
    26122          205 :           if (!set)
    26123            0 :             continue;
    26124          205 :           if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    26125          163 :             continue;
    26126           42 :           rtx src = SET_SRC (set);
    26127           42 :           machine_mode mode = GET_MODE (src);
    26128           42 :           if (GET_MODE_CLASS (mode) != MODE_INT
    26129            0 :               && GET_MODE_CLASS (mode) != MODE_FLOAT)
    26130            0 :             continue;
    26131           42 :           if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
    26132           41 :               || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
    26133            1 :             continue;
    26134              :           /* insn is CMOV or FCMOV.  */
    26135           41 :           if (++cmov_cnt > 1)
    26136              :             return false;
    26137              :         }
    26138              :     }
    26139              : 
    26140              :   /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
    26141              :      for movdfcc/movsfcc, and could possibly fail cost comparison.
    26142              :      Increase branch cost will hurt performance for other modes, so
    26143              :      specially add some preference for floating point ifcvt.  */
    26144       201261 :   if (!TARGET_SSE4_1 && if_info->x
    26145       157779 :       && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
    26146        34073 :       && if_info->speed_p)
    26147              :     {
    26148        27049 :       unsigned cost = seq_cost (seq, true);
    26149              : 
    26150        27049 :       if (cost <= if_info->original_cost)
    26151              :         return true;
    26152              :       /* Otherwise tolerate COSTS_N_INSNS (2) above the usual maximum.  */
    26153        25860 :       return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    26154              :     }
    26155              : 
    26156       174212 :   return default_noce_conversion_profitable_p (seq, if_info);
    26157              : }
   26158              : 
    26159              : /* x86-specific vector costs: overrides the vector_costs hooks and tracks the counters declared below.  */
    26160              : class ix86_vector_costs : public vector_costs
    26161              : {
    26162              : public:
    26163              :   ix86_vector_costs (vec_info *, bool);
    26164              : 
    26165              :   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
    26166              :                               stmt_vec_info stmt_info, slp_tree node,
    26167              :                               tree vectype, int misalign,
    26168              :                               vect_cost_model_location where) override;
    26169              :   void finish_cost (const vector_costs *) override;
    26170              :   bool better_main_loop_than_p (const vector_costs *) const override;
    26171              :   bool better_epilogue_loop_than_p (const vector_costs *other,
    26172              :                                     loop_vec_info main_loop) const;
    26173              : 
    26174              : private:
    26175              : 
    26176              :   /* Estimate register pressure of the vectorized code.  */
    26177              :   void ix86_vect_estimate_reg_pressure ();
    26178              :   /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer; it is used
    26179              :      for estimation of register pressure.
    26180              :      ??? Currently it's only used by vec_construct/scalar_to_vec
    26181              :      where we know it's not loaded from memory.  */
    26182              :   unsigned m_num_gpr_needed[3];
    26183              :   unsigned m_num_sse_needed[3];
    26184              :   /* Number of 256-bit vector permutations.  */
    26185              :   unsigned m_num_avx256_vec_perm[3];
    26186              :   /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR.  */
    26187              :   unsigned m_num_reduc[X86_REDUC_LAST];
    26188              :   /* Don't do unroll if m_prefer_unroll is false, default is true.  */
    26189              :   bool m_prefer_unroll;
    26190              : };
    26191              : /* Construct the costs for VINFO: all counters start zeroed and unrolling is preferred.  */
    26192      2101264 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
    26193              :   : vector_costs (vinfo, costing_for_scalar),
    26194      2101264 :     m_num_gpr_needed (),
    26195      2101264 :     m_num_sse_needed (),
    26196      2101264 :     m_num_avx256_vec_perm (),
    26197      2101264 :     m_num_reduc (),
    26198      2101264 :     m_prefer_unroll (true)
    26199      2101264 : {}
   26200              : 
    26201              : /* Implement targetm.vectorize.create_costs.  Returns a heap-allocated ix86_vector_costs for VINFO.  */
    26202              : 
    26203              : static vector_costs *
    26204      2101264 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
    26205              : {
    26206      2101264 :   return new ix86_vector_costs (vinfo, costing_for_scalar);
    26207              : }
   26208              : 
   26209              : unsigned
   26210      7499381 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   26211              :                                   stmt_vec_info stmt_info, slp_tree node,
   26212              :                                   tree vectype, int,
   26213              :                                   vect_cost_model_location where)
   26214              : {
   26215      7499381 :   unsigned retval = 0;
   26216      7499381 :   bool scalar_p
   26217              :     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
   26218      7499381 :   int stmt_cost = - 1;
   26219              : 
   26220      7499381 :   bool fp = false;
   26221      7499381 :   machine_mode mode = scalar_p ? SImode : TImode;
   26222              : 
   26223      7499381 :   if (vectype != NULL)
   26224              :     {
   26225      3374739 :       fp = FLOAT_TYPE_P (vectype);
   26226      3374739 :       mode = TYPE_MODE (vectype);
   26227      3374739 :       if (scalar_p)
   26228       284497 :         mode = TYPE_MODE (TREE_TYPE (vectype));
   26229              :     }
   26230              :   /* When we are costing a scalar stmt use the scalar stmt to get at the
   26231              :      type of the operation.  */
   26232      4124642 :   else if (scalar_p && stmt_info)
   26233      4041207 :     if (tree lhs = gimple_get_lhs (stmt_info->stmt))
   26234              :       {
   26235      3865540 :         fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
   26236      3865540 :         mode = TYPE_MODE (TREE_TYPE (lhs));
   26237              :       }
   26238              : 
   26239      7499381 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26240      1976343 :       && stmt_info
   26241      9466596 :       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
   26242              :     {
   26243      1583998 :       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26244              :       /*machine_mode inner_mode = mode;
   26245              :       if (VECTOR_MODE_P (mode))
   26246              :         inner_mode = GET_MODE_INNER (mode);*/
   26247              : 
   26248      1583998 :       switch (subcode)
   26249              :         {
   26250       618535 :         case PLUS_EXPR:
   26251       618535 :         case POINTER_PLUS_EXPR:
   26252       618535 :         case MINUS_EXPR:
   26253       618535 :           if (kind == scalar_stmt)
   26254              :             {
   26255       391411 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26256        96973 :                 stmt_cost = ix86_cost->addss;
   26257       294438 :               else if (X87_FLOAT_MODE_P (mode))
   26258          136 :                 stmt_cost = ix86_cost->fadd;
   26259              :               else
   26260       294302 :                 stmt_cost = ix86_cost->add;
   26261              :             }
   26262              :           else
   26263       227124 :             stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
   26264              :                                        : ix86_cost->sse_op);
   26265              :           break;
   26266              : 
   26267       253261 :         case MULT_EXPR:
   26268              :           /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
   26269              :              take it as MULT_EXPR.  */
   26270       253261 :         case MULT_HIGHPART_EXPR:
   26271       253261 :           stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26272       253261 :           break;
   26273              :           /* There's no direct instruction for WIDEN_MULT_EXPR,
   26274              :              take emulation into account.  */
   26275         1076 :         case WIDEN_MULT_EXPR:
   26276         2152 :           stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
   26277         1076 :                                             TYPE_UNSIGNED (vectype));
   26278         1076 :           break;
   26279              : 
   26280        10672 :         case NEGATE_EXPR:
   26281        10672 :           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26282         3547 :             stmt_cost = ix86_cost->sse_op;
   26283         7125 :           else if (X87_FLOAT_MODE_P (mode))
   26284            0 :             stmt_cost = ix86_cost->fchs;
   26285         7125 :           else if (VECTOR_MODE_P (mode))
   26286         3626 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26287              :           else
   26288         3499 :             stmt_cost = ix86_cost->add;
   26289              :           break;
   26290        14027 :         case TRUNC_DIV_EXPR:
   26291        14027 :         case CEIL_DIV_EXPR:
   26292        14027 :         case FLOOR_DIV_EXPR:
   26293        14027 :         case ROUND_DIV_EXPR:
   26294        14027 :         case TRUNC_MOD_EXPR:
   26295        14027 :         case CEIL_MOD_EXPR:
   26296        14027 :         case FLOOR_MOD_EXPR:
   26297        14027 :         case RDIV_EXPR:
   26298        14027 :         case ROUND_MOD_EXPR:
   26299        14027 :         case EXACT_DIV_EXPR:
   26300        14027 :           stmt_cost = ix86_division_cost (ix86_cost, mode);
   26301        14027 :           break;
   26302              : 
   26303        72462 :         case RSHIFT_EXPR:
   26304        72462 :         case LSHIFT_EXPR:
   26305        72462 :         case LROTATE_EXPR:
   26306        72462 :         case RROTATE_EXPR:
   26307        72462 :           {
   26308        72462 :             tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
   26309        72462 :             tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
   26310        72462 :             stmt_cost = ix86_shift_rotate_cost
   26311        72462 :                            (ix86_cost,
   26312              :                             (subcode == RSHIFT_EXPR
   26313        37787 :                              && !TYPE_UNSIGNED (TREE_TYPE (op1)))
   26314              :                             ? ASHIFTRT : LSHIFTRT, mode,
   26315        72462 :                             TREE_CODE (op2) == INTEGER_CST,
   26316        72462 :                             cst_and_fits_in_hwi (op2)
   26317        41413 :                             ? int_cst_value (op2) : -1,
   26318              :                             false, false, NULL, NULL);
   26319              :           }
   26320        72462 :           break;
   26321        98592 :         case NOP_EXPR:
   26322              :           /* Only sign-conversions are free.  */
   26323        98592 :           if (tree_nop_conversion_p
   26324        98592 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
   26325        98592 :                  TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
   26326              :             stmt_cost = 0;
   26327        98592 :           else if (fp)
   26328        10240 :             stmt_cost = vec_fp_conversion_cost
   26329        10240 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26330              :           break;
   26331              : 
   26332        23208 :         case FLOAT_EXPR:
   26333        23208 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26334        17412 :               stmt_cost = ix86_cost->cvtsi2ss;
   26335         5796 :             else if (X87_FLOAT_MODE_P (mode))
   26336              :               /* TODO: We do not have cost tables for x87.  */
   26337           50 :               stmt_cost = ix86_cost->fadd;
   26338              :             else
   26339         5746 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26340              :             break;
   26341              : 
   26342         2203 :         case FIX_TRUNC_EXPR:
   26343         2203 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26344            0 :               stmt_cost = ix86_cost->cvtss2si;
   26345         2203 :             else if (X87_FLOAT_MODE_P (mode))
   26346              :               /* TODO: We do not have cost tables for x87.  */
   26347            0 :               stmt_cost = ix86_cost->fadd;
   26348              :             else
   26349         2203 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26350              :             break;
   26351              : 
   26352        56015 :         case COND_EXPR:
   26353        56015 :           {
    26354              :             /* SSE2 conditional move sequence is:
   26355              :                  pcmpgtd %xmm5, %xmm0 (accounted separately)
   26356              :                  pand    %xmm0, %xmm2
   26357              :                  pandn   %xmm1, %xmm0
   26358              :                  por     %xmm2, %xmm0
   26359              :                while SSE4 uses cmp + blend
   26360              :                and AVX512 masked moves.
   26361              : 
   26362              :                The condition is accounted separately since we usually have
   26363              :                  p = a < b
   26364              :                  c = p ? x : y
   26365              :                and we will account first statement as setcc.  Exception is when
    26366              :                p is loaded from memory as bool and then we will not account
   26367              :                the compare, but there is no way to check for this.  */
   26368              : 
   26369        56015 :             int ninsns = TARGET_SSE4_1 ? 1 : 3;
   26370              : 
   26371              :             /* If one of parameters is 0 or -1 the sequence will be simplified:
   26372              :                (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
   26373        23446 :             if (ninsns > 1
   26374        23446 :                 && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26375        23095 :                     || zerop (gimple_assign_rhs3 (stmt_info->stmt))
   26376        13145 :                     || integer_minus_onep
   26377        13145 :                         (gimple_assign_rhs2 (stmt_info->stmt))
   26378        12716 :                     || integer_minus_onep
   26379        12716 :                         (gimple_assign_rhs3 (stmt_info->stmt))))
   26380              :               ninsns = 1;
   26381              : 
   26382        56015 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26383         5044 :               stmt_cost = ninsns * ix86_cost->sse_op;
   26384        50971 :             else if (X87_FLOAT_MODE_P (mode))
   26385              :               /* x87 requires conditional branch.  We don't have cost for
   26386              :                  that.  */
   26387              :               ;
   26388        50962 :             else if (VECTOR_MODE_P (mode))
   26389        21217 :               stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
   26390              :             else
   26391              :               /* compare (accounted separately) + cmov.  */
   26392        29745 :               stmt_cost = ix86_cost->add;
   26393              :           }
   26394              :           break;
   26395              : 
   26396        26740 :         case MIN_EXPR:
   26397        26740 :         case MAX_EXPR:
   26398        26740 :           if (fp)
   26399              :             {
   26400         1486 :               if (X87_FLOAT_MODE_P (mode)
   26401          520 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26402              :                 /* x87 requires conditional branch.  We don't have cost for
   26403              :                    that.  */
   26404              :                 ;
   26405              :               else
   26406              :                 /* minss  */
   26407         1486 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26408              :             }
   26409              :           else
   26410              :             {
   26411        25254 :               if (VECTOR_MODE_P (mode))
   26412              :                 {
   26413         5151 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26414              :                   /* vpmin was introduced in SSE3.
   26415              :                      SSE2 needs pcmpgtd + pand + pandn + pxor.
   26416              :                      If one of parameters is 0 or -1 the sequence is simplified
   26417              :                      to pcmpgtd + pand.  */
   26418         5151 :                   if (!TARGET_SSSE3)
   26419              :                     {
   26420         3191 :                       if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26421         4611 :                           || integer_minus_onep
   26422         1420 :                                 (gimple_assign_rhs2 (stmt_info->stmt)))
   26423         1771 :                         stmt_cost *= 2;
   26424              :                       else
   26425         1420 :                         stmt_cost *= 4;
   26426              :                     }
   26427              :                 }
   26428              :               else
   26429              :                 /* cmp + cmov.  */
   26430        20103 :                 stmt_cost = ix86_cost->add * 2;
   26431              :             }
   26432              :           break;
   26433              : 
   26434         1307 :         case ABS_EXPR:
   26435         1307 :         case ABSU_EXPR:
   26436         1307 :           if (fp)
   26437              :             {
   26438          471 :               if (X87_FLOAT_MODE_P (mode)
   26439          171 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26440              :                 /* fabs.  */
   26441            0 :                 stmt_cost = ix86_cost->fabs;
   26442              :               else
   26443              :                 /* andss of sign bit.  */
   26444          471 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26445              :             }
   26446              :           else
   26447              :             {
   26448          836 :               if (VECTOR_MODE_P (mode))
   26449              :                 {
   26450          105 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26451              :                   /* vabs was introduced in SSSE3.
   26452              :                      SSE2 uses psrat + pxor + psub.  */
   26453          105 :                   if (!TARGET_SSSE3)
   26454           75 :                     stmt_cost *= 3;
   26455              :                 }
   26456              :               else
   26457              :                 /* neg + cmov.  */
   26458          731 :                 stmt_cost = ix86_cost->add * 2;
   26459              :             }
   26460              :           break;
   26461              : 
   26462       152216 :         case BIT_IOR_EXPR:
   26463       152216 :         case BIT_XOR_EXPR:
   26464       152216 :         case BIT_AND_EXPR:
   26465       152216 :         case BIT_NOT_EXPR:
   26466       152216 :           gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
   26467              :                       && !X87_FLOAT_MODE_P (mode));
   26468       152216 :           if (VECTOR_MODE_P (mode))
   26469        52913 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26470              :           else
   26471        99303 :             stmt_cost = ix86_cost->add;
   26472              :           break;
   26473              : 
   26474       253684 :         default:
   26475       253684 :           if (truth_value_p (subcode))
   26476              :             {
   26477        99000 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26478              :                 /* CMPccS? instructions are cheap, so use sse_op.  While they
   26479              :                    produce a mask which may need to be turned to 0/1 by and,
   26480              :                    expect that this will be optimized away in a common case.  */
   26481            0 :                 stmt_cost = ix86_cost->sse_op;
   26482        99000 :               else if (X87_FLOAT_MODE_P (mode))
   26483              :                 /* fcmp + setcc.  */
   26484            0 :                 stmt_cost = ix86_cost->fadd + ix86_cost->add;
   26485        99000 :               else if (VECTOR_MODE_P (mode))
   26486        20598 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26487              :               else
   26488              :                 /* setcc.  */
   26489        78402 :                 stmt_cost = ix86_cost->add;
   26490              :               break;
   26491              :             }
   26492              :           break;
   26493              :         }
   26494              :     }
   26495              : 
   26496              :   /* Record number of load/store/gather/scatter in vectorized body.  */
   26497      7499381 :   if (where == vect_body && !m_costing_for_scalar)
   26498              :     {
   26499      1948001 :       int scale = 1;
   26500      1948001 :       if (vectype
   26501      3887219 :           && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
   26502        59608 :               && TARGET_AVX512_SPLIT_REGS)
   26503      3878330 :               || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26504       118072 :                   && TARGET_AVX256_SPLIT_REGS)))
   26505              :         scale = 2;
   26506              : 
   26507      1948001 :       switch (kind)
   26508              :         {
   26509              :           /* Emulated gather/scatter or any scalarization.  */
   26510       112347 :         case scalar_load:
   26511       112347 :         case scalar_stmt:
   26512       112347 :         case scalar_store:
   26513       112347 :         case vector_gather_load:
   26514       112347 :         case vector_scatter_store:
   26515       112347 :           m_prefer_unroll = false;
   26516       112347 :           break;
   26517              : 
   26518       596137 :         case vector_stmt:
   26519       596137 :         case vec_to_scalar:
   26520              :           /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
   26521              :              unroll in the vectorizer will enable partial sum.  */
   26522       596137 :           if (stmt_info
   26523       596111 :               && vect_is_reduction (stmt_info)
   26524       669363 :               && stmt_info->stmt)
   26525              :             {
   26526              :               /* Handle __builtin_fma.  */
   26527        73226 :               if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
   26528              :                 {
   26529           11 :                   m_num_reduc[X86_REDUC_FMA] += count * scale;
   26530           11 :                   break;
   26531              :                 }
   26532              : 
   26533        73215 :               if (!is_gimple_assign (stmt_info->stmt))
   26534              :                 break;
   26535              : 
   26536        70381 :               tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26537        70381 :               machine_mode inner_mode = GET_MODE_INNER (mode);
   26538        70381 :               tree rhs1, rhs2;
   26539        70381 :               bool native_vnni_p = true;
   26540        70381 :               gimple* def;
   26541        70381 :               machine_mode mode_rhs;
   26542        70381 :               switch (subcode)
   26543              :                 {
   26544        53773 :                 case PLUS_EXPR:
   26545        53773 :                 case MINUS_EXPR:
   26546        53773 :                   if (!fp || !flag_associative_math
   26547        26148 :                       || flag_fp_contract_mode != FP_CONTRACT_FAST)
   26548              :                     break;
   26549              : 
   26550              :                   /* FMA condition for different modes.  */
   26551        26148 :                   if (((inner_mode == DFmode || inner_mode == SFmode)
   26552        26118 :                        && !TARGET_FMA && !TARGET_AVX512VL)
   26553         8489 :                       || (inner_mode == HFmode && !TARGET_AVX512FP16)
   26554         8489 :                       || (inner_mode == BFmode && !TARGET_AVX10_2))
   26555              :                     break;
   26556              : 
   26557              :                   /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
   26558              :                      to FMA/FNMA after vectorization.  */
   26559         8489 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26560         8489 :                   rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26561         8489 :                   if (subcode == PLUS_EXPR
   26562         6668 :                       && TREE_CODE (rhs1) == SSA_NAME
   26563         6668 :                       && (def = SSA_NAME_DEF_STMT (rhs1), true)
   26564         6668 :                       && is_gimple_assign (def)
   26565        11819 :                       && gimple_assign_rhs_code (def) == MULT_EXPR)
   26566         1951 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26567         6538 :                   else if (TREE_CODE (rhs2) == SSA_NAME
   26568         6538 :                            && (def = SSA_NAME_DEF_STMT (rhs2), true)
   26569         6538 :                            && is_gimple_assign (def)
   26570        12989 :                            && gimple_assign_rhs_code (def) == MULT_EXPR)
   26571         6445 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26572              :                   break;
   26573              : 
   26574              :                   /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
   26575              :                      WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
   26576              :                      SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
   26577          608 :                 case DOT_PROD_EXPR:
   26578          608 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26579          608 :                   mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
   26580          608 :                   if (mode_rhs == QImode)
   26581              :                     {
   26582          335 :                       rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26583          335 :                       signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
   26584          335 :                       signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
   26585              : 
   26586              :                       /* vpdpbusd.  */
   26587          335 :                       if (signop1_p != signop2_p)
   26588           85 :                         native_vnni_p
   26589           85 :                           = (GET_MODE_SIZE (mode) == 64
   26590           85 :                              ? TARGET_AVX512VNNI
   26591           28 :                              : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
   26592           85 :                                 || TARGET_AVXVNNI));
   26593              :                       else
   26594              :                         /* vpdpbssd.  */
   26595          250 :                         native_vnni_p
   26596          266 :                           = (GET_MODE_SIZE (mode) == 64
   26597          250 :                              ? TARGET_AVX10_2
   26598          234 :                              : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
   26599              :                     }
   26600          608 :                   m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
   26601              : 
   26602              :                   /* Dislike to do unroll and partial sum for
   26603              :                      emulated DOT_PROD_EXPR.  */
   26604          608 :                   if (!native_vnni_p)
   26605          151 :                     m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
   26606              :                   break;
   26607              : 
   26608          106 :                 case SAD_EXPR:
   26609          106 :                   m_num_reduc[X86_REDUC_SAD] += count * scale;
   26610          106 :                   break;
   26611              : 
   26612              :                 default:
   26613              :                   break;
   26614              :                 }
   26615              :             }
   26616              : 
   26617              :         default:
   26618              :           break;
   26619              :         }
   26620              :     }
   26621              : 
   26622              : 
   26623      7499381 :   combined_fn cfn;
   26624      7499381 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26625      1976343 :       && stmt_info
   26626      1967215 :       && stmt_info->stmt
   26627      9466596 :       && is_gimple_call (stmt_info->stmt))
   26628              :     {
   26629        26415 :       tree fndecl = gimple_call_fndecl (stmt_info->stmt);
   26630        26415 :       cgraph_node *node;
   26631        26415 :       if ((fndecl
   26632         5305 :            && (node = cgraph_node::get (fndecl))
   26633         5280 :            && node->simd_clones)
   26634        30703 :           || gimple_call_internal_p (stmt_info->stmt, IFN_MASK_CALL))
   26635         2484 :         stmt_cost = 10 * ix86_vec_cost (mode,
   26636         1242 :                                         mode == SFmode ? ix86_cost->fmass
   26637              :                                         : ix86_cost->fmasd);
   26638        25173 :       else if ((cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
   26639        23818 :         switch (cfn)
   26640              :           {
   26641          107 :           case CFN_FMA:
   26642          107 :             stmt_cost = ix86_vec_cost (mode,
   26643          107 :                                        mode == SFmode ? ix86_cost->fmass
   26644              :                                        : ix86_cost->fmasd);
   26645          107 :             break;
   26646           62 :           case CFN_MULH:
   26647           62 :             stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26648           62 :             break;
   26649              :           default:
   26650              :             break;
   26651              :           }
   26652              :     }
   26653              : 
   26654      7499381 :   if (kind == vec_promote_demote)
   26655              :     {
   26656        61358 :       int outer_size
   26657              :         = tree_to_uhwi
   26658        61358 :             (TYPE_SIZE
   26659        61358 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
   26660        61358 :       int inner_size
   26661              :         = tree_to_uhwi
   26662        61358 :             (TYPE_SIZE
   26663        61358 :                 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
   26664        61358 :       bool inner_fp = FLOAT_TYPE_P
   26665              :                         (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
   26666              : 
   26667         5581 :       if (fp && inner_fp)
   26668         5076 :         stmt_cost = vec_fp_conversion_cost
   26669         5076 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26670        56282 :       else if (fp && !inner_fp)
   26671         6116 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26672        50166 :       else if (!fp && inner_fp)
   26673          505 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26674              :       else
   26675        49661 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26676              :       /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
   26677              :          greater than inner size we will end up doing two conversions and
   26678              :          packing them.  We always pack pairs; if the size difference is greater
   26679              :          it is split into multiple demote operations.  */
   26680        61358 :       if (inner_size > outer_size)
   26681        23316 :         stmt_cost = stmt_cost * 2
   26682        23316 :                     + ix86_vec_cost (mode, ix86_cost->sse_op);
   26683              :     }
   26684              : 
   26685              :   /* If we do elementwise loads into a vector then we are bound by
   26686              :      latency and execution resources for the many scalar loads
   26687              :      (AGU and load ports).  Try to account for this by scaling the
   26688              :      construction cost by the number of elements involved.  */
   26689      7499381 :   if ((kind == vec_construct || kind == vec_to_scalar)
   26690      7499381 :       && ((node
   26691       430590 :            && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
   26692       443277 :                  || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
   26693        37623 :                      && SLP_TREE_LANES (node) == 1))
   26694        40332 :                 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
   26695              :                                         (SLP_TREE_REPRESENTATIVE (node))))
   26696              :                     != INTEGER_CST))
   26697        62775 :                || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
   26698              :     {
   26699        31420 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26700        31420 :       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
   26701              :     }
   26702      7467961 :   else if ((kind == vec_construct || kind == scalar_to_vec)
   26703       486537 :            && node
   26704       455040 :            && SLP_TREE_DEF_TYPE (node) == vect_external_def)
   26705              :     {
   26706       311626 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26707       311626 :       unsigned i;
   26708       311626 :       tree op;
   26709      1328833 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26710       705581 :         if (TREE_CODE (op) == SSA_NAME)
   26711       482353 :           TREE_VISITED (op) = 0;
   26712      1017207 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26713              :         {
   26714       705581 :           if (TREE_CODE (op) != SSA_NAME
   26715       482353 :               || TREE_VISITED (op))
   26716       257697 :             continue;
   26717       447884 :           TREE_VISITED (op) = 1;
   26718       447884 :           gimple *def = SSA_NAME_DEF_STMT (op);
   26719       447884 :           tree tem;
   26720              :           /* Look through a conversion.  */
   26721       447884 :           if (is_gimple_assign (def)
   26722       256281 :               && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
   26723        28713 :               && ((tem = gimple_assign_rhs1 (def)), true)
   26724       476597 :               && TREE_CODE (tem) == SSA_NAME)
   26725        28502 :             def = SSA_NAME_DEF_STMT (tem);
   26726              :           /* When the component is loaded from memory without sign-
   26727              :              or zero-extension we can move it to a vector register and/or
   26728              :              insert it via vpinsr with a memory operand.  */
   26729       447884 :           if (gimple_assign_load_p (def)
   26730       133827 :               && tree_nop_conversion_p (TREE_TYPE (op),
   26731       133827 :                                         TREE_TYPE (gimple_assign_lhs (def)))
   26732       709242 :               && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
   26733         5584 :                   || TARGET_SSE4_1))
   26734              :             ;
   26735              :           /* When the component is extracted from a vector it is already
   26736              :              in a vector register.  */
   26737       321768 :           else if (is_gimple_assign (def)
   26738       125229 :                    && gimple_assign_rhs_code (def) == BIT_FIELD_REF
   26739       324646 :                    && VECTOR_TYPE_P (TREE_TYPE
   26740              :                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
   26741              :             ;
   26742              :           else
   26743              :             {
   26744       319321 :               if (fp)
   26745              :                 {
   26746              :                   /* Scalar FP values residing in x87 registers need to be
   26747              :                      spilled and reloaded.  */
   26748        13684 :                   auto mode2 = TYPE_MODE (TREE_TYPE (op));
   26749        13684 :                   if (IS_STACK_MODE (mode2))
   26750              :                     {
   26751          967 :                       int cost
   26752              :                         = (ix86_cost->hard_register.fp_store[mode2 == SFmode
   26753          967 :                                                              ? 0 : 1]
   26754          967 :                            + ix86_cost->sse_load[sse_store_index (mode2)]);
   26755          967 :                       stmt_cost += COSTS_N_INSNS (cost) / 2;
   26756              :                     }
   26757        13684 :                   m_num_sse_needed[where]++;
   26758              :                 }
   26759              :               else
   26760              :                 {
   26761       305637 :                   m_num_gpr_needed[where]++;
   26762              : 
   26763       305637 :                   stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
   26764              :                 }
   26765              :             }
   26766              :         }
   26767      1017207 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26768       705581 :         if (TREE_CODE (op) == SSA_NAME)
   26769       482353 :           TREE_VISITED (op) = 0;
   26770              :     }
   26771      7499381 :   if (stmt_cost == -1)
   26772      5752613 :     stmt_cost = ix86_default_vector_cost (kind, mode);
   26773              : 
   26774      7499381 :   if (kind == vec_perm && vectype
   26775       198228 :       && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26776              :       /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body.  */
   26777      7503496 :       && count != 0)
   26778              :     {
   26779         4115 :       bool real_perm = true;
   26780         4115 :       unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
   26781              : 
   26782         4115 :       if (node
   26783         4112 :           && SLP_TREE_LOAD_PERMUTATION (node).exists ()
   26784              :           /* Loop vectorization will have 4 times vec_perm
   26785              :              with index as {0, 0, 0, 0}.
   26786              :              But it actually generates
   26787              :              vec_perm_expr <vect, vect, 0, 0, 0, 0>
   26788              :              vec_perm_expr <vect, vect, 1, 1, 1, 1>
   26789              :              vec_perm_expr <vect, vect, 2, 2, 2, 2>
   26790              :              Need to be handled separately.  */
   26791         7528 :           && is_a <bb_vec_info> (m_vinfo))
   26792              :         {
   26793           35 :           unsigned half = nunits / 2;
   26794           35 :           unsigned i = 0;
   26795           35 :           bool allsame = true;
   26796           35 :           unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
   26797           35 :           bool cross_lane_p = false;
   26798          184 :           for (i = 0 ; i != SLP_TREE_LANES (node); i++)
   26799              :             {
   26800          183 :               unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
   26801              :               /* allsame is just a broadcast.  */
   26802          183 :               if (tmp != first)
   26803           84 :                 allsame = false;
   26804              : 
   26805              :               /* 4 times vec_perm with number of lanes multiple of nunits.  */
   26806          183 :               tmp = tmp & (nunits - 1);
   26807          183 :               unsigned index = i & (nunits - 1);
   26808          183 :               if ((index < half && tmp >= half)
   26809          183 :                   || (index >= half && tmp < half))
   26810           59 :                 cross_lane_p = true;
   26811              : 
   26812          183 :               if (!allsame && cross_lane_p)
   26813              :                 break;
   26814              :             }
   26815              : 
   26816           35 :           if (i == SLP_TREE_LANES (node))
   26817              :             real_perm = false;
   26818              :         }
   26819              : 
   26820              :       if (real_perm)
   26821              :         {
   26822         4114 :           m_num_avx256_vec_perm[where] += count;
   26823         4114 :           if (dump_file && (dump_flags & TDF_DETAILS))
   26824              :             {
   26825          247 :               fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
   26826          247 :               if (stmt_info)
   26827          244 :                 print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
   26828          247 :               fprintf (dump_file, " \n");
   26829              :             }
   26830              :         }
   26831              :     }
   26832              : 
   26833              :   /* Penalize DFmode vector operations for Bonnell.  */
   26834      7499381 :   if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
   26835      7499464 :       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
   26836           12 :     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
   26837              : 
   26838              :   /* Statements in an inner loop relative to the loop being
   26839              :      vectorized are weighted more heavily.  The value here is
   26840              :      arbitrary and could potentially be improved with analysis.  */
   26841      7499381 :   retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
   26842              : 
   26843              :   /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
   26844              :      for Silvermont as it has an out-of-order integer pipeline and can execute
   26845              :      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
   26846      7499381 :   if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
   26847      7499381 :        || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
   26848         2476 :       && stmt_info && stmt_info->stmt)
   26849              :     {
   26850         2132 :       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
   26851         2132 :       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
   26852         1597 :         retval = (retval * 17) / 10;
   26853              :     }
   26854              : 
   26855      7499381 :   m_costs[where] += retval;
   26856              : 
   26857      7499381 :   return retval;
   26858              : }
   26859              : /* Add spill penalties to m_costs for each index whose recorded GPR or SSE register need exceeds the number available.  */
   26860              : void
   26861      1813607 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
   26862              : {
   26863      1813607 :   unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
   26864      1813607 :   unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
   26865              : 
   26866              :   /* SSE registers assumed available: 32 for 64-bit with AVX512F, 16 for other 64-bit targets, 8 otherwise.  Is there a better way to get the target's available FP registers?  Currently the SSE register count is used.  */
   26867      1813607 :   unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
   26868      7254428 :   for (unsigned i = 0; i != 3; i++)  /* NOTE(review): presumably one iteration per cost location used to index m_costs -- confirm.  */
   26869              :     {
   26870      5440821 :       if (m_num_gpr_needed[i] > target_avail_regs)
   26871          754 :         m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
   26872              :       /* Only SSE register pressure is measured, and only if TARGET_SSE.  */
   26873      5440821 :       if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
   26874           90 :         m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
   26875              :     }
   26876      1813607 : }
   26877              : 
   26878              : void
   26879      1813607 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
   26880              : {
   26881      1813607 :   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
   26882       488639 :   if (loop_vinfo && !m_costing_for_scalar)
   26883              :     {
   26884              :       /* We are currently not asking the vectorizer to compare costs
   26885              :          between different vector mode sizes.  When using predication
   26886              :          that will end up always choosing the prefered mode size even
   26887              :          if there's a smaller mode covering all lanes.  Test for this
   26888              :          situation and artificially reject the larger mode attempt.
   26889              :          ???  We currently lack masked ops for sub-SSE sized modes,
   26890              :          so we could restrict this rejection to AVX and AVX512 modes
   26891              :          but err on the safe side for now.  */
   26892       125122 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
   26893           26 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26894           16 :           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26895       125132 :           && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
   26896           20 :               > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
   26897            8 :         m_costs[vect_body] = INT_MAX;
   26898              : 
   26899              :       /* We'd like to avoid using masking if there's an in-order reduction
   26900              :          to vectorize because that will also perform in-order adds of
   26901              :          masked elements (as neutral value, of course) here, but there
   26902              :          is currently no way to indicate to try un-masked with the same
   26903              :          mode.  */
   26904              : 
   26905       125122 :       bool any_reduc_p = false;
   26906       497150 :       for (int i = 0; i != X86_REDUC_LAST; i++)
   26907       373284 :         if (m_num_reduc[i])
   26908              :           {
   26909              :             any_reduc_p = true;
   26910              :             break;
   26911              :           }
   26912              : 
   26913       125122 :       if (any_reduc_p
   26914              :           /* Not much gain for loop with gather and scatter.  */
   26915         1256 :           && m_prefer_unroll
   26916         1098 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
   26917              :         {
   26918         1770 :           unsigned unroll_factor
   26919          885 :             = OPTION_SET_P (ix86_vect_unroll_limit)
   26920          885 :             ? ix86_vect_unroll_limit
   26921          885 :             : ix86_cost->vect_unroll_limit;
   26922              : 
   26923          885 :           if (unroll_factor > 1)
   26924              :             {
   26925         3540 :               for (int i = 0 ; i != X86_REDUC_LAST; i++)
   26926              :                 {
   26927         2655 :                   if (m_num_reduc[i])
   26928              :                     {
   26929          885 :                       unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
   26930              :                                            m_num_reduc[i]);
   26931         2655 :                       unroll_factor = MIN (unroll_factor, tmp);
   26932              :                     }
   26933              :                 }
   26934              : 
   26935         1770 :               m_suggested_unroll_factor  = 1 << ceil_log2 (unroll_factor);
   26936              :             }
   26937              :         }
   26938              : 
   26939              :     }
   26940              : 
   26941      1813607 :   ix86_vect_estimate_reg_pressure ();
   26942              : 
   26943      7254428 :   for (int i = 0; i != 3; i++)
   26944      5440821 :     if (m_num_avx256_vec_perm[i]
   26945          517 :         && TARGET_AVX256_AVOID_VEC_PERM)
   26946            7 :       m_costs[i] = INT_MAX;
   26947              : 
   26948              :   /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
   26949              :      an AVX2 and an SSE epilogue for AVX512 vectorized loops.  */
   26950      1813607 :   if (loop_vinfo
   26951       488639 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26952        43740 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
   26953      1814378 :       && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26954           23 :     m_suggested_epilogue_mode = V16QImode;
   26955              :   /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
   26956              :      enable a 64bit SSE epilogue.  */
   26957      1813607 :   if (loop_vinfo
   26958       488639 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26959        43740 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
   26960      1816291 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
   26961          104 :     m_suggested_epilogue_mode = V8QImode;
   26962              : 
   26963              :   /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
   26964              :      a masked epilogue if that doesn't seem detrimental.  */
   26965      1813607 :   if (loop_vinfo
   26966       488639 :       && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26967       466769 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
   26968              :       /* Avoid a masked epilog if cascaded epilogues eventually get us
   26969              :          to one with VF 1 as that means no scalar epilog at all.  */
   26970        75886 :       && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
   26971        75886 :             / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
   26972           34 :            && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26973        75885 :       && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
   26974      1813779 :       && !OPTION_SET_P (param_vect_partial_vector_usage))
   26975              :     {
   26976          154 :       bool avoid = false;
   26977          154 :       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26978          122 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
   26979              :         {
   26980          122 :           unsigned int peel_niter
   26981              :             = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   26982          122 :           if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
   26983            0 :             peel_niter += 1;
   26984              :           /* When we know the number of scalar iterations of the epilogue,
   26985              :              avoid masking when a single vector epilog iteration handles
   26986              :              it in full.  */
   26987          122 :           if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
   26988          122 :                          % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
   26989              :             avoid = true;
   26990              :         }
   26991          152 :       if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
   26992           14 :         for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
   26993              :           {
   26994            4 :             if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   26995              :               ;
   26996            4 :             else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   26997              :               ;
   26998              :             else
   26999              :               {
   27000            2 :                 int loop_depth
   27001            4 :                     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
   27002            2 :                                           DDR_LOOP_NEST (ddr));
   27003            4 :                 if (DDR_NUM_DIST_VECTS (ddr) == 1
   27004            2 :                     && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
   27005              :                   {
   27006              :                     /* Avoid the case when there's an outer loop that might
   27007              :                        traverse a multi-dimensional array with the inner
   27008              :                        loop just executing the masked epilogue with a
   27009              :                        read-write where the next outer iteration might
   27010              :                        read from the masked part of the previous write,
   27011              :                        'n' filling half a vector.
   27012              :                          for (j = 0; j < m; ++j)
   27013              :                            for (i = 0; i < n; ++i)
   27014              :                              a[j][i] = c * a[j][i];  */
   27015              :                     avoid = true;
   27016              :                     break;
   27017              :                   }
   27018              :               }
   27019              :           }
   27020              :       /* Avoid using masking if there's an in-order reduction
   27021              :          to vectorize because that will also perform in-order adds of
   27022              :          masked elements (as neutral value, of course).  */
   27023          154 :       if (!avoid)
   27024              :         {
   27025          604 :           for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
   27026          158 :             if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
   27027          158 :                 && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
   27028              :                     == FOLD_LEFT_REDUCTION))
   27029              :               {
   27030              :                 avoid = true;
   27031              :                 break;
   27032              :               }
   27033              :         }
   27034          150 :       if (!avoid)
   27035              :         {
   27036          146 :           m_suggested_epilogue_mode = loop_vinfo->vector_mode;
   27037          146 :           m_masked_epilogue = 1;
   27038              :         }
   27039              :     }
   27040              : 
   27041      1813607 :   vector_costs::finish_cost (scalar_costs);
   27042      1813607 : }
   27043              : 
   27044              : /* Return true if THIS should be preferred over OTHER as main vector loop.  */
   27045              : 
   27046              : bool
   27047        30878 : ix86_vector_costs::better_main_loop_than_p (const vector_costs *other) const
   27048              : {
   27049        30878 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->vinfo ());
   27050        30878 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->vinfo ());
   27051              : 
   27052              :   /* If the other loop is masked it does not need an epilog.  Prefer that
   27053              :      if the current loop cannot be fully vectorized using vector
   27054              :      epilogues that leave at most one scalar iteration.  */
   27055        21050 :   if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
   27056        21050 :       && LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
   27057            4 :       && known_gt (LOOP_VINFO_VECT_FACTOR (other_loop_vinfo),
   27058              :                    LOOP_VINFO_INT_NITERS (this_loop_vinfo))
   27059        30882 :       && (popcount_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo) & ~1)
   27060            4 :           > (param_vect_epilogues_nomask != 0)))
   27061              :     return false;
   27062              : 
   27063        30874 :   return vector_costs::better_main_loop_than_p (other);
   27064              : }
   27065              : 
   27066              : /* Return true if THIS should be preferred over OTHER as epilog vector
   27067              :    loop when vectorizing MAIN_LOOP.  */
   27068              : 
   27069              : bool
   27070         1515 : ix86_vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
   27071              :                                                 loop_vec_info main_loop) const
   27072              : {
   27073         1515 :   loop_vec_info this_loop_info = as_a <loop_vec_info> (this->vinfo ());
   27074              :   /* The x86 target allows for multiple vector epilogues, if THIS is
   27075              :      the suggested epilog mode of OTHER then keep the latter unless
   27076              :      THIS has a VF of one which means no further epilog needed.  */
   27077         1515 :   int tem;
   27078         1515 :   if (known_gt (LOOP_VINFO_VECT_FACTOR (this_loop_info), 1U)
   27079         1515 :       && (GET_MODE_SIZE (other->suggested_epilogue_mode (tem))
   27080         2980 :           == GET_MODE_SIZE (this_loop_info->vector_mode)))
   27081              :     return false;
   27082         1438 :   return vector_costs::better_epilogue_loop_than_p (other, main_loop);
   27083              : }
   27084              : 
   27085              : /* Validate target specific memory model bits in VAL. */
   27086              : 
   27087              : static unsigned HOST_WIDE_INT
   27088       410611 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
   27089              : {
   27090       410611 :   enum memmodel model = memmodel_from_int (val);
   27091       410611 :   bool strong;
   27092              : 
   27093       410611 :   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
   27094              :                                       |MEMMODEL_MASK)
   27095       410607 :       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
   27096              :     {
   27097            4 :       warning (OPT_Winvalid_memory_model,
   27098              :                "unknown architecture specific memory model");
   27099            4 :       return MEMMODEL_SEQ_CST;
   27100              :     }
   27101       410607 :   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
   27102       410607 :   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
   27103              :     {
   27104            0 :       warning (OPT_Winvalid_memory_model,
   27105              :               "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
   27106              :                "memory model");
   27107            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
   27108              :     }
   27109       410607 :   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
   27110              :     {
   27111            0 :       warning (OPT_Winvalid_memory_model,
   27112              :               "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
   27113              :                "memory model");
   27114            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
   27115              :     }
   27116              :   return val;
   27117              : }
   27118              : 
   27119              : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   27120              :    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   27121              :    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   27122              :    or number of vecsize_mangle variants that should be emitted.  */
   27123              : 
   27124              : static int
   27125         7593 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   27126              :                                              struct cgraph_simd_clone *clonei,
   27127              :                                              tree base_type, int num,
   27128              :                                              bool explicit_p)
   27129              : {
   27130         7593 :   int ret = 1;
   27131              : 
   27132         7593 :   if (clonei->simdlen
   27133         7593 :       && (clonei->simdlen < 2
   27134         1321 :           || clonei->simdlen > 1024
   27135         1321 :           || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
   27136              :     {
   27137            0 :       if (explicit_p)
   27138            0 :         warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27139              :                     "unsupported simdlen %wd", clonei->simdlen.to_constant ());
   27140            0 :       return 0;
   27141              :     }
   27142              : 
   27143         7593 :   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
   27144         7593 :   if (TREE_CODE (ret_type) != VOID_TYPE)
   27145         6801 :     switch (TYPE_MODE (ret_type))
   27146              :       {
   27147         6801 :       case E_QImode:
   27148         6801 :       case E_HImode:
   27149         6801 :       case E_SImode:
   27150         6801 :       case E_DImode:
   27151         6801 :       case E_SFmode:
   27152         6801 :       case E_DFmode:
   27153              :       /* case E_SCmode: */
   27154              :       /* case E_DCmode: */
   27155         6801 :         if (!AGGREGATE_TYPE_P (ret_type))
   27156              :           break;
   27157              :         /* FALLTHRU */
   27158            2 :       default:
   27159            2 :         if (explicit_p)
   27160            2 :           warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27161              :                       "unsupported return type %qT for simd", ret_type);
   27162            2 :         return 0;
   27163              :       }
   27164              : 
   27165         7591 :   tree t;
   27166         7591 :   int i;
   27167         7591 :   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
   27168         7591 :   bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
   27169              : 
   27170         7591 :   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
   27171        20438 :        t && t != void_list_node; t = TREE_CHAIN (t), i++)
   27172              :     {
   27173        16678 :       tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
   27174        12852 :       switch (TYPE_MODE (arg_type))
   27175              :         {
   27176        12833 :         case E_QImode:
   27177        12833 :         case E_HImode:
   27178        12833 :         case E_SImode:
   27179        12833 :         case E_DImode:
   27180        12833 :         case E_SFmode:
   27181        12833 :         case E_DFmode:
   27182              :         /* case E_SCmode: */
   27183              :         /* case E_DCmode: */
   27184        12833 :           if (!AGGREGATE_TYPE_P (arg_type))
   27185              :             break;
   27186              :           /* FALLTHRU */
   27187           41 :         default:
   27188           41 :           if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
   27189              :             break;
   27190            5 :           if (explicit_p)
   27191            5 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27192              :                         "unsupported argument type %qT for simd", arg_type);
   27193              :           return 0;
   27194              :         }
   27195              :     }
   27196              : 
   27197         7586 :   if (!TREE_PUBLIC (node->decl) || !explicit_p)
   27198              :     {
   27199              :       /* If the function isn't exported, we can pick up just one ISA
   27200              :          for the clones.  */
   27201          114 :       if (TARGET_AVX512F)
   27202            0 :         clonei->vecsize_mangle = 'e';
   27203          114 :       else if (TARGET_AVX2)
   27204            1 :         clonei->vecsize_mangle = 'd';
   27205          113 :       else if (TARGET_AVX)
   27206           88 :         clonei->vecsize_mangle = 'c';
   27207              :       else
   27208           25 :         clonei->vecsize_mangle = 'b';
   27209              :       ret = 1;
   27210              :     }
   27211              :   else
   27212              :     {
   27213         7472 :       clonei->vecsize_mangle = "bcde"[num];
   27214         7472 :       ret = 4;
   27215              :     }
   27216         7586 :   clonei->mask_mode = VOIDmode;
   27217         7586 :   switch (clonei->vecsize_mangle)
   27218              :     {
   27219         1893 :     case 'b':
   27220         1893 :       clonei->vecsize_int = 128;
   27221         1893 :       clonei->vecsize_float = 128;
   27222         1893 :       break;
   27223         1956 :     case 'c':
   27224         1956 :       clonei->vecsize_int = 128;
   27225         1956 :       clonei->vecsize_float = 256;
   27226         1956 :       break;
   27227         1869 :     case 'd':
   27228         1869 :       clonei->vecsize_int = 256;
   27229         1869 :       clonei->vecsize_float = 256;
   27230         1869 :       break;
   27231         1868 :     case 'e':
   27232         1868 :       clonei->vecsize_int = 512;
   27233         1868 :       clonei->vecsize_float = 512;
   27234         1868 :       if (TYPE_MODE (base_type) == QImode)
   27235           19 :         clonei->mask_mode = DImode;
   27236              :       else
   27237         1849 :         clonei->mask_mode = SImode;
   27238              :       break;
   27239              :     }
   27240         7586 :   if (clonei->simdlen == 0)
   27241              :     {
   27242         6265 :       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
   27243         3297 :         clonei->simdlen = clonei->vecsize_int;
   27244              :       else
   27245         2968 :         clonei->simdlen = clonei->vecsize_float;
   27246         6265 :       clonei->simdlen = clonei->simdlen
   27247        12530 :                         / GET_MODE_BITSIZE (TYPE_MODE (base_type));
   27248              :     }
   27249         1321 :   else if (clonei->simdlen > 16)
   27250              :     {
   27251              :       /* For compatibility with ICC, use the same upper bounds
   27252              :          for simdlen.  In particular, for CTYPE below, use the return type,
   27253              :          unless the function returns void, in that case use the characteristic
   27254              :          type.  If it is possible for given SIMDLEN to pass CTYPE value
   27255              :          in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
   27256              :          for 64-bit code), accept that SIMDLEN, otherwise warn and don't
   27257              :          emit corresponding clone.  */
   27258           12 :       tree ctype = ret_type;
   27259           12 :       if (VOID_TYPE_P (ret_type))
   27260            0 :         ctype = base_type;
   27261           24 :       int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
   27262           12 :       if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
   27263            8 :         cnt /= clonei->vecsize_int;
   27264              :       else
   27265            4 :         cnt /= clonei->vecsize_float;
   27266           12 :       if (cnt > (TARGET_64BIT ? 16 : 8))
   27267              :         {
   27268            0 :           if (explicit_p)
   27269            0 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27270              :                         "unsupported simdlen %wd",
   27271              :                         clonei->simdlen.to_constant ());
   27272            0 :           return 0;
   27273              :         }
   27274              :       }
   27275              :   return ret;
   27276              : }
   27277              : 
   27278              : /* If SIMD clone NODE can't be used in a vectorized loop
   27279              :    in current function, return -1, otherwise return a badness of using it
   27280              :    (0 if it is most desirable from vecsize_mangle point of view, 1
   27281              :    slightly less desirable, etc.).  */
   27282              : 
   27283              : static int
   27284         1790 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
   27285              : {
   27286         1790 :   switch (node->simdclone->vecsize_mangle)
   27287              :     {
   27288          638 :     case 'b':
   27289          638 :       if (!TARGET_SSE2)
   27290              :         return -1;
   27291          638 :       if (!TARGET_AVX)
   27292              :         return 0;
   27293          537 :       return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
   27294          630 :     case 'c':
   27295          630 :       if (!TARGET_AVX)
   27296              :         return -1;
   27297          585 :       return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
   27298          334 :     case 'd':
   27299          334 :       if (!TARGET_AVX2)
   27300              :         return -1;
   27301          141 :       return TARGET_AVX512F ? 1 : 0;
   27302          188 :     case 'e':
   27303          188 :       if (!TARGET_AVX512F)
   27304          130 :         return -1;
   27305              :       return 0;
   27306            0 :     default:
   27307            0 :       gcc_unreachable ();
   27308              :     }
   27309              : }
   27310              : 
   27311              : /* This function adjusts the unroll factor based on
   27312              :    the hardware capabilities.  For example, bdver3 has
   27313              :    a loop buffer which makes unrolling of smaller
   27314              :    loops less important. This function decides the
   27315              :    unroll factor using number of memory references
   27316              :    (value 32 is used) as a heuristic. */
   27317              : 
   27318              : static unsigned
   27319       808322 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
   27320              : {
   27321       808322 :   basic_block *bbs;
   27322       808322 :   rtx_insn *insn;
   27323       808322 :   unsigned i;
   27324       808322 :   unsigned mem_count = 0;
   27325              : 
   27326              :   /* Unroll small size loop when unroll factor is not explicitly
   27327              :      specified.  */
   27328       808322 :   if (ix86_unroll_only_small_loops && !loop->unroll)
   27329              :     {
   27330       762614 :       if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
   27331        72386 :         return MIN (nunroll, ix86_cost->small_unroll_factor);
   27332              :       else
   27333              :         return 1;
   27334              :     }
   27335              : 
   27336        45708 :   if (!TARGET_ADJUST_UNROLL)
   27337              :      return nunroll;
   27338              : 
   27339              :   /* Count the number of memory references within the loop body.
   27340              :      This value determines the unrolling factor for bdver3 and bdver4
   27341              :      architectures. */
   27342            8 :   subrtx_iterator::array_type array;
   27343            8 :   bbs = get_loop_body (loop);
   27344           24 :   for (i = 0; i < loop->num_nodes; i++)
   27345          120 :     FOR_BB_INSNS (bbs[i], insn)
   27346          104 :       if (NONDEBUG_INSN_P (insn))
   27347          588 :         FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
   27348          516 :           if (const_rtx x = *iter)
   27349          516 :             if (MEM_P (x))
   27350              :               {
   27351           28 :                 machine_mode mode = GET_MODE (x);
   27352           56 :                 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
   27353           28 :                 if (n_words > 4)
   27354            0 :                   mem_count += 2;
   27355              :                 else
   27356           28 :                   mem_count += 1;
   27357              :               }
   27358            8 :   free (bbs);
   27359              : 
   27360            8 :   if (mem_count && mem_count <=32)
   27361            8 :     return MIN (nunroll, 32 / mem_count);
   27362              : 
   27363              :   return nunroll;
   27364            8 : }
   27365              : 
   27366              : 
   27367              : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
   27368              : 
   27369              : static bool
   27370       418884 : ix86_float_exceptions_rounding_supported_p (void)
   27371              : {
   27372              :   /* For x87 floating point with standard excess precision handling,
   27373              :      there is no adddf3 pattern (since x87 floating point only has
   27374              :      XFmode operations) so the default hook implementation gets this
   27375              :      wrong.  */
   27376       418884 :   return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
   27377              : }
   27378              : 
   27379              : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
   27380              : 
   27381              : static void
   27382         7054 : ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
   27383              : {
   27384         7054 :   if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
   27385              :     return;
   27386         7054 :   tree exceptions_var = create_tmp_var_raw (integer_type_node);
   27387         7054 :   if (TARGET_80387)
   27388              :     {
   27389         7054 :       tree fenv_index_type = build_index_type (size_int (6));
   27390         7054 :       tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
   27391         7054 :       tree fenv_var = create_tmp_var_raw (fenv_type);
   27392         7054 :       TREE_ADDRESSABLE (fenv_var) = 1;
   27393         7054 :       tree fenv_ptr = build_pointer_type (fenv_type);
   27394         7054 :       tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
   27395         7054 :       fenv_addr = fold_convert (ptr_type_node, fenv_addr);
   27396         7054 :       tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
   27397         7054 :       tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
   27398         7054 :       tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
   27399         7054 :       tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
   27400         7054 :       tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
   27401         7054 :       tree hold_fnclex = build_call_expr (fnclex, 0);
   27402         7054 :       fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
   27403              :                          NULL_TREE, NULL_TREE);
   27404         7054 :       *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
   27405              :                       hold_fnclex);
   27406         7054 :       *clear = build_call_expr (fnclex, 0);
   27407         7054 :       tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
   27408         7054 :       tree fnstsw_call = build_call_expr (fnstsw, 0);
   27409         7054 :       tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
   27410              :                             fnstsw_call, NULL_TREE, NULL_TREE);
   27411         7054 :       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
   27412         7054 :       tree update_mod = build4 (TARGET_EXPR, integer_type_node,
   27413              :                                 exceptions_var, exceptions_x87,
   27414              :                                 NULL_TREE, NULL_TREE);
   27415         7054 :       *update = build2 (COMPOUND_EXPR, integer_type_node,
   27416              :                         sw_mod, update_mod);
   27417         7054 :       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
   27418         7054 :       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
   27419              :     }
   27420         7054 :   if (TARGET_SSE && TARGET_SSE_MATH)
   27421              :     {
   27422         7054 :       tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
   27423         7054 :       tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
   27424         7054 :       tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
   27425         7054 :       tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
   27426         7054 :       tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
   27427         7054 :       tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
   27428              :                                       mxcsr_orig_var, stmxcsr_hold_call,
   27429              :                                       NULL_TREE, NULL_TREE);
   27430         7054 :       tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
   27431              :                                   mxcsr_orig_var,
   27432              :                                   build_int_cst (unsigned_type_node, 0x1f80));
   27433         7054 :       hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
   27434              :                              build_int_cst (unsigned_type_node, 0xffffffc0));
   27435         7054 :       tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
   27436              :                                      mxcsr_mod_var, hold_mod_val,
   27437              :                                      NULL_TREE, NULL_TREE);
   27438         7054 :       tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
   27439         7054 :       tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
   27440              :                               hold_assign_orig, hold_assign_mod);
   27441         7054 :       hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
   27442              :                          ldmxcsr_hold_call);
   27443         7054 :       if (*hold)
   27444         7054 :         *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
   27445              :       else
   27446            0 :         *hold = hold_all;
   27447         7054 :       tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
   27448         7054 :       if (*clear)
   27449         7054 :         *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
   27450              :                          ldmxcsr_clear_call);
   27451              :       else
   27452            0 :         *clear = ldmxcsr_clear_call;
   27453         7054 :       tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
   27454         7054 :       tree exceptions_sse = fold_convert (integer_type_node,
   27455              :                                           stxmcsr_update_call);
   27456         7054 :       if (*update)
   27457              :         {
   27458         7054 :           tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
   27459              :                                         exceptions_var, exceptions_sse);
   27460         7054 :           tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
   27461              :                                            exceptions_var, exceptions_mod);
   27462         7054 :           *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
   27463              :                             exceptions_assign);
   27464              :         }
   27465              :       else
   27466            0 :         *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
   27467              :                           exceptions_sse, NULL_TREE, NULL_TREE);
   27468         7054 :       tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
   27469         7054 :       *update = build2 (COMPOUND_EXPR, void_type_node, *update,
   27470              :                         ldmxcsr_update_call);
   27471              :     }
   27472         7054 :   tree atomic_feraiseexcept
   27473         7054 :     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
   27474         7054 :   tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
   27475              :                                                     1, exceptions_var);
   27476         7054 :   *update = build2 (COMPOUND_EXPR, void_type_node, *update,
   27477              :                     atomic_feraiseexcept_call);
   27478              : }
   27479              : 
   27480              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   27481              : /* For i386, a common symbol is local only for non-PIE binaries.  For
   27482              :    x86-64, a common symbol is local only for non-PIE binaries or when
   27483              :    the linker supports copy relocs in PIE binaries.  */
   27484              : 
   27485              : static bool
   27486    773860184 : ix86_binds_local_p (const_tree exp)
   27487              : {
   27488    773860184 :   bool direct_extern_access
   27489    773860184 :     = (ix86_direct_extern_access
   27490   1544201800 :        && !(VAR_OR_FUNCTION_DECL_P (exp)
   27491    770341616 :             && lookup_attribute ("nodirect_extern_access",
   27492    770341616 :                                  DECL_ATTRIBUTES (exp))));
   27493    773860184 :   if (!direct_extern_access)
   27494         1225 :     ix86_has_no_direct_extern_access = true;
   27495    773860184 :   return default_binds_local_p_3 (exp, flag_shlib != 0, true,
   27496              :                                   direct_extern_access,
   27497              :                                   (direct_extern_access
   27498    773858959 :                                    && (!flag_pic
   27499    132684209 :                                        || (TARGET_64BIT
   27500    773860184 :                                            && HAVE_LD_PIE_COPYRELOC != 0))));
   27501              : }
   27502              : 
   27503              : /* If flag_pic is set or ix86_direct_extern_access is false, then neither
   27504              :    local nor global relocs should be placed in readonly memory.  */
   27505              : 
   27506              : static int
   27507      5141460 : ix86_reloc_rw_mask (void)
   27508              : {
   27509      5141460 :   return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
   27510              : }
   27511              : #endif
   27512              : 
   27513              : /* Return true iff ADDR can be used as a symbolic base address.  */
   27514              : 
   27515              : static bool
   27516         3004 : symbolic_base_address_p (rtx addr)
   27517              : {
   27518            0 :   if (SYMBOL_REF_P (addr))
   27519              :     return true;
   27520              : 
   27521         2980 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
   27522            0 :     return true;
   27523              : 
   27524              :   return false;
   27525              : }
   27526              : 
   27527              : /* Return true iff ADDR can be used as a base address.  */
   27528              : 
   27529              : static bool
   27530         4577 : base_address_p (rtx addr)
   27531              : {
   27532            0 :   if (REG_P (addr))
   27533              :     return true;
   27534              : 
   27535         2830 :   if (symbolic_base_address_p (addr))
   27536            0 :     return true;
   27537              : 
   27538              :   return false;
   27539              : }
   27540              : 
   27541              : /* If MEM is in the form of [(base+symbase)+offset], store the three
   27542              :    parts of the address in *BASE, *SYMBASE and *OFFSET and return true;
   27543              :    otherwise return false.  */
   27544              : 
   27545              : static bool
   27546         2953 : extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
   27547              : {
   27548         2953 :   rtx addr;
   27549              : 
   27550         2953 :   gcc_assert (MEM_P (mem));
   27551              : 
   27552         2953 :   addr = XEXP (mem, 0);
   27553              : 
   27554         2953 :   if (GET_CODE (addr) == CONST)
   27555           10 :     addr = XEXP (addr, 0);
   27556              : 
   27557         2953 :   if (base_address_p (addr))
   27558              :     {
   27559         1329 :       *base = addr;
   27560         1329 :       *symbase = const0_rtx;
   27561         1329 :       *offset = const0_rtx;
   27562         1329 :       return true;
   27563              :     }
   27564              : 
   27565         1624 :   if (GET_CODE (addr) == PLUS
   27566         1624 :       && base_address_p (XEXP (addr, 0)))
   27567              :     {
   27568          442 :       rtx addend = XEXP (addr, 1);
   27569              : 
   27570          442 :       if (GET_CODE (addend) == CONST)
   27571            0 :         addend = XEXP (addend, 0);
   27572              : 
   27573          442 :       if (CONST_INT_P (addend))
   27574              :         {
   27575          268 :           *base = XEXP (addr, 0);
   27576          268 :           *symbase = const0_rtx;
   27577          268 :           *offset = addend;
   27578          268 :           return true;
   27579              :         }
   27580              : 
   27581              :       /* Also accept REG + symbolic ref, with or without a CONST_INT
   27582              :          offset.  */
   27583          174 :       if (REG_P (XEXP (addr, 0)))
   27584              :         {
   27585          174 :           if (symbolic_base_address_p (addend))
   27586              :             {
   27587            0 :               *base = XEXP (addr, 0);
   27588            0 :               *symbase = addend;
   27589            0 :               *offset = const0_rtx;
   27590            0 :               return true;
   27591              :             }
   27592              : 
   27593          174 :           if (GET_CODE (addend) == PLUS
   27594            0 :               && symbolic_base_address_p (XEXP (addend, 0))
   27595          174 :               && CONST_INT_P (XEXP (addend, 1)))
   27596              :             {
   27597            0 :               *base = XEXP (addr, 0);
   27598            0 :               *symbase = XEXP (addend, 0);
   27599            0 :               *offset = XEXP (addend, 1);
   27600            0 :               return true;
   27601              :             }
   27602              :         }
   27603              :     }
   27604              : 
   27605              :   return false;
   27606              : }
   27607              : 
   27608              : /* Given OPERANDS of consecutive load/store, check if we can merge
   27609              :    them into move multiple.  LOAD is true if they are load instructions.
   27610              :    MODE is the mode of memory operands.  */
   27611              : 
   27612              : bool
   27613         1618 : ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
   27614              :                                     machine_mode mode)
   27615              : {
   27616         1618 :   HOST_WIDE_INT offval_1, offval_2, msize;
   27617         1618 :   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
   27618              :     symbase_1, symbase_2, offset_1, offset_2;
   27619              : 
   27620         1618 :   if (load)
   27621              :     {
   27622         1322 :       mem_1 = operands[1];
   27623         1322 :       mem_2 = operands[3];
   27624         1322 :       reg_1 = operands[0];
   27625         1322 :       reg_2 = operands[2];
   27626              :     }
   27627              :   else
   27628              :     {
   27629          296 :       mem_1 = operands[0];
   27630          296 :       mem_2 = operands[2];
   27631          296 :       reg_1 = operands[1];
   27632          296 :       reg_2 = operands[3];
   27633              :     }
   27634              : 
   27635         1618 :   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
   27636              : 
   27637         1618 :   if (REGNO (reg_1) != REGNO (reg_2))
   27638              :     return false;
   27639              : 
   27640              :   /* Check if the addresses are in the form of [(base+symbase)+offset].  */
   27641         1616 :   if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
   27642              :     return false;
   27643         1337 :   if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
   27644              :     return false;
   27645              : 
   27646              :   /* Check if the bases are the same.  */
   27647          260 :   if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
   27648          115 :     return false;
   27649              : 
   27650          145 :   offval_1 = INTVAL (offset_1);
   27651          145 :   offval_2 = INTVAL (offset_2);
   27652          145 :   msize = GET_MODE_SIZE (mode);
   27653              :   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
   27654          145 :   if (offval_1 + msize != offval_2)
   27655              :     return false;
   27656              : 
   27657              :   return true;
   27658              : }
   27659              : 
   27660              : /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
   27661              : 
   27662              : static bool
   27663       363920 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
   27664              :                         optimization_type opt_type)
   27665              : {
   27666       363920 :   switch (op)
   27667              :     {
   27668          231 :     case asin_optab:
   27669          231 :     case acos_optab:
   27670          231 :     case log1p_optab:
   27671          231 :     case exp_optab:
   27672          231 :     case exp10_optab:
   27673          231 :     case exp2_optab:
   27674          231 :     case expm1_optab:
   27675          231 :     case ldexp_optab:
   27676          231 :     case scalb_optab:
   27677          231 :     case round_optab:
   27678          231 :     case lround_optab:
   27679          231 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27680              : 
   27681          286 :     case rint_optab:
   27682          286 :       if (SSE_FLOAT_MODE_P (mode1)
   27683          139 :           && TARGET_SSE_MATH
   27684          127 :           && !flag_trapping_math
   27685           21 :           && !TARGET_SSE4_1
   27686              :           && mode1 != HFmode)
   27687           21 :         return opt_type == OPTIMIZE_FOR_SPEED;
   27688              :       return true;
   27689              : 
   27690         1972 :     case floor_optab:
   27691         1972 :     case ceil_optab:
   27692         1972 :     case btrunc_optab:
   27693         1972 :       if ((SSE_FLOAT_MODE_P (mode1)
   27694         1582 :            && TARGET_SSE_MATH
   27695         1515 :            && TARGET_SSE4_1)
   27696         1905 :           || mode1 == HFmode)
   27697              :         return true;
   27698         1836 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27699              : 
   27700           66 :     case rsqrt_optab:
   27701           66 :       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
   27702              : 
   27703              :     default:
   27704              :       return true;
   27705              :     }
   27706              : }
   27707              : 
   27708              : /* Address space support.
   27709              : 
   27710              :    This is not "far pointers" in the 16-bit sense, but an easy way
   27711              :    to use %fs and %gs segment prefixes.  Therefore:
   27712              : 
   27713              :     (a) All address spaces have the same modes,
   27714              :     (b) All address spaces have the same address forms,
   27715              :     (c) While %fs and %gs are technically subsets of the generic
   27716              :         address space, they are probably not subsets of each other.
   27717              :     (d) Since we have no access to the segment base register values
   27718              :         without resorting to a system call, we cannot convert a
   27719              :         non-default address space to a default address space.
   27720              :         Therefore we do not claim %fs or %gs are subsets of generic.
   27721              : 
   27722              :    Therefore we can (mostly) use the default hooks.  */
   27723              : 
   27724              : /* All use of segmentation is assumed to make address 0 valid.  */
   27725              : 
   27726              : static bool
   27727     67987938 : ix86_addr_space_zero_address_valid (addr_space_t as)
   27728              : {
   27729     67987938 :   return as != ADDR_SPACE_GENERIC;
   27730              : }
   27731              : 
   27732              : static void
   27733       778167 : ix86_init_libfuncs (void)
   27734              : {
   27735       778167 :   if (TARGET_64BIT)
   27736              :     {
   27737       763217 :       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
   27738       763217 :       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
   27739              :     }
   27740              :   else
   27741              :     {
   27742        14950 :       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
   27743        14950 :       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
   27744              :     }
   27745              : 
   27746              : #if TARGET_MACHO
   27747              :   darwin_rename_builtins ();
   27748              : #endif
   27749       778167 : }
   27750              : 
   27751              : /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   27752              :    FPU, assume that the fpcw is set to extended precision; when using
   27753              :    only SSE, rounding is correct; when using both SSE and the FPU,
   27754              :    the rounding precision is indeterminate, since either may be chosen
   27755              :    apparently at random.  */
   27756              : 
   27757              : static enum flt_eval_method
   27758     89719083 : ix86_get_excess_precision (enum excess_precision_type type)
   27759              : {
   27760     89719083 :   switch (type)
   27761              :     {
   27762     85724855 :       case EXCESS_PRECISION_TYPE_FAST:
   27763              :         /* The fastest type to promote to will always be the native type,
   27764              :            whether that occurs with implicit excess precision or
   27765              :            otherwise.  */
   27766     85724855 :         return TARGET_AVX512FP16
   27767     85724855 :                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
   27768     85724855 :                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27769      3994147 :       case EXCESS_PRECISION_TYPE_STANDARD:
   27770      3994147 :       case EXCESS_PRECISION_TYPE_IMPLICIT:
   27771              :         /* Otherwise, the excess precision we want when we are
   27772              :            in a standards compliant mode, and the implicit precision we
   27773              :            provide would be identical were it not for the unpredictable
   27774              :            cases.  */
   27775      3994147 :         if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
   27776              :           return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
   27777      3988341 :         else if (!TARGET_80387)
   27778              :           return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27779      3982219 :         else if (!TARGET_MIX_SSE_I387)
   27780              :           {
   27781      3982047 :             if (!(TARGET_SSE && TARGET_SSE_MATH))
   27782              :               return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
   27783      2993429 :             else if (TARGET_SSE2)
   27784              :               return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27785              :           }
   27786              : 
   27787              :         /* If we are in standards compliant mode, but we know we will
   27788              :            calculate in unpredictable precision, return
   27789              :            FLT_EVAL_METHOD_PROMOTE_TO_FLOAT.  There is no reason to
   27790              :            introduce explicit excess precision if the target can't
   27791              :            guarantee it will honor it.  */
   27792          320 :         return (type == EXCESS_PRECISION_TYPE_STANDARD
   27793          320 :                 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
   27794              :                 : FLT_EVAL_METHOD_UNPREDICTABLE);
   27795           81 :       case EXCESS_PRECISION_TYPE_FLOAT16:
   27796           81 :         if (TARGET_80387
   27797           75 :             && !(TARGET_SSE_MATH && TARGET_SSE))
   27798            4 :           error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
   27799              :         return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
   27800            0 :       default:
   27801            0 :         gcc_unreachable ();
   27802              :     }
   27803              : 
   27804              :   return FLT_EVAL_METHOD_UNPREDICTABLE;
   27805              : }
   27806              : 
   27807              : /* Return true if _BitInt(N) is supported and fill its details into *INFO.  */
   27808              : bool
   27809       356241 : ix86_bitint_type_info (int n, struct bitint_info *info)
   27810              : {
   27811       356241 :   if (n <= 8)
   27812         9031 :     info->limb_mode = QImode;
   27813       347210 :   else if (n <= 16)
   27814         1893 :     info->limb_mode = HImode;
   27815       345317 :   else if (n <= 32 || (!TARGET_64BIT && n > 64))
   27816        45572 :     info->limb_mode = SImode;
   27817              :   else
   27818       299745 :     info->limb_mode = DImode;
   27819       356241 :   info->abi_limb_mode = info->limb_mode;
   27820       356241 :   info->big_endian = false;
   27821       356241 :   info->extended = bitint_ext_undef;
   27822       356241 :   return true;
   27823              : }
   27824              : 
   27825              : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return DFmode, TFmode
   27826              :    or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
   27827              :    based on long double bits, go with the default one for the others.  */
   27828              : 
   27829              : static machine_mode
   27830      3656455 : ix86_c_mode_for_floating_type (enum tree_index ti)
   27831              : {
   27832      3656455 :   if (ti == TI_LONG_DOUBLE_TYPE)
   27833       610377 :     return (TARGET_LONG_DOUBLE_64 ? DFmode
   27834       610345 :                                   : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
   27835      3046078 :   return default_mode_for_floating_type (ti);
   27836              : }
   27837              : 
   27838              : /* Returns modified FUNCTION_TYPE for cdtor callabi.  */
   27839              : tree
   27840        13904 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
   27841              : {
   27842        13904 :   if (TARGET_64BIT
   27843           71 :       || TARGET_RTD
   27844        13975 :       || ix86_function_type_abi (fntype) != MS_ABI)
   27845        13904 :     return fntype;
   27846              :   /* For 32-bit MS ABI add thiscall attribute.  */
   27847            0 :   tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
   27848            0 :                             TYPE_ATTRIBUTES (fntype));
   27849            0 :   return build_type_attribute_variant (fntype, attribs);
   27850              : }
   27851              : 
   27852              : /* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   27853              :    decrements by exactly 2 no matter what the position was, there is no pushb.
   27854              : 
   27855              :    But as CIE data alignment factor on this arch is -4 for 32bit targets
   27856              :    and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   27857              :    are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */
   27858              : 
   27859              : poly_int64
   27860    273906841 : ix86_push_rounding (poly_int64 bytes)
   27861              : {
   27862    353608129 :   return ROUND_UP (bytes, UNITS_PER_WORD);
   27863              : }
   27864              : 
   27865              : /* Use 8 bits of metadata starting from bit 48 for LAM_U48,
   27866              :    6 bits of metadata starting from bit 57 for LAM_U57.  */
   27867              : #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48             \
   27868              :                            ? 48                                 \
   27869              :                            : (ix86_lam_type == lam_u57 ? 57 : 0))
   27870              : #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48          \
   27871              :                               ? 8                               \
   27872              :                               : (ix86_lam_type == lam_u57 ? 6 : 0))
   27873              : 
   27874              : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  */
   27875              : bool
   27876      6241371 : ix86_memtag_can_tag_addresses ()
   27877              : {
   27878      6241371 :   return ix86_lam_type != lam_none && TARGET_LP64;
   27879              : }
   27880              : 
   27881              : /* Implement TARGET_MEMTAG_TAG_BITSIZE.  */
   27882              : unsigned char
   27883          435 : ix86_memtag_tag_bitsize ()
   27884              : {
   27885          435 :   return IX86_HWASAN_TAG_SIZE;
   27886              : }
   27887              : 
   27888              : /* Implement TARGET_MEMTAG_SET_TAG.  */
   27889              : rtx
   27890          103 : ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
   27891              : {
   27892              :   /* default_memtag_insert_random_tag may
   27893              :      generate a tag value wider than 6 bits.  */
   27894          103 :   if (ix86_lam_type == lam_u57)
   27895              :     {
   27896          103 :       unsigned HOST_WIDE_INT and_imm
   27897              :         = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
   27898              : 
   27899          103 :       emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
   27900              :     }
   27901          103 :   tag = expand_simple_binop (Pmode, ASHIFT, tag,
   27902          103 :                              GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
   27903              :                              /* unsignedp = */1, OPTAB_WIDEN);
   27904          103 :   rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
   27905              :                                  /* unsignedp = */1, OPTAB_DIRECT);
   27906          103 :   return ret;
   27907              : }
   27908              : 
   27909              : /* Implement TARGET_MEMTAG_EXTRACT_TAG.  */
   27910              : rtx
   27911          174 : ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
   27912              : {
   27913          174 :   rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
   27914          174 :                                  GEN_INT (IX86_HWASAN_SHIFT), target,
   27915              :                                  /* unsignedp = */0,
   27916              :                                  OPTAB_DIRECT);
   27917          174 :   rtx ret = gen_reg_rtx (QImode);
   27918              :   /* Mask off bit63 when LAM_U57.  */
   27919          174 :   if (ix86_lam_type == lam_u57)
   27920              :     {
   27921          174 :       unsigned HOST_WIDE_INT and_imm
   27922              :         = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
   27923          174 :       emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
   27924          174 :                              gen_int_mode (and_imm, QImode)));
   27925              :     }
   27926              :   else
   27927            0 :     emit_move_insn (ret, gen_lowpart (QImode, tag));
   27928          174 :   return ret;
   27929              : }
   27930              : 
   27931              : /* Implement TARGET_MEMTAG_UNTAGGED_POINTER.  */
   27932              : rtx
   27933          111 : ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
   27934              : {
   27935              :   /* Leave bit63 alone.  */
   27936          111 :   rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
   27937          111 :                                 + (HOST_WIDE_INT_1U << 63) - 1),
   27938          111 :                                Pmode);
   27939          111 :   rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
   27940              :                                            tag_mask, target, true,
   27941              :                                            OPTAB_DIRECT);
   27942          111 :   gcc_assert (untagged_base);
   27943          111 :   return untagged_base;
   27944              : }
   27945              : 
   27946              : /* Implement TARGET_MEMTAG_ADD_TAG.  */
   27947              : rtx
   27948           87 : ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
   27949              : {
   27950           87 :   rtx base_tag = gen_reg_rtx (QImode);
   27951           87 :   rtx base_addr = gen_reg_rtx (Pmode);
   27952           87 :   rtx tagged_addr = gen_reg_rtx (Pmode);
   27953           87 :   rtx new_tag = gen_reg_rtx (QImode);
   27954          174 :   unsigned HOST_WIDE_INT and_imm
   27955           87 :     = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
   27956              : 
   27957              :   /* When there's "overflow" in tag adding,
   27958              :      need to mask the most significant bit off.  */
   27959           87 :   emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
   27960           87 :   emit_move_insn (base_addr,
   27961              :                   ix86_memtag_untagged_pointer (base, NULL_RTX));
   27962           87 :   emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
   27963           87 :   emit_move_insn (new_tag, base_tag);
   27964           87 :   emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
   27965           87 :   emit_move_insn (tagged_addr,
   27966              :                   ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
   27967           87 :   return plus_constant (Pmode, tagged_addr, offset);
   27968              : }
   27969              : 
   27970              : /* Implement TARGET_HAVE_CCMP.  */
   27971              : static bool
   27972      8079737 : ix86_have_ccmp ()
   27973              : {
   27974      8079737 :   return (bool) TARGET_APX_CCMP;
   27975              : }
   27976              : 
   27977              : /* Implement TARGET_MODE_CAN_TRANSFER_BITS.  Return false only when MODE is a float or complex-float mode whose inner mode is SFmode or DFmode and FPMATH_387 is set in ix86_fpmath; otherwise return true.  */
   27978              : static bool
   27979      4591201 : ix86_mode_can_transfer_bits (machine_mode mode)
   27980              : {
   27981      4591201 :   if (GET_MODE_CLASS (mode) == MODE_FLOAT
   27982      4544198 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   27983       112540 :     switch (GET_MODE_INNER (mode))
   27984              :       {
   27985        54066 :       case E_SFmode:
   27986        54066 :       case E_DFmode:
   27987              :         /* These suffer from normalization upon load when not using SSE.  */
   27988        54066 :         return !(ix86_fpmath & FPMATH_387);
   27989              :       default:
   27990              :         return true;
   27991              :       }
   27992              : 
   27993              :   return true;
   27994              : }
   27995              : 
   27996              : /* Implement TARGET_REDZONE_CLOBBER.  Record in cfun->machine that a red-zone clobber was seen; when the red zone is in use return a BLKmode MEM of RED_ZONE_SIZE bytes at stack pointer - RED_ZONE_SIZE, else NULL_RTX.  */
   27997              : static rtx
   27998            2 : ix86_redzone_clobber ()
   27999              : {
   28000            2 :   cfun->machine->asm_redzone_clobber_seen = true;
   28001            2 :   if (ix86_using_red_zone ())
   28002              :     {
   28003            2 :       rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
   28004            2 :       rtx mem = gen_rtx_MEM (BLKmode, base);
   28005            2 :       set_mem_size (mem, RED_ZONE_SIZE);
   28006            2 :       return mem;
   28007              :     }
   28008              :   return NULL_RTX;
   28009              : }
   28010              : 
   28011              : /* Target-specific selftests.  */
   28012              : 
   28013              : #if CHECKING_P
   28014              : 
   28015              : namespace selftest {
   28016              : 
   28017              : /* Verify that hard regs are dumped as expected (in compact mode): SImode hard regs 0 and 1 must print as "ax" and "dx".  */
   28018              : 
   28019              : static void
   28020            4 : ix86_test_dumping_hard_regs ()
   28021              : {
   28022            4 :   ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
   28023            4 :   ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
   28024            4 : }
   28025              : 
   28026              : /* Test dumping an insn with repeated references to the same SCRATCH,
   28027              :    to verify the rtx_reuse code.  */
   28028              : 
   28029              : static void
   28030            4 : ix86_test_dumping_memory_blockage ()
   28031              : {
   28032            4 :   set_new_first_and_last_insn (NULL, NULL);
   28033              : 
   28034            4 :   rtx pat = gen_memory_blockage ();
   28035            4 :   rtx_reuse_manager r;
   28036            4 :   r.preprocess (pat);
   28037              : 
   28038              :   /* Verify that the repeated references to the SCRATCH use
   28039              :      reuse IDs.  The first should be prefixed with a reuse ID,
   28040              :      and the second should be dumped as a "reuse_rtx" of that ID.
   28041              :      The expected string assumes Pmode == DImode.  */
   28042            4 :   if (Pmode == DImode)
   28043            4 :     ASSERT_RTL_DUMP_EQ_WITH_REUSE
   28044              :       ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
   28045              :        "        (unspec:BLK [\n"
   28046              :        "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
   28047              :        "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
   28048            4 : }
   28049              : 
   28050              : /* Verify loading an RTL dump; specifically a dump of copying
   28051              :    a param on x86_64 from a hard reg into the frame.
   28052              :    This test is target-specific since the dump contains target-specific
   28053              :    hard reg names.  */
   28054              : 
   28055              : static void
   28056            4 : ix86_test_loading_dump_fragment_1 ()
   28057              : {
   28058            4 :   rtl_dump_test t (SELFTEST_LOCATION,
   28059            4 :                    locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
   28060              : 
   28061            4 :   rtx_insn *insn = get_insn_by_uid (1);
   28062              : 
   28063              :   /* The block structure and indentation here is purely for
   28064              :      readability; it mirrors the structure of the rtx.  */
   28065            4 :   tree mem_expr;
   28066            4 :   {
   28067            4 :     rtx pat = PATTERN (insn);
   28068            4 :     ASSERT_EQ (SET, GET_CODE (pat));
   28069            4 :     {
   28070            4 :       rtx dest = SET_DEST (pat);
   28071            4 :       ASSERT_EQ (MEM, GET_CODE (dest));
   28072              :       /* Verify the "/c" was parsed.  */
   28073            4 :       ASSERT_TRUE (RTX_FLAG (dest, call));
   28074            4 :       ASSERT_EQ (SImode, GET_MODE (dest));
   28075            4 :       {
   28076            4 :         rtx addr = XEXP (dest, 0);
   28077            4 :         ASSERT_EQ (PLUS, GET_CODE (addr));
   28078            4 :         ASSERT_EQ (DImode, GET_MODE (addr));
   28079            4 :         {
   28080            4 :           rtx lhs = XEXP (addr, 0);
   28081              :           /* Verify that the "frame" REG was consolidated.  */
   28082            4 :           ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
   28083              :         }
   28084            4 :         {
   28085            4 :           rtx rhs = XEXP (addr, 1);
   28086            4 :           ASSERT_EQ (CONST_INT, GET_CODE (rhs));
   28087            4 :           ASSERT_EQ (-4, INTVAL (rhs));
   28088              :         }
   28089              :       }
   28090              :       /* Verify the "[1 i+0 S4 A32]" was parsed.  */
   28091            4 :       ASSERT_EQ (1, MEM_ALIAS_SET (dest));
   28092              :       /* "i" should have been handled by synthesizing a global int
   28093              :          variable named "i".  */
   28094            4 :       mem_expr = MEM_EXPR (dest);
   28095            4 :       ASSERT_NE (mem_expr, NULL);
   28096            4 :       ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
   28097            4 :       ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
   28098            4 :       ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
   28099            4 :       ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
   28100              :       /* "+0".  */
   28101            4 :       ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
   28102            4 :       ASSERT_EQ (0, MEM_OFFSET (dest));
   28103              :       /* "S4".  */
   28104            4 :       ASSERT_EQ (4, MEM_SIZE (dest));
   28105              :       /* "A32".  */
   28106            4 :       ASSERT_EQ (32, MEM_ALIGN (dest));
   28107              :     }
   28108            4 :     {
   28109            4 :       rtx src = SET_SRC (pat);
   28110            4 :       ASSERT_EQ (REG, GET_CODE (src));
   28111            4 :       ASSERT_EQ (SImode, GET_MODE (src));
   28112            4 :       ASSERT_EQ (5, REGNO (src));
   28113            4 :       tree reg_expr = REG_EXPR (src);
   28114              :       /* "i" here should point to the same var as for the MEM_EXPR.  */
   28115            4 :       ASSERT_EQ (reg_expr, mem_expr);
   28116              :     }
   28117              :   }
   28118            4 : }
   28119              : 
   28120              : /* Verify that the RTL loader copes with a call_insn dump.
   28121              :    This test is target-specific since the dump contains a target-specific
   28122              :    hard reg name.  */
   28123              : 
   28124              : static void
   28125            4 : ix86_test_loading_call_insn ()
   28126              : {
   28127              :   /* The test dump includes register "xmm0", which requires TARGET_SSE
   28128              :      to exist.  */
   28129            4 :   if (!TARGET_SSE)
   28130            0 :     return;
   28131              : 
   28132            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
   28133              : 
   28134            4 :   rtx_insn *insn = get_insns ();
   28135            4 :   ASSERT_EQ (CALL_INSN, GET_CODE (insn));
   28136              : 
   28137              :   /* "/j".  */
   28138            4 :   ASSERT_TRUE (RTX_FLAG (insn, jump));
   28139              : 
   28140            4 :   rtx pat = PATTERN (insn);
   28141            4 :   ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
   28142              : 
   28143              :   /* Verify REG_NOTES.  */
   28144            4 :   {
   28145              :     /* "(expr_list:REG_CALL_DECL".   */
   28146            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
   28147            4 :     rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
   28148            4 :     ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
   28149              : 
   28150              :     /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
   28151            4 :     rtx_expr_list *note1 = note0->next ();
   28152            4 :     ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
   28153              : 
   28154            4 :     ASSERT_EQ (NULL, note1->next ());
   28155              :   }
   28156              : 
   28157              :   /* Verify CALL_INSN_FUNCTION_USAGE.  */
   28158            4 :   {
   28159              :     /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
   28160            4 :     rtx_expr_list *usage
   28161            4 :       = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
   28162            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
   28163            4 :     ASSERT_EQ (DFmode, GET_MODE (usage));
   28164            4 :     ASSERT_EQ (USE, GET_CODE (usage->element ()));
   28165            4 :     ASSERT_EQ (NULL, usage->next ());
   28166              :   }
   28167            4 : }
   28168              : 
   28169              : /* Verify that the RTL loader copes with a dump from print_rtx_function.
   28170              :    This test is target-specific since the dump contains target-specific
   28171              :    hard reg names.  */
   28172              : 
   28173              : static void
   28174            4 : ix86_test_loading_full_dump ()
   28175              : {
   28176            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
   28177              : 
   28178            4 :   ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28179              : 
   28180            4 :   rtx_insn *insn_1 = get_insn_by_uid (1);
   28181            4 :   ASSERT_EQ (NOTE, GET_CODE (insn_1));
   28182              : 
   28183            4 :   rtx_insn *insn_7 = get_insn_by_uid (7);
   28184            4 :   ASSERT_EQ (INSN, GET_CODE (insn_7));
   28185            4 :   ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
   28186              : 
   28187            4 :   rtx_insn *insn_15 = get_insn_by_uid (15);
   28188            4 :   ASSERT_EQ (INSN, GET_CODE (insn_15));
   28189            4 :   ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
   28190              : 
   28191              :   /* Verify crtl->return_rtx.  */
   28192            4 :   ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
   28193            4 :   ASSERT_EQ (0, REGNO (crtl->return_rtx));
   28194            4 :   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
   28195            4 : }
   28196              : 
   28197              : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   28198              :    In particular, verify that it correctly loads the 2nd operand.
   28199              :    This test is target-specific since these are machine-specific
   28200              :    operands (and enums).  */
   28201              : 
   28202              : static void
   28203            4 : ix86_test_loading_unspec ()
   28204              : {
   28205            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
   28206              : 
   28207            4 :   ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28208              :   /* NOTE(review): cfun->decl has already been dereferenced by the ASSERT_STREQ above, so this check comes late.  */
   28209            4 :   ASSERT_TRUE (cfun);
   28210              : 
   28211              :   /* Test of an UNSPEC.  */
   28212            4 :    rtx_insn *insn = get_insns ();
   28213            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28214            4 :   rtx set = single_set (insn);
   28215            4 :   ASSERT_NE (NULL, set);
   28216            4 :   rtx dst = SET_DEST (set);
   28217            4 :   ASSERT_EQ (MEM, GET_CODE (dst));
   28218            4 :   rtx src = SET_SRC (set);
   28219            4 :   ASSERT_EQ (UNSPEC, GET_CODE (src));
   28220            4 :   ASSERT_EQ (BLKmode, GET_MODE (src));
   28221            4 :   ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
   28222              : 
   28223            4 :   rtx v0 = XVECEXP (src, 0, 0);
   28224              : 
   28225              :   /* Verify that the two uses of the first SCRATCH have pointer
   28226              :      equality.  */
   28227            4 :   rtx scratch_a = XEXP (dst, 0);
   28228            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
   28229              : 
   28230            4 :   rtx scratch_b = XEXP (v0, 0);
   28231            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
   28232              : 
   28233            4 :   ASSERT_EQ (scratch_a, scratch_b);
   28234              : 
   28235              :   /* Verify that the two mems are thus treated as equal.  */
   28236            4 :   ASSERT_TRUE (rtx_equal_p (dst, v0));
   28237              : 
   28238              :   /* Verify that the insn is recognized.  */
   28239            4 :   ASSERT_NE(-1, recog_memoized (insn));
   28240              : 
   28241              :   /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
   28242            4 :   insn = NEXT_INSN (insn);
   28243            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28244              : 
   28245            4 :   set = single_set (insn);
   28246            4 :   ASSERT_NE (NULL, set);
   28247              : 
   28248            4 :   src = SET_SRC (set);
   28249            4 :   ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
   28250            4 :   ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
   28251            4 : }
   28252              : 
   28253              : /* Run all target-specific selftests: the two dump tests first, then the RTL-loading tests.  */
   28254              : 
   28255              : static void
   28256            4 : ix86_run_selftests (void)
   28257              : {
   28258            4 :   ix86_test_dumping_hard_regs ();
   28259            4 :   ix86_test_dumping_memory_blockage ();
   28260              : 
   28261              :   /* Various tests of loading RTL dumps, here because they contain
   28262              :      ix86-isms (e.g. names of hard regs).  */
   28263            4 :   ix86_test_loading_dump_fragment_1 ();
   28264            4 :   ix86_test_loading_call_insn ();
   28265            4 :   ix86_test_loading_full_dump ();
   28266            4 :   ix86_test_loading_unspec ();
   28267            4 : }
   28268              : 
   28269              : } // namespace selftest
   28270              : 
   28271              : #endif /* CHECKING_P */
   28272              : /* Attribute spec sets used as TARGET_ATTRIBUTE_TABLE; only ix86_gnu_attribute_table is listed.  */
   28273              : static const scoped_attribute_specs *const ix86_attribute_table[] =
   28274              : {
   28275              :   &ix86_gnu_attribute_table
   28276              : };
   28277              : 
   28278              : /* Initialize the GCC target structure.  */
   28279              : #undef TARGET_RETURN_IN_MEMORY
   28280              : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
   28281              : 
   28282              : #undef TARGET_LEGITIMIZE_ADDRESS
   28283              : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
   28284              : 
   28285              : #undef TARGET_ATTRIBUTE_TABLE
   28286              : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
   28287              : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
   28288              : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
   28289              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28290              : #  undef TARGET_MERGE_DECL_ATTRIBUTES
   28291              : #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
   28292              : #endif
   28293              : 
   28294              : #undef TARGET_INVALID_CONVERSION
   28295              : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
   28296              : 
   28297              : #undef TARGET_INVALID_UNARY_OP
   28298              : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
   28299              : 
   28300              : #undef TARGET_INVALID_BINARY_OP
   28301              : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
   28302              : 
   28303              : #undef TARGET_COMP_TYPE_ATTRIBUTES
   28304              : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
   28305              : 
   28306              : #undef TARGET_INIT_BUILTINS
   28307              : #define TARGET_INIT_BUILTINS ix86_init_builtins
   28308              : #undef TARGET_BUILTIN_DECL
   28309              : #define TARGET_BUILTIN_DECL ix86_builtin_decl
   28310              : #undef TARGET_EXPAND_BUILTIN
   28311              : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
   28312              : 
   28313              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
   28314              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
   28315              :   ix86_builtin_vectorized_function
   28316              : 
   28317              : #undef TARGET_VECTORIZE_BUILTIN_GATHER
   28318              : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
   28319              : 
   28320              : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
   28321              : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
   28322              : 
   28323              : #undef TARGET_BUILTIN_RECIPROCAL
   28324              : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
   28325              : 
   28326              : #undef TARGET_ASM_FUNCTION_EPILOGUE
   28327              : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
   28328              : 
   28329              : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
   28330              : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
   28331              :   ix86_print_patchable_function_entry
   28332              : 
   28333              : #undef TARGET_ENCODE_SECTION_INFO
   28334              : #ifndef SUBTARGET_ENCODE_SECTION_INFO
   28335              : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
   28336              : #else
   28337              : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
   28338              : #endif
   28339              : 
   28340              : #undef TARGET_ASM_OPEN_PAREN
   28341              : #define TARGET_ASM_OPEN_PAREN ""
   28342              : #undef TARGET_ASM_CLOSE_PAREN
   28343              : #define TARGET_ASM_CLOSE_PAREN ""
   28344              : 
   28345              : #undef TARGET_ASM_BYTE_OP
   28346              : #define TARGET_ASM_BYTE_OP ASM_BYTE
   28347              : 
   28348              : #undef TARGET_ASM_ALIGNED_HI_OP
   28349              : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
   28350              : #undef TARGET_ASM_ALIGNED_SI_OP
   28351              : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
   28352              : #ifdef ASM_QUAD
   28353              : #undef TARGET_ASM_ALIGNED_DI_OP
   28354              : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
   28355              : #endif
   28356              : 
   28357              : #undef TARGET_PROFILE_BEFORE_PROLOGUE
   28358              : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
   28359              : 
   28360              : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
   28361              : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
   28362              : 
   28363              : #undef TARGET_ASM_UNALIGNED_HI_OP
   28364              : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
   28365              : #undef TARGET_ASM_UNALIGNED_SI_OP
   28366              : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
   28367              : #undef TARGET_ASM_UNALIGNED_DI_OP
   28368              : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
   28369              : 
   28370              : #undef TARGET_PRINT_OPERAND
   28371              : #define TARGET_PRINT_OPERAND ix86_print_operand
   28372              : #undef TARGET_PRINT_OPERAND_ADDRESS
   28373              : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
   28374              : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
   28375              : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
   28376              : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
   28377              : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
   28378              : 
   28379              : #undef TARGET_SCHED_INIT_GLOBAL
   28380              : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
   28381              : #undef TARGET_SCHED_ADJUST_COST
   28382              : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
   28383              : #undef TARGET_SCHED_ISSUE_RATE
   28384              : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
   28385              : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
   28386              : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   28387              :   ia32_multipass_dfa_lookahead
   28388              : #undef TARGET_SCHED_MACRO_FUSION_P
   28389              : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
   28390              : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
   28391              : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
   28392              : 
   28393              : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
   28394              : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
   28395              : 
   28396              : #undef TARGET_MEMMODEL_CHECK
   28397              : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
   28398              : 
   28399              : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   28400              : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
   28401              : 
   28402              : #ifdef HAVE_AS_TLS
   28403              : #undef TARGET_HAVE_TLS
   28404              : #define TARGET_HAVE_TLS true
   28405              : #endif
   28406              : #undef TARGET_CANNOT_FORCE_CONST_MEM
   28407              : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
   28408              : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
   28409              : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
   28410              : 
   28411              : #undef TARGET_DELEGITIMIZE_ADDRESS
   28412              : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
   28413              : 
   28414              : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
   28415              : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
   28416              : 
   28417              : #undef TARGET_MS_BITFIELD_LAYOUT_P
   28418              : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
   28419              : 
   28420              : #if TARGET_MACHO
   28421              : #undef TARGET_BINDS_LOCAL_P
   28422              : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
   28423              : #else
   28424              : #undef TARGET_BINDS_LOCAL_P
   28425              : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
   28426              : #endif
   28427              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28428              : #undef TARGET_BINDS_LOCAL_P
   28429              : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
   28430              : #endif
   28431              : 
   28432              : #undef TARGET_ASM_OUTPUT_MI_THUNK
   28433              : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
   28434              : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
   28435              : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
   28436              : 
   28437              : #undef TARGET_ASM_FILE_START
   28438              : #define TARGET_ASM_FILE_START x86_file_start
   28439              : 
   28440              : #undef TARGET_OPTION_OVERRIDE
   28441              : #define TARGET_OPTION_OVERRIDE ix86_option_override
   28442              : 
   28443              : #undef TARGET_REGISTER_MOVE_COST
   28444              : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
   28445              : #undef TARGET_MEMORY_MOVE_COST
   28446              : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
   28447              : #undef TARGET_RTX_COSTS
   28448              : #define TARGET_RTX_COSTS ix86_rtx_costs
   28449              : #undef TARGET_INSN_COST
   28450              : #define TARGET_INSN_COST ix86_insn_cost
   28451              : #undef TARGET_ADDRESS_COST
   28452              : #define TARGET_ADDRESS_COST ix86_address_cost
   28453              : 
   28454              : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
   28455              : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   28456              :   ix86_use_by_pieces_infrastructure_p
   28457              : 
   28458              : #undef TARGET_OVERLAP_OP_BY_PIECES_P
   28459              : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
   28460              : 
   28461              : #undef TARGET_FLAGS_REGNUM
   28462              : #define TARGET_FLAGS_REGNUM FLAGS_REG
   28463              : #undef TARGET_FIXED_CONDITION_CODE_REGS
   28464              : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
   28465              : #undef TARGET_CC_MODES_COMPATIBLE
   28466              : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
   28467              : 
   28468              : #undef TARGET_MACHINE_DEPENDENT_REORG
   28469              : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
   28470              : 
   28471              : #undef TARGET_BUILD_BUILTIN_VA_LIST
   28472              : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
   28473              : 
   28474              : #undef TARGET_FOLD_BUILTIN
   28475              : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
   28476              : 
   28477              : #undef TARGET_GIMPLE_FOLD_BUILTIN
   28478              : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
   28479              : 
   28480              : #undef TARGET_COMPARE_VERSION_PRIORITY
   28481              : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
   28482              : 
   28483              : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
   28484              : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
   28485              :   ix86_generate_version_dispatcher_body
   28486              : 
   28487              : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
   28488              : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
   28489              :   ix86_get_function_versions_dispatcher
   28490              : 
   28491              : #undef TARGET_ENUM_VA_LIST_P
   28492              : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
   28493              : 
   28494              : #undef TARGET_FN_ABI_VA_LIST
   28495              : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
   28496              : 
   28497              : #undef TARGET_CANONICAL_VA_LIST_TYPE
   28498              : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
   28499              : 
   28500              : #undef TARGET_EXPAND_BUILTIN_VA_START
   28501              : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
   28502              : 
   28503              : #undef TARGET_MD_ASM_ADJUST
   28504              : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
   28505              : 
   28506              : #undef TARGET_C_EXCESS_PRECISION
   28507              : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
   28508              : #undef TARGET_C_BITINT_TYPE_INFO
   28509              : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
   28510              : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
   28511              : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
   28512              : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
   28513              : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
   28514              : #undef TARGET_PROMOTE_PROTOTYPES
   28515              : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
   28516              : #undef TARGET_PUSH_ARGUMENT
   28517              : #define TARGET_PUSH_ARGUMENT ix86_push_argument
   28518              : #undef TARGET_SETUP_INCOMING_VARARGS
   28519              : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
   28520              : #undef TARGET_MUST_PASS_IN_STACK
   28521              : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
   28522              : #undef TARGET_OVERALIGNED_STACK_SLOT_REQUIRED
   28523              : #define TARGET_OVERALIGNED_STACK_SLOT_REQUIRED ix86_overaligned_stack_slot_required
   28524              : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
   28525              : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
   28526              : #undef TARGET_FUNCTION_ARG_ADVANCE
   28527              : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
   28528              : #undef TARGET_FUNCTION_ARG
   28529              : #define TARGET_FUNCTION_ARG ix86_function_arg
   28530              : #undef TARGET_INIT_PIC_REG
   28531              : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
   28532              : #undef TARGET_USE_PSEUDO_PIC_REG
   28533              : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
   28534              : #undef TARGET_FUNCTION_ARG_BOUNDARY
   28535              : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
   28536              : #undef TARGET_PASS_BY_REFERENCE
   28537              : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
   28538              : #undef TARGET_INTERNAL_ARG_POINTER
   28539              : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
   28540              : #undef TARGET_UPDATE_STACK_BOUNDARY
   28541              : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
   28542              : #undef TARGET_GET_DRAP_RTX
   28543              : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
   28544              : #undef TARGET_STRICT_ARGUMENT_NAMING
   28545              : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
   28546              : #undef TARGET_STATIC_CHAIN
   28547              : #define TARGET_STATIC_CHAIN ix86_static_chain
   28548              : #undef TARGET_TRAMPOLINE_INIT
   28549              : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
   28550              : #undef TARGET_RETURN_POPS_ARGS
   28551              : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
   28552              : 
   28553              : #undef TARGET_WARN_FUNC_RETURN
   28554              : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
   28555              : 
   28556              : #undef TARGET_LEGITIMATE_COMBINED_INSN
   28557              : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
   28558              : 
   28559              : #undef TARGET_ASAN_SHADOW_OFFSET
   28560              : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
   28561              : 
   28562              : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
   28563              : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
   28564              : 
   28565              : #undef TARGET_SCALAR_MODE_SUPPORTED_P
   28566              : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
   28567              : 
   28568              : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
   28569              : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
   28570              : ix86_libgcc_floating_mode_supported_p
   28571              : 
   28572              : #undef TARGET_VECTOR_MODE_SUPPORTED_P
   28573              : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
   28574              : 
   28575              : #undef TARGET_C_MODE_FOR_SUFFIX
   28576              : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
   28577              : 
   28578              : #ifdef HAVE_AS_TLS
   28579              : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
   28580              : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
   28581              : #endif
   28582              : 
   28583              : #ifdef SUBTARGET_INSERT_ATTRIBUTES
   28584              : #undef TARGET_INSERT_ATTRIBUTES
   28585              : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
   28586              : #endif
   28587              : 
   28588              : #undef TARGET_MANGLE_TYPE
   28589              : #define TARGET_MANGLE_TYPE ix86_mangle_type
   28590              : 
   28591              : #undef TARGET_EMIT_SUPPORT_TINFOS
   28592              : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
   28593              : 
   28594              : #undef TARGET_STACK_PROTECT_GUARD
   28595              : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
   28596              : 
   28597              : #undef TARGET_STACK_PROTECT_GUARD_SYMBOL_P
   28598              : #define TARGET_STACK_PROTECT_GUARD_SYMBOL_P \
   28599              :   ix86_stack_protect_guard_symbol_p
   28600              : 
   28601              : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
   28602              : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
   28603              :   ix86_stack_protect_runtime_enabled_p
   28604              : 
   28605              : #if !TARGET_MACHO
   28606              : #undef TARGET_STACK_PROTECT_FAIL
   28607              : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
   28608              : #endif
   28609              : 
   28610              : #undef TARGET_FUNCTION_VALUE
   28611              : #define TARGET_FUNCTION_VALUE ix86_function_value
   28612              : 
   28613              : #undef TARGET_FUNCTION_VALUE_REGNO_P
   28614              : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
   28615              : 
   28616              : #undef TARGET_ZERO_CALL_USED_REGS
   28617              : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
   28618              : 
   28619              : #undef TARGET_PROMOTE_FUNCTION_MODE
   28620              : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
   28621              : 
   28622              : #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
   28623              : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
   28624              : 
   28625              : #undef TARGET_MEMBER_TYPE_FORCES_BLK
   28626              : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
   28627              : 
   28628              : #undef TARGET_INSTANTIATE_DECLS
   28629              : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
   28630              : 
   28631              : #undef TARGET_SECONDARY_RELOAD
   28632              : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
   28633              : #undef TARGET_SECONDARY_MEMORY_NEEDED
   28634              : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
   28635              : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
   28636              : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
   28637              : 
   28638              : #undef TARGET_CLASS_MAX_NREGS
   28639              : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
   28640              : 
   28641              : #undef TARGET_PREFERRED_RELOAD_CLASS
   28642              : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
   28643              : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
   28644              : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
   28645              : /* When this hook returns true for MODE, the compiler allows
   28646              :    registers explicitly used in the rtl to be used as spill registers
   28647              :    but prevents the compiler from extending the lifetime of these
   28648              :    registers.  */
   28649              : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
   28650              : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
   28651              : #undef TARGET_CLASS_LIKELY_SPILLED_P
   28652              : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
   28653              : #undef TARGET_CALLEE_SAVE_COST
   28654              : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
   28655              : 
   28656              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
   28657              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   28658              :   ix86_builtin_vectorization_cost
   28659              : #undef TARGET_VECTORIZE_VEC_PERM_CONST
   28660              : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
   28661              : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
   28662              : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
   28663              :   ix86_preferred_simd_mode
   28664              : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
   28665              : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
   28666              :   ix86_split_reduction
   28667              : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
   28668              : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   28669              :   ix86_autovectorize_vector_modes
   28670              : #undef TARGET_VECTORIZE_GET_MASK_MODE
   28671              : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
   28672              : #undef TARGET_VECTORIZE_CREATE_COSTS
   28673              : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
   28674              : 
   28675              : #undef TARGET_SET_CURRENT_FUNCTION
   28676              : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
   28677              : 
   28678              : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
   28679              : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
   28680              : 
   28681              : #undef TARGET_OPTION_SAVE
   28682              : #define TARGET_OPTION_SAVE ix86_function_specific_save
   28683              : 
   28684              : #undef TARGET_OPTION_RESTORE
   28685              : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
   28686              : 
   28687              : #undef TARGET_OPTION_POST_STREAM_IN
   28688              : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
   28689              : 
   28690              : #undef TARGET_OPTION_PRINT
   28691              : #define TARGET_OPTION_PRINT ix86_function_specific_print
   28692              : 
   28693              : #undef TARGET_CAN_INLINE_P
   28694              : #define TARGET_CAN_INLINE_P ix86_can_inline_p
   28695              : 
   28696              : #undef TARGET_LEGITIMATE_ADDRESS_P
   28697              : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
   28698              : 
   28699              : #undef TARGET_REGISTER_PRIORITY
   28700              : #define TARGET_REGISTER_PRIORITY ix86_register_priority
   28701              : 
   28702              : #undef TARGET_REGISTER_USAGE_LEVELING_P
   28703              : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
   28704              : 
   28705              : #undef TARGET_LEGITIMATE_CONSTANT_P
   28706              : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
   28707              : 
   28708              : #undef TARGET_COMPUTE_FRAME_LAYOUT
   28709              : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
   28710              : 
   28711              : #undef TARGET_FRAME_POINTER_REQUIRED
   28712              : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
   28713              : 
   28714              : #undef TARGET_CAN_ELIMINATE
   28715              : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
   28716              : 
   28717              : #undef TARGET_EXTRA_LIVE_ON_ENTRY
   28718              : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
   28719              : 
   28720              : #undef TARGET_ASM_CODE_END
   28721              : #define TARGET_ASM_CODE_END ix86_code_end
   28722              : 
   28723              : #undef TARGET_CONDITIONAL_REGISTER_USAGE
   28724              : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
   28725              : 
   28726              : #undef TARGET_CANONICALIZE_COMPARISON
   28727              : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
   28728              : 
   28729              : #undef TARGET_LOOP_UNROLL_ADJUST
   28730              : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
   28731              : 
   28732              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.
                      :    NOTE(review): the hook is still overridden right below, so
                      :    whatever is disabled presumably lives inside ix86_spill_class
                      :    itself -- confirm against that function.  */
   28733              : #undef TARGET_SPILL_CLASS
   28734              : #define TARGET_SPILL_CLASS ix86_spill_class
   28735              : 
   28736              : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
   28737              : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
   28738              :   ix86_simd_clone_compute_vecsize_and_simdlen
   28739              : 
   28740              : #undef TARGET_SIMD_CLONE_ADJUST
   28741              : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
   28742              : 
   28743              : #undef TARGET_SIMD_CLONE_USABLE
   28744              : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
   28745              : 
   28746              : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
   28747              : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
   28748              : 
   28749              : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
   28750              : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
   28751              :   ix86_float_exceptions_rounding_supported_p
   28752              : 
   28753              : #undef TARGET_MODE_EMIT
   28754              : #define TARGET_MODE_EMIT ix86_emit_mode_set
   28755              : 
   28756              : #undef TARGET_MODE_NEEDED
   28757              : #define TARGET_MODE_NEEDED ix86_mode_needed
   28758              : 
   28759              : #undef TARGET_MODE_AFTER
   28760              : #define TARGET_MODE_AFTER ix86_mode_after
   28761              : 
   28762              : #undef TARGET_MODE_ENTRY
   28763              : #define TARGET_MODE_ENTRY ix86_mode_entry
   28764              : 
   28765              : #undef TARGET_MODE_EXIT
   28766              : #define TARGET_MODE_EXIT ix86_mode_exit
   28767              : 
   28768              : #undef TARGET_MODE_PRIORITY
   28769              : #define TARGET_MODE_PRIORITY ix86_mode_priority
   28770              : 
   28771              : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
   28772              : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
   28773              : 
   28774              : #undef TARGET_OFFLOAD_OPTIONS
   28775              : #define TARGET_OFFLOAD_OPTIONS \
   28776              :   ix86_offload_options
   28777              : 
   28778              : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
   28779              : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
   28780              : 
   28781              : #undef TARGET_OPTAB_SUPPORTED_P
   28782              : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
   28783              : 
   28784              : #undef TARGET_HARD_REGNO_SCRATCH_OK
   28785              : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
   28786              : 
   28787              : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
   28788              : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
   28789              : 
   28790              : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
   28791              : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
   28792              : 
   28793              : #undef TARGET_INIT_LIBFUNCS
   28794              : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
   28795              : 
   28796              : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
   28797              : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
   28798              : 
   28799              : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
   28800              : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
   28801              : 
   28802              : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
   28803              : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
   28804              : 
   28805              : #undef TARGET_HARD_REGNO_NREGS
   28806              : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
   28807              : #undef TARGET_HARD_REGNO_MODE_OK
   28808              : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
   28809              : 
   28810              : #undef TARGET_MODES_TIEABLE_P
   28811              : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
   28812              : 
   28813              : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
   28814              : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   28815              :   ix86_hard_regno_call_part_clobbered
   28816              : 
   28817              : #undef TARGET_INSN_CALLEE_ABI
   28818              : #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
   28819              : 
   28820              : #undef TARGET_CAN_CHANGE_MODE_CLASS
   28821              : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
   28822              : 
   28823              : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
   28824              : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
   28825              : 
   28826              : #undef TARGET_STATIC_RTX_ALIGNMENT
   28827              : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
   28828              : #undef TARGET_CONSTANT_ALIGNMENT
   28829              : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
   28830              : 
   28831              : #undef TARGET_EMPTY_RECORD_P
   28832              : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
   28833              : 
   28834              : #undef TARGET_WARN_PARAMETER_PASSING_ABI
   28835              : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
   28836              : 
   28837              : #undef TARGET_GET_MULTILIB_ABI_NAME
   28838              : #define TARGET_GET_MULTILIB_ABI_NAME \
   28839              :   ix86_get_multilib_abi_name
   28840              : 
   28841              : #undef TARGET_IFUNC_REF_LOCAL_OK
   28842              : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
   28843              : 
   28844              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28845              : # undef TARGET_ASM_RELOC_RW_MASK
   28846              : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
   28847              : #endif
   28848              : 
   28849              : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
   28850              : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
   28851              : 
   28852              : #undef TARGET_MEMTAG_ADD_TAG
   28853              : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
   28854              : 
   28855              : #undef TARGET_MEMTAG_SET_TAG
   28856              : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
   28857              : 
   28858              : #undef TARGET_MEMTAG_EXTRACT_TAG
   28859              : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
   28860              : 
   28861              : #undef TARGET_MEMTAG_UNTAGGED_POINTER
   28862              : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
   28863              : 
   28864              : #undef TARGET_MEMTAG_TAG_BITSIZE
   28865              : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
   28866              : 
   28867              : #undef TARGET_GEN_CCMP_FIRST
   28868              : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
   28869              : 
   28870              : #undef TARGET_GEN_CCMP_NEXT
   28871              : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
   28872              : 
   28873              : #undef TARGET_HAVE_CCMP
   28874              : #define TARGET_HAVE_CCMP ix86_have_ccmp
   28875              : 
   28876              : #undef TARGET_MODE_CAN_TRANSFER_BITS
   28877              : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
   28878              : 
   28879              : #undef TARGET_REDZONE_CLOBBER
   28880              : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
   28881              : /* Implement TARGET_LIBC_HAS_FAST_FUNCTION.  FCODE is compared as a
                      :    built_in_function: when OPTION_GLIBC is defined and evaluates to
                      :    true, only BUILT_IN_MEMPCPY is reported as fast; in every other
                      :    case the result is false.  */
   28882              : static bool
   28883        95287 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
   28884              : {
   28885              : #ifdef OPTION_GLIBC
   28886        95287 :   if (OPTION_GLIBC)
   28887        95287 :     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
   28888              :   else
   28889              :     return false;
   28890              : #else
   28891              :   return false;
   28892              : #endif
   28893              : }
   28894              : 
   28895              : #undef TARGET_LIBC_HAS_FAST_FUNCTION
   28896              : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
   28897              : /* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  CFN, MODE and BOUNDARY_P
                      :    are forwarded unchanged to the generic helpers.  When
                      :    OPTION_GLIBC is defined and true, the result is the larger of
                      :    glibc_linux_libm_function_max_error and a libmvec bound computed
                      :    here; otherwise default_libm_function_max_error is used.  The
                      :    libmvec bound is nonzero only for the sin/cos cases listed in
                      :    the switch, when the math flags tested below hold and
                      :    BOUNDARY_P is false: it is 4, except that ~0U is returned
                      :    immediately if flag_rounding_math is set.  */
   28898              : static unsigned
   28899        78134 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
   28900              :                               bool boundary_p)
   28901              : {
   28902              : #ifdef OPTION_GLIBC
   28903        78134 :   bool glibc_p = OPTION_GLIBC;
   28904              : #else
   28905              :   bool glibc_p = false;
   28906              : #endif
   28907        78134 :   if (glibc_p)
   28908              :     {
   28909              :       /* If __FAST_MATH__ is defined, glibc provides libmvec.
                      :          NOTE(review): the flag test below is presumably the
                      :          condition under which __FAST_MATH__ is defined -- confirm.  */
   28910        78134 :       unsigned int libmvec_ret = 0;
   28911        78134 :       if (!flag_trapping_math
   28912         8300 :           && flag_unsafe_math_optimizations
   28913         3378 :           && flag_finite_math_only
   28914         3352 :           && !flag_signed_zeros
   28915         3352 :           && !flag_errno_math)
   28916         3352 :         switch (cfn)
   28917              :           {
   28918         1396 :           CASE_CFN_COS:
   28919         1396 :           CASE_CFN_COS_FN:
   28920         1396 :           CASE_CFN_SIN:
   28921         1396 :           CASE_CFN_SIN_FN:
   28922         1396 :             if (!boundary_p)
   28923              :               {
   28924              :                 /* With non-default rounding modes, libmvec provides
   28925              :                    complete garbage in results.  E.g.
   28926              :                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
   28927              :                    returns 0.00333309174f rather than 1.40129846e-45f.  */
   28928          587 :                 if (flag_rounding_math)
   28929              :                   return ~0U;
   28930              :                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
   28931              :                    claims libmvec maximum error is 4ulps.
   28932              :                    My own random testing indicates 2ulps for SFmode and
   28933              :                    0.5ulps for DFmode, but let's go with the 4ulps.  */
   28934              :                 libmvec_ret = 4;
   28935              :               }
   28936              :             break;
   28937              :           default:
   28938              :             break;
   28939              :           }
   28940        78134 :       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
   28941              :                                                               boundary_p);
   28942        78134 :       return MAX (ret, libmvec_ret);
   28943              :     }
   28944            0 :   return default_libm_function_max_error (cfn, mode, boundary_p);
   28945              : }
   28946              : 
   28947              : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
   28948              : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
   28949              : /* Implement TARGET_CANNOT_COPY_INSN_P; only compiled when TARGET_MACHO
                      :    is nonzero.  Always return false when TARGET_64BIT.  Otherwise
                      :    return true if INSN is a single set whose source is an UNSPEC
                      :    with XINT (src, 1) equal to UNSPEC_SET_GOT, and false for any
                      :    other insn.  */
   28950              : #if TARGET_MACHO
   28951              : static bool
   28952              : ix86_cannot_copy_insn_p (rtx_insn *insn)
   28953              : {
   28954              :   if (TARGET_64BIT)
   28955              :     return false;
   28956              : 
   28957              :   rtx set = single_set (insn);
   28958              :   if (set)
   28959              :     {
   28960              :       rtx src = SET_SRC (set);
   28961              :       if (GET_CODE (src) == UNSPEC
   28962              :           && XINT (src, 1) == UNSPEC_SET_GOT)
   28963              :         return true;
   28964              :     }
   28965              :   return false;
   28966              : }
   28967              : 
   28968              : #undef TARGET_CANNOT_COPY_INSN_P
   28969              : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
   28970              : 
   28971              : #endif
   28972              : 
   28973              : #if CHECKING_P
   28974              : #undef TARGET_RUN_TARGET_SELFTESTS
   28975              : #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
   28976              : #endif /* #if CHECKING_P */
   28977              : 
   28978              : #undef TARGET_DOCUMENTATION_NAME
   28979              : #define TARGET_DOCUMENTATION_NAME "x86"
   28980              : 
   28981              : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.
                      :    Return a newly allocated sbitmap, FIRST_PSEUDO_REGISTER bits wide.
                      :    All bits are clear if one of the early returns below rejects
                      :    separate shrink wrapping.  Otherwise a bit is set for each
                      :    general register selected by ix86_save_reg (regno, true, true)
                      :    whose running offset lies in [-0x8000, 0x7fff]; the bits for the
                      :    hard frame pointer, the DRAP register and the PIC register are
                      :    then cleared when the corresponding feature is in use.  */
   28982              : sbitmap
   28983       737990 : ix86_get_separate_components (void)
   28984              : {
   28985       737990 :   HOST_WIDE_INT offset, to_allocate;
   28986       737990 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   28987       737990 :   bitmap_clear (components);
   28988       737990 :   struct machine_function *m = cfun->machine;
   28989              : 
   28990       737990 :   offset = m->frame.stack_pointer_offset;
   28991       737990 :   to_allocate = offset - m->frame.sse_reg_save_offset;
   28992              : 
   28993              :   /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
   28994              :      Experiments show that APX PPX can speed up the prologue.  If the function
   28995              :      does not exit early during actual execution, then using APX PPX is faster.
   28996              :      If the function always exits early during actual execution, then shrink
   28997              :      wrap separate reduces the number of MOV (PUSH/POP) instructions actually
   28998              :      executed, thus speeding up execution.
   28999              :      foo:
   29000              :           movl    $1, %eax
   29001              :           testq   %rdi, %rdi
   29002              :           jne     .L60
   29003              :           ret   ---> early return.
   29004              :      .L60:
   29005              :           subq    $88, %rsp     ---> belongs to prologue.
   29006              :           xorl    %eax, %eax
   29007              :           movq    %rbx, 40 (%rsp) ---> belongs to prologue.
   29008              :           movq    8 (%rdi), %rbx
   29009              :           movq    %rbp, 48 (%rsp) ---> belongs to prologue.
   29010              :           movq    %rdi, %rbp
   29011              :           testq   %rbx, %rbx
   29012              :           jne     .L61
   29013              :           movq    40 (%rsp), %rbx
   29014              :           movq    48 (%rsp), %rbp
   29015              :           addq    $88, %rsp
   29016              :           ret
   29017              :      .L61:
   29018              :           movq    %r12, 56 (%rsp) ---> belongs to prologue.
   29019              :           movq    %r13, 64 (%rsp) ---> belongs to prologue.
   29020              :           movq    %r14, 72 (%rsp) ---> belongs to prologue.
   29021              :      ... ...
   29022              : 
   29023              :      Disable shrink wrap separate when PPX is enabled, unless the
                      :      function calls eh_return.  The test below also gives up when
                      :      func_type is not TYPE_NORMAL, for TARGET_SEH, when stack
                      :      realignment is needed, and when call_ms2sysv is set.  */
   29024       737990 :   if ((TARGET_APX_PPX && !crtl->calls_eh_return)
   29025       737523 :       || cfun->machine->func_type != TYPE_NORMAL
   29026              :       || TARGET_SEH
   29027       737425 :       || crtl->stack_realign_needed
   29028       727821 :       || m->call_ms2sysv)
   29029              :     return components;
   29030              : 
   29031              :   /* Shrink wrap separate uses MOV instead of PUSH/POP, so disable it
   29032              :      when MOV is prohibited (save_regs_using_push_pop returns true).  */
   29033       725899 :   if (save_regs_using_push_pop (to_allocate))
   29034              :     return components;
   29035              : 
   29036     32748276 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29037     32396144 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29038              :       {
   29039              :         /* Skip registers with large offsets, where a pseudo may be needed.
                      :            OFFSET still advances for skipped registers.  */
   29040       608499 :         if (IN_RANGE (offset, -0x8000, 0x7fff))
   29041       607433 :           bitmap_set_bit (components, regno);
   29042       654457 :         offset += UNITS_PER_WORD;
   29043              :       }
   29044              : 
   29045              :   /* Don't mess with the following registers.  */
   29046       352132 :   if (frame_pointer_needed)
   29047         6349 :     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
   29048              : 
   29049       352132 :   if (crtl->drap_reg)
   29050          129 :     bitmap_clear_bit (components, REGNO (crtl->drap_reg));
   29051              : 
   29052       352132 :   if (pic_offset_table_rtx)
   29053        29890 :     bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
   29054              : 
   29055              :   return components;
   29056              : }
   29057              : 
   29058              : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.
                      :    Return a newly allocated sbitmap, FIRST_PSEUDO_REGISTER bits wide,
                      :    with a bit set for every non-fixed hard register that is in BB's
                      :    DF_LIVE_IN set, in the gen or kill set of its DF_LIVE_BB_INFO, or
                      :    in the set that callee_abis.caller_save_regs (*crtl->abi) yields
                      :    once the callee ABI of every call insn in BB has been noted.  */
   29059              : sbitmap
   29060      9380733 : ix86_components_for_bb (basic_block bb)
   29061              : {
   29062      9380733 :   bitmap in = DF_LIVE_IN (bb);
   29063      9380733 :   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
   29064      9380733 :   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
   29065              : 
   29066      9380733 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   29067      9380733 :   bitmap_clear (components);
   29068              : 
   29069      9380733 :   function_abi_aggregator callee_abis;
   29070      9380733 :   rtx_insn *insn;
   29071    110329227 :   FOR_BB_INSNS (bb, insn)
   29072    100948494 :     if (CALL_P (insn))
   29073      3080334 :       callee_abis.note_callee_abi (insn_callee_abi (insn));
   29074      9380733 :   HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
   29075              : 
   29076              :   /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.
                      :      Registers in EXTRA_CALLER_SAVES count as used as well.  Note
                      :      that the loop tests every non-fixed hard register; it does not
                      :      filter on GENERAL_REGNO_P.  */
   29077    872408169 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29078    863027436 :     if (!fixed_regs[regno]
   29079    863027436 :         && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
   29080    438604005 :             || bitmap_bit_p (in, regno)
   29081    413106099 :             || bitmap_bit_p (gen, regno)
   29082    400561765 :             || bitmap_bit_p (kill, regno)))
   29083     38307633 :       bitmap_set_bit (components, regno);
   29084              : 
   29085      9380733 :   return components;
   29086              : }
   29087              : 
   29088              : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
                      :    All four arguments are unnamed and ignored; the function has no
                      :    effect.  */
   29089              : void
   29090       478144 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
   29091              : {
   29092              :   /* Nothing to do for x86.  */
   29093       478144 : }
   29094              : 
   29095              : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.
                      :    For each general register selected by
                      :    ix86_save_reg (regno, true, true), save it with
                      :    ix86_emit_save_reg_using_mov if its bit is set in COMPONENTS.
                      :    CFA_OFFSET starts at reg_save_offset + fs.sp_offset
                      :    - stack_pointer_offset and drops by UNITS_PER_WORD for every
                      :    selected register, whether or not it is in COMPONENTS, so the
                      :    offset used for a register does not depend on which components
                      :    are emitted by this call.  */
   29096              : void
   29097       166473 : ix86_emit_prologue_components (sbitmap components)
   29098              : {
   29099       166473 :   HOST_WIDE_INT cfa_offset;
   29100       166473 :   struct machine_function *m = cfun->machine;
   29101              : 
   29102       166473 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   29103       166473 :                - m->frame.stack_pointer_offset;
   29104     15481989 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29105     15315516 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29106              :       {
   29107       760453 :         if (bitmap_bit_p (components, regno))
   29108       194522 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
   29109       810322 :         cfa_offset -= UNITS_PER_WORD;
   29110              :       }
   29111       166473 : }
   29112              : 
   29113              : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.
                      :    Walk the general registers selected by
                      :    ix86_save_reg (regno, true, true) with the same CFA_OFFSET
                      :    progression as ix86_emit_prologue_components.  For each one
                      :    whose bit is set in COMPONENTS, emit a word_mode move into the
                      :    register from frame memory at choose_baseaddr (cfa_offset, NULL),
                      :    mark the insn frame related and attach a REG_CFA_RESTORE note
                      :    for the register.  */
   29114              : void
   29115       148615 : ix86_emit_epilogue_components (sbitmap components)
   29116              : {
   29117       148615 :   HOST_WIDE_INT cfa_offset;
   29118       148615 :   struct machine_function *m = cfun->machine;
   29119       148615 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   29120       148615 :                - m->frame.stack_pointer_offset;
   29121              : 
   29122     13821195 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29123     13672580 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29124              :       {
   29125       681344 :         if (bitmap_bit_p (components, regno))
   29126              :           {
   29127       257206 :             rtx reg = gen_rtx_REG (word_mode, regno);
   29128       257206 :             rtx mem;
   29129       257206 :             rtx_insn *insn;
   29130              : 
   29131       257206 :             mem = choose_baseaddr (cfa_offset, NULL);
   29132       257206 :             mem = gen_frame_mem (word_mode, mem);
   29133       257206 :             insn = emit_move_insn (reg, mem);
   29134              : 
   29135       257206 :             RTX_FRAME_RELATED_P (insn) = 1;
   29136       257206 :             add_reg_note (insn, REG_CFA_RESTORE, reg);
   29137              :           }
   29138       737784 :         cfa_offset -= UNITS_PER_WORD;
   29139              :       }
   29140       148615 : }
   29141              : 
   29142              : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
   29143              : void
   29144        44725 : ix86_set_handled_components (sbitmap components)
   29145              : {
   29146      4159425 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29147      4114700 :     if (bitmap_bit_p (components, regno))
   29148              :       {
   29149       105903 :         cfun->machine->reg_is_wrapped_separately[regno] = true;
   29150       105903 :         cfun->machine->use_fast_prologue_epilogue = true;
   29151       105903 :         cfun->machine->frame.save_regs_using_mov = true;
   29152              :       }
   29153        44725 : }
   29154              : 
   29155              : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
   29156              : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
   29157              : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
   29158              : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
   29159              : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
   29160              : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
   29161              : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
   29162              : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
   29163              :   ix86_emit_prologue_components
   29164              : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
   29165              : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
   29166              :   ix86_emit_epilogue_components
   29167              : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
   29168              : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
   29169              : 
   29170              : struct gcc_target targetm = TARGET_INITIALIZER;
   29171              : 
   29172              : #include "gt-i386.h"
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.